Hmisc/NAMESPACE

export("%nin%",.q,abs.error.pred,addMarginal,all.digits,all.is.numeric,approxExtrap,areg,areg.boot,aregImpute,aregTran,arrGrob,as.discrete,asNumericMatrix,ballocation,bezier,binconf,biVar,bootkm,bpower,bpower.sim,bpplot,bpplotM,bpplt,bppltp,bpx,bsamsize,bystats,bystats2,capitalize,catTestchisq,Cbind,ceil,character.table,chiSquare,ciapower,cleanup.import,clowess,cnvrt.coords,code.levels,colorFacet,combine.levels,combineLabels,combplotp,confbar,consolidate,contents,conTestkw,cpower,Cs,csv.get,cumcategory,curveRep,curveSmooth,cut2,datadensity,dataDensityString,dataframeReduce,dataRep,ddmmmyy,deff,describe,discrete,dhistboxp,dotchart2,dotchart3,dotchartp,dotchartpl,Dotplot,drawPlot,dvi,dvigv,dvips,ecdfpM,Ecdf,equalBins,errbar,escapeBS,escapeRegex,estSeqMarkovOrd,estSeqSim,event.chart,event.convert,event.history,expr.tree,fillin,find.matches,first.word,fit.mult.impute,format.df,format.pval,formatCats,formatCons,formatDateTime,formatdescribeSingle,formatSep,formatTestStats,ftupwr,ftuss,Function,gbayes,gbayes1PowerNP,gbayes2,gbayesMixPost,gbayesMixPowerNP,gbayesMixPredNoData,gbayesSeqSim,get2rowHeads,getHdata,getLatestSource,GetModelFrame,getRs,getZip,ggfreqScatter,ggplotlyr,GiniMd,Gompertz2,groupn,grType,hdquantile,hidingTOC,hist.data.frame,histbackback,histboxp,histboxpM,histSpike,histSpikeg,hoeffd,html,htmlGreek,htmlSN,htmlSpecial,htmlSpecialType,htmlTranslate,htmlVerbatim,importConvertDateTime,improveProb,impute,impute.transcan,inmChoice,inmChoicelike,intMarkovOrd,inverseFunction,invertTabulated,is.discrete,is.imputed,is.mChoice,is.present,is.special.miss,james.stein,jitter2,keepHattrib,Key,Key2,km.quick,knitrSet,labcurve,label,Label,labelLatex,labelPlotmath,Lag,largest.empty,latex,latexBuild,latexCheckOptions,latexDotchart,latexNeedle,latexSN,latexTabular,latexTherm,latexTranslate,latexVerbatim,list.tree,llist,lm.fit.qr.bare,Load,Lognorm2,logrank,lookupSASContents,lrcum,makeNames,makeNstr,makeSteps,mApply,markupSpecs,mask,match.mChoice,matchCases,matrix2dataFrame,matxv,mbarclPanel,mbarclpl,mChoice,mdb.get,Mean,medvPanel,medvpl,Merge,mgp.axis,mgp.axis.labels,mhgr,minor.tick,monotone,monthDays,mtitle,multEventChart,multLines,na.delete,na.detail.response,na.include,na.keep,na.pattern,na.retain,naclus,nafitted.delete,Names2names,naplot,napredict.delete,naprint.delete,naprint.keep,naresid.delete,naresid.keep,nFm,nmChoice,nobsY,nomiss,nstr,num.denom.setup,num.intercepts,numeric.string,numericScale,oPar,optionsCmds,ordGridFun,ordTestpo,outerText,pairUpDiff,panel.bpplot,panel.Dotplot,panel.Ecdf,panel.plsmo,panel.xYplot,parGrid,partition.matrix,partition.vector,pasteFit,pBlock,pc1,plotCorrM,plotCorrPrecision,plotp,plotlyM,plotlyParm,plotlySave,plotmathTranslate,plotMultSim,plotpsummaryM,plsmo,pngNeedle,pomodm,popower,posamsize,prepanel.Dotplot,prepanel.Ecdf,prepanel.xYplot,print.char.matrix,prList,prn,propsPO,propsTrans,prselect,prType,pstamp,putHcap,putHfig,putKey,putKeyEmpty,Quantile,Quantile2,R2Measures,rcorr,rcorr.cens,rcorrcens,rcorrp.cens,rcspline.eval,rcspline.plot,rcspline.restate,rcsplineFunction,read.xportDataload,readSAScsv,redun,reformM,reLabelled,rendHTML,replace.substring.wild,reShape,responseSummary,restoreHattrib,rlegend,rlegendg,rm.boot,rMultinom,roundN,roundPOSIXt,samplesize.bin,sas.codes,sas.get,sas.get.macro,sasdsLabels,sasxport.get,Save,scat1d,score.binary,sedit,sepUnitsTrans,setParNro,setTrellis,show.col,show.pch,showPsfrag,simMarkovOrd,simplifyDims,simPOcuts,simRegOrd,sKey,smean.cl.boot,smean.cl.normal,smean.sd,smean.sdl,smearingEst,smedian.hilow,solvet,somers2,soprobMarkovOrd,soprobMarkovOrdm,spearman,spearman.test,spearman2,spower,spss.get,src,stat_plsmo,stata.get,StatPlsmo,stepfun.eval,stratify,strgraphwrap,string.bounding.box,string.break.line,stringDims,stripChart,subplot,substi,substi.source,substring.location,substring2,summarize,summaryD,summaryDp,summaryM,summaryP,summaryRc,summaryS,symbol.freq,sys,t.test.cluster,table_formatpct,table_freq,table_latexdefs,table_N,table_pc,table_trio,tabulr,termsDrop,testDateTime,tex,tobase64image,transace,transcan,translate,trap.rule,trellis.strip.blank,truncPOSIXt,uncbind,units,unPaste,upData,upFirst,valueLabel,valueName,valueTags,valueUnit,var.inner,varclus,Weibull2,whichClosek,whichClosePW,whichClosest,wtd.Ecdf,wtd.loess.noiter,wtd.mean,wtd.quantile,wtd.rank,wtd.table,wtd.var,xInch,xless,xy.group,xYplot,xySortNoDupNoNA,yearDays,yInch,ynbind,zoom,"[<-.discrete","consolidate<-","is.na<-.discrete","label<-","length<-.discrete","substring2<-","valueLabel<-","valueName<-","valueTags<-","valueUnit<-")
useDynLib(Hmisc, .registration=TRUE, .fixes="F_")
import(methods)
import(stats)
import(survival)
import(Formula)
# import(grid)
import(lattice)
import(ggplot2)
## import(scales)
# importFrom(acepack, ace, avas)
importFrom(base64enc, base64encode)
importFrom(latticeExtra, useOuterStrips, resizePanels)
importFrom(cluster, clara)
importFrom(foreign, read.dta, lookup.xport, read.spss, read.xport)
importFrom(gtable, gtable_add_grob)
importFrom(nnet, multinom)
importFrom(rpart, rpart, rpart.control)
importFrom(grid, convertX, convertY, grid.segments, gpar, convertUnit, grid.polygon, is.unit, grid.rect, grid.xaxis, grid.yaxis, convertWidth, convertHeight, grid.text, viewport, grobWidth, grobHeight)
importFrom(gridExtra, arrangeGrob)
importFrom(htmlTable, htmlTable, txtRound)
importFrom(htmltools, HTML, browsable)
importFrom(colorspace, rainbow_hcl)
## importFrom(tables, tabular, table_options)
## importFrom(ggplot2, geom_segment)
importFrom(grDevices, adjustcolor, col2rgb, dev.list, gray, rainbow, xy.coords, png, dev.off, colorRampPalette)
importFrom(graphics, abline, arrows, axis, axis.Date, axis.POSIXct, barplot, box, hist, legend, lines, locator, mtext, par, plot, plot.new, plot.window, points, polygon, rect, segments, strheight, strwidth, symbols, text, title)
importFrom(utils, browseURL, capture.output, download.file, file.edit, find, getFromNamespace, object.size, read.csv, read.table)
importFrom(data.table, setDT, data.table, is.data.table, key, melt)
S3method(print, arrGrob) S3method(ggplot, transcan) S3method(ggplot, summaryP) S3method(print, spower) S3method(print, Quantile2) S3method(plot, describe) S3method(plot, Quantile2) S3method(plot, summaryP) S3method(plot, summaryS) S3method(plot, transcan) S3method(label, default) S3method(label, data.frame) S3method('label<-', default) S3method('label<-', data.frame) S3method('[', labelled) S3method(plotp, summaryS) S3method(print, labelled) S3method(relevel, labelled) S3method(label, Surv) S3method(units, default) S3method(units, Surv) S3method(Ecdf,data.frame) S3method(Ecdf,default) S3method(Ecdf,formula) S3method(Function,areg.boot) S3method(Function,transcan) S3method(Label,data.frame) S3method(Math,mChoice) S3method(Mean,areg.boot) S3method(Ops,mChoice) S3method(Quantile,areg.boot) S3method(Summary,mChoice) S3method(as.character,mChoice) S3method(as.data.frame,impute) S3method(as.data.frame,labelled)
S3method(as.data.frame,roundN) S3method(as.data.frame,special.miss) S3method(as.data.frame,substi) S3method(as.discrete,default) S3method(as.double,Cbind) S3method(as.double,mChoice) S3method(as.numeric,Cbind) S3method(ceil,POSIXt) S3method(ceil,default) S3method(consolidate,default) S3method(contents,data.frame) S3method(contents,list) S3method(datadensity,data.frame) S3method(describe,data.frame) S3method(describe,default) S3method(describe,formula) S3method(describe,matrix) S3method(describe,vector) S3method(dvi,latex) S3method(dvigv,dvi) S3method(dvigv,latex) S3method(dvips,dvi) S3method(dvips,latex) S3method(format,mChoice) S3method(format,special.miss) S3method(format,timePOSIXt) S3method(formula,summary.formula.cross) S3method(hist,data.frame) S3method(html,contents.data.frame) S3method(html,data.frame) S3method(html,default) S3method(html,describe) S3method(html,describe.single) S3method(html,summaryM) S3method(html,latex) S3method(impute,default) S3method(impute,transcan) S3method('is.na<-',discrete) S3method(jitter2,data.frame) S3method(jitter2,default) S3method(latex,bystats) S3method(latex,bystats2) S3method(latex,default) S3method(latex,describe) S3method(latex,describe.single) S3method(latex,'function') S3method(latex,list) S3method(latex,responseSummary) S3method(latex,summary.formula.cross) S3method(latex,summary.formula.response) S3method(latex,summary.formula.reverse) S3method(latex,summaryM) S3method(latex,summaryP) S3method('length<-',discrete) S3method(napredict,delete) S3method(naprint,delete) S3method(naprint,keep) S3method(naresid,delete) S3method(naresid,keep) S3method(plot,areg) S3method(plot,areg.boot) S3method(plot,aregImpute) S3method(plot,biVar) S3method(plot,curveRep) S3method(plot,drawPlot) S3method(plot,gbayes) S3method(plot,rm.boot) S3method(plot,summary.formula.response) S3method(plot,summary.formula.reverse) S3method(plot,summaryM) S3method(plot,varclus) S3method(predict,areg) S3method(predict,areg.boot) S3method(predict,dataRep) S3method(predict,transcan) S3method(print,abs.error.pred) S3method(print,areg) S3method(print,areg.boot) S3method(print,aregImpute) S3method(print,biVar) S3method(print,bystats) S3method(print,bystats2) S3method(print,char.list) S3method(print,char.matrix) S3method(print,contents.data.frame) S3method(print,contents.list) S3method(print,curveRep) S3method(print,dataRep) S3method(print,describe) S3method(print,describe.single) S3method(print,dvi) S3method(print,find.matches) S3method(print,hoeffd) # S3method(print,html) S3method(print,improveProb) S3method(print,impute) S3method(print,latex) S3method(print,lrcum) S3method(print,mChoice) S3method(print,mhgr) S3method(print,popower) S3method(print,posamsize) S3method(print,predict.dataRep) S3method(print,rcorr) S3method(print,redun) S3method(print,responseSummary) S3method(print,special.miss) S3method(print,substi) S3method(print,summary.areg.boot) S3method(print,summary.formula.cross) S3method(print,summary.formula.response) S3method(print,summary.formula.reverse) S3method(print,summary.mChoice) S3method(print,summaryM) S3method(print,t.test.cluster) S3method(print,timePOSIXt) S3method(print,transcan) S3method(print,varclus) S3method(rcorrcens,formula) #S3method(round,POSIXt) S3method(spearman2,default) S3method(spearman2,formula) S3method(summary,areg.boot) S3method(summary,find.matches) S3method(summary,formula) S3method(summary,impute) S3method(summary,mChoice) S3method(summary,transcan) #S3method(trunc,POSIXt) S3method('units<-',default) S3method(vcov,default) 
S3method(vcov,fit.mult.impute) S3method(xtfrm,labelled) S3method('[',Cbind) S3method('[',describe) S3method('[',discrete) S3method('[',impute) S3method('[',mChoice) S3method('[',pBlock) S3method('[',roundN) S3method('[',special.miss) S3method('[',substi) S3method('[',summary.formula.response) S3method('[',transcan) S3method('[',ynbind) S3method('[[',discrete) S3method('[<-',discrete)

Hmisc/README.md

Hmisc
=====

Harrell Miscellaneous

Current Goals
=============
* Continue to refine the summaryX class of functions that replace tables with graphics
* See also bpplotM and tabulr
* See https://hbiostat.org/R/Hmisc/summaryFuns.pdf

Web Sites
=============
* Overall: https://hbiostat.org/R/Hmisc
* CRAN: http://cran.r-project.org/web/packages/Hmisc
* Changelog: https://github.com/harrelfe/Hmisc/commits/master

To Do
=====
* Consider using the haven package for importing SAS, Stata, and SPSS files; haven stores labels as the label attribute of each variable as does Hmisc; it converts date and time variables automatically and allows one to specify a format catalog along with the primary dataset
* See if the readstata13 package has advantages over the foreign package for Stata file import
* Consider creating xl.get using the readxl package to read .xls and .xlsx Excel files
* In impute.transcan, sense whether a variable in data is not a factor whereas it was treated as a factor during aregImpute; it should be converted to a factor before the line v[sub] <- ... levels(as.integer...)) is run

Hmisc/man/nobsY.Rd

\name{nobsY}
\alias{nobsY}
\title{Compute Number of Observations for Left Hand Side of Formula}
\usage{
nobsY(formula, group=NULL, data = NULL, subset = NULL,
      na.action = na.retain, matrixna=c('all', 'any'))
}
\arguments{
\item{formula}{a formula object}
\item{group}{character string containing the optional name of a stratification variable for computing sample sizes}
\item{data}{a data frame}
\item{subset}{an optional subsetting criterion}
\item{na.action}{an optional \code{NA}-handling function}
\item{matrixna}{set to \code{"all"} if an observation is to be considered \code{NA} only if all the columns of the variable are \code{NA}; otherwise use \code{matrixna="any"} to consider the row missing if any of the columns are missing}
}
\value{an integer, with an attribute \code{"formula"} containing the original formula but with an \code{id} variable (if present) removed}
\description{
After removing any artificial observations added by \code{addMarginal}, computes the number of non-missing observations for all left-hand-side variables in \code{formula}. If \code{formula} contains a term \code{id(variable)}, \code{variable} is assumed to be a subject ID variable, and only unique subject IDs are counted. If \code{group} is given and its value is the name of a variable in the right-hand side of the model, an additional object \code{nobsg} is returned that is a matrix with as many columns as there are left-hand variables, and as many rows as there are levels to the \code{group} variable. This matrix has the further breakdown of unique non-missing observations by \code{group}. The concatenation of all ID variables is returned in a \code{list} element \code{id}.
}
\examples{
d <- expand.grid(sex=c('female', 'male', NA), country=c('US', 'Romania'),
                 reps=1:2)
d$subject.id <- c(0, 0, 3:12)
dm <- addMarginal(d, sex, country)
dim(dm)
nobsY(sex + country ~ 1, data=d)
nobsY(sex + country ~ id(subject.id), data=d)
nobsY(sex + country ~ id(subject.id) + reps, group='reps', data=d)
nobsY(sex ~ 1, data=d)
nobsY(sex ~ 1, data=dm)
nobsY(sex ~ id(subject.id), data=dm)
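# Editor's sketch (not in the original examples): when group= names a
# right-hand-side variable, the result also carries the per-group count
# matrix nobsg described above; inspect the full structure with str()
n <- nobsY(sex + country ~ id(subject.id) + reps, group='reps', data=d)
str(n)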
}
\keyword{utilities}
\keyword{manip}

Hmisc/man/t.test.cluster.Rd

\name{t.test.cluster}
\alias{t.test.cluster}
\alias{print.t.test.cluster}
\title{t-test for Clustered Data}
\description{
Does a 2-sample t-test for clustered data.
}
\usage{
t.test.cluster(y, cluster, group, conf.int = 0.95)
\method{print}{t.test.cluster}(x, digits, \dots)
}
\arguments{
\item{y}{normally distributed response variable to test}
\item{cluster}{cluster identifiers, e.g. subject ID}
\item{group}{grouping variable with two values}
\item{conf.int}{confidence coefficient to use for confidence limits}
\item{x}{an object created by \code{t.test.cluster}}
\item{digits}{number of significant digits to print}
\item{\dots}{unused}
}
\value{
a matrix of statistics of class \code{t.test.cluster}
}
\references{
Donner A, Birkett N, Buck C, Am J Epi 114:906-914, 1981.

Donner A, Klar N, J Clin Epi 49:435-439, 1996.

Hsieh FY, Stat in Med 8:1195-1201, 1988.
}
\author{Frank Harrell}
\seealso{\code{\link{t.test}}}
\examples{
set.seed(1)
y <- rnorm(800)
group <- sample(1:2, 800, TRUE)
cluster <- sample(1:40, 800, TRUE)
table(cluster, group)
t.test(y ~ group)                  # R only
t.test.cluster(y, cluster, group)
# Note: negate estimates of differences from t.test to
# compare with t.test.cluster
}
\keyword{htest}

Hmisc/man/dotchart3.Rd

\name{dotchart3}
\alias{dotchart3}
\alias{dotchartp}
\alias{summaryD}
\alias{summaryDp}
\title{Enhanced Version of dotchart Function}
\description{
These are adaptations of the R dotchart function that sort categories top to bottom, add \code{auxdata} and \code{auxtitle} arguments to put extra information in the right margin, and for \code{dotchart3} add arguments \code{cex.labels}, \code{cex.group.labels}, and \code{groupfont}. By default, group headings are in a larger, bold font. \code{dotchart3} also cuts a bit of white space from the top and bottom of the chart. The most significant change, however, is in how \code{x} is interpreted. Columns of \code{x} no longer provide an alternate way to define groups. Instead, they define superpositioned values. This is useful for showing three quartiles, for example. Going along with this change, for \code{dotchart3} \code{pch} can now be a vector specifying symbols to use going across columns of \code{x}. \code{x} was changed in this way because putting multiple points on a line (e.g., quartiles) while keeping track of \code{par()} parameters when \code{dotchart2} was called with \code{add=TRUE} was cumbersome. \code{dotchart3} changes the margins to account for horizontal labels.

\code{dotchartp} is a version of \code{dotchart3} for making the chart with the \code{plotly} package.

\code{summaryD} creates aggregate data using \code{\link{summarize}} and calls \code{dotchart3} with suitable arguments to summarize data by major and minor categories. If \code{options(grType='plotly')} is in effect and the \code{plotly} package is installed, \code{summaryD} uses \code{dotchartp} instead of \code{dotchart3}.
\code{summaryDp} is a streamlined \code{summaryD}-like function that uses the \code{dotchartpl} function to render a \code{plotly} graphic. It is used to compute summary statistics stratified separately by a series of variables. } \usage{ dotchart3(x, labels = NULL, groups = NULL, gdata = NULL, cex = par("cex"), pch = 21, gpch = pch, bg = par("bg"), color = par("fg"), gcolor = par("fg"), lcolor = "gray", xlim = range(c(x, gdata), na.rm=TRUE), main = NULL, xlab = NULL, ylab = NULL, auxdata = NULL, auxtitle = NULL, auxgdata=NULL, axisat=NULL, axislabels=NULL, cex.labels = cex, cex.group.labels = cex.labels * 1.25, cex.auxdata=cex, groupfont = 2, auxwhere=NULL, height=NULL, width=NULL, \dots) dotchartp(x, labels = NULL, groups = NULL, gdata = NULL, xlim = range(c(x, gdata), na.rm=TRUE), main=NULL, xlab = NULL, ylab = '', auxdata=NULL, auxtitle=NULL, auxgdata=NULL, auxwhere=c('right', 'hover'), symbol='circle', col=colorspace::rainbow_hcl, legendgroup=NULL, axisat=NULL, axislabels=NULL, sort=TRUE, digits=4, dec=NULL, height=NULL, width=700, layoutattr=FALSE, showlegend=TRUE, \dots) summaryD(formula, data=NULL, fun=mean, funm=fun, groupsummary=TRUE, auxvar=NULL, auxtitle='', auxwhere=c('hover', 'right'), vals=length(auxvar) > 0, fmtvals=format, symbol=if(use.plotly) 'circle' else 21, col=if(use.plotly) colorspace::rainbow_hcl else 1:10, legendgroup=NULL, cex.auxdata=.7, xlab=v[1], ylab=NULL, gridevery=NULL, gridcol=gray(.95), sort=TRUE, \dots) summaryDp(formula, fun=function(x) c(Mean=mean(x, na.rm=TRUE), N=sum(! is.na(x))), overall=TRUE, xlim=NULL, xlab=NULL, data=NULL, subset=NULL, na.action=na.retain, ncharsmax=c(50, 30), digits=4, ...) } \arguments{ \item{x}{a numeric vector or matrix} \item{labels}{labels for categories corresponding to rows of \code{x}. If not specified these are taken from row names of \code{x}.} \item{groups,gdata,cex,pch,gpch,bg,color,gcolor,lcolor,xlim,main,xlab,ylab}{see \code{\link{dotchart}}} \item{auxdata}{a vector of information to be put in the right margin, in the same order as \code{x}. May be numeric, character, or a vector of expressions containing \code{\link{plotmath}} markup. For \code{dotchartp}, \code{auxdata} may be a matrix to go along with the numeric x-axis variable, to result in point-specific hover text.} \item{auxtitle}{a column heading for \code{auxdata}} \item{auxgdata}{similar to \code{auxdata} but corresponding to the \code{gdata} argument. These usually represent overall sample sizes for each group of lines.} \item{axisat}{a vector of tick mark locations to pass to \code{axis}. Useful if transforming the data axis} \item{axislabels}{a vector of strings specifying axis tick mark labels. Useful if transforming the data axis} \item{digits}{number of significant digits for formatting numeric data in hover text for \code{dotchartp} and \code{summaryDp}} \item{dec}{for \code{dotchartp} only, overrides \code{digits} to specify the argument to \code{round()} for rounding values for hover labels} \item{cex.labels}{\code{cex} for labels} \item{cex.group.labels}{\code{cex} for group labels} \item{cex.auxdata}{\code{cex} for \code{auxdata}} \item{groupfont}{font number for group headings} \item{auxwhere}{for \code{summaryD} and \code{dotchartp} specifies whether \code{auxdata} and \code{auxgdata} are to be placed on the far right of the chart, or should appear as pop-up tooltips when hovering the mouse over the ordinary \code{x} data points on the chart. 
Ignored for \code{dotchart3}.} \item{\dots}{other arguments passed to some of the graphics functions, or to \code{dotchart3} or \code{dotchartp} from \code{summaryD}. The \code{auxwhere='hover'} option is a useful argument to pass from \code{summaryD} to \code{dotchartp}. Also used to pass other arguments to \code{dotchartpl} from \code{summaryDp}.} \item{layoutattr}{set to \code{TRUE} to put \code{plotly::layout} information in a list as an attribute \code{layout} of the returned \code{plotly} object instead of running the \code{plotly} object through the \code{layout} function. This is useful if running \code{dotchartp} multiple times to later put together using \code{plotly::subplot} and only then running the result through \code{plotly::layout}.} \item{showlegend}{set to \code{FALSE} to suppress the \code{plotly} legend with \code{dotchartp}} \item{formula}{a formula with one variable on the left hand side (the variable to compute summary statistics on), and one or two variables on the right hand side. If there are two variables, the first is taken as the major grouping variable. If the left hand side variable is a matrix it has to be a legal R variable name, not an expression, and \code{fun} needs to be able to process a matrix. For \code{summaryDp} there may be more than two right-hand-side variables.} \item{data}{a data frame or list used to find the variables in \code{formula}. If omitted, the parent environment is used.} \item{fun}{a summarization function creating a single number from a vector. Default is the mean. For \code{summaryDp}, \code{fun} produces a named vector of summary statistics, with the default computing the \code{Mean} and \code{N} (number of non-missing values).} \item{funm}{applies if there are two right hand variables and \code{groupsummary=TRUE} and the marginal summaries over just the first \code{x} variable need to be computed differently than the summaries that are cross-classified by both variables. \code{funm} defaults to \code{fun} and should have the same structure as \code{fun}.} \item{groupsummary}{By default, when there are two right-hand variables, \code{summarize(..., fun)} is called a second time without the use of the second variable, to obtain marginal summaries for the major grouping variable and display the results as a dot (and optionally in the right margin). Set \code{groupsummary=FALSE} to suppress this information.} \item{auxvar}{when \code{fun} returns more than one statistic and the user names the elements in the returned vector, you can specify \code{auxvar} as a single character string naming one of them. This will cause the named element to be written in the right margin, and that element to be deleted when plotting the statistics.} \item{vals}{set to \code{TRUE} to show data values (dot locations) in the right margin. Defaults to \code{TRUE} if \code{auxvar} is specified.} \item{fmtvals}{an optional function to format values before putting them in the right margin. Default is the \code{format} function.} \item{symbol}{a scalar or vector of \code{pch} values for ordinary graphics or a character vector or scalar of \code{plotly} symbols. These correspond to columns of \code{x} or elements produced by \code{fun}.} \item{col}{a function or vector of colors to assign to multiple points plotted in one line. 
If a function it will be evaluated with an argument equal to the number of groups/columns.} \item{legendgroup}{see \code{plotly} documentation; corresponds to column names/\code{fun} output for \code{plotly} graphs only} \item{gridevery}{specify a positive number to draw very faint vertical grid lines every \code{gridevery} \code{x}-axis units; for non-\code{plotly} charts} \item{gridcol}{color for grid lines; default is very faint gray scale} \item{sort}{specify \code{sort=FALSE} to plot data in the original order, from top to bottom on the dot chart. For \code{dotchartp}, set \code{sort} to \code{'descending'} to sort in descending order of the first column of \code{x}, or \code{'ascending'} to do the reverse. These do not make sense if \code{groups} is present.} \item{height,width}{height and width in pixels for \code{dotchartp} if not using \code{plotly} defaults. Ignored for \code{dotchart3}. If set to \code{"auto"} the height is computed using \code{Hmisc::plotlyHeightDotchart}.} \item{overall}{set to \code{FALSE} to suppress plotting of unstratified estimates} \item{subset}{an observation subsetting expression} \item{na.action}{an \code{NA} action function} \item{ncharsmax}{a 2-vector specifying the number of characters after which an html new line character should be placed, respectively for the x-axis label and the stratification variable levels} } \value{the function returns invisibly} \author{Frank Harrell} \seealso{\code{\link{dotchart}},\code{\link{dotchart2}},\code{\link{summarize}}, \code{\link{rlegend}}} \examples{ set.seed(135) maj <- factor(c(rep('North',13),rep('South',13))) g <- paste('Category',rep(letters[1:13],2)) n <- sample(1:15000, 26, replace=TRUE) y1 <- runif(26) y2 <- pmax(0, y1 - runif(26, 0, .1)) dotchart3(cbind(y1,y2), g, groups=maj, auxdata=n, auxtitle='n', xlab='Y', pch=c(1,17)) ## Compare with dotchart function (no superpositioning or auxdata allowed): ## dotchart(y1, g, groups=maj, xlab='Y') \dontrun{ dotchartp(cbind(y1, y2), g, groups=maj, auxdata=n, auxtitle='n', xlab='Y', gdata=cbind(c(0,.1), c(.23,.44)), auxgdata=c(-1,-2), symbol=c('circle', 'line-ns-open')) summaryDp(sbp ~ region + sex + race + cut2(age, g=5), data=mydata) } ## Put options(grType='plotly') to have the following use dotchartp ## (rlegend will not apply) ## Add argument auxwhere='hover' to summaryD or dotchartp to put ## aux info in hover text instead of right margin summaryD(y1 ~ maj + g, xlab='Mean') summaryD(y1 ~ maj + g, groupsummary=FALSE) summaryD(y1 ~ g, fmtvals=function(x) sprintf('\%4.2f', x)) Y <- cbind(y1, y2) # summaryD cannot handle cbind(...) ~ ... 
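# In the call below, fun must accept the matrix Y; returning its first
# row yields one (y1, y2) pair per group, plotted with pch 1 and 17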
summaryD(Y ~ maj + g, fun=function(y) y[1,], symbol=c(1,17))
rlegend(.1, 26, c('y1','y2'), pch=c(1,17))

summaryD(y1 ~ maj, fun=function(y) c(Mean=mean(y), n=length(y)),
         auxvar='n', auxtitle='N')
}
\keyword{hplot}

Hmisc/man/estSeqSim.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gbayesSeqSim.r
\name{estSeqSim}
\alias{estSeqSim}
\title{estSeqSim}
\usage{
estSeqSim(parameter, looks, gendat, fitter, nsim = 1, progress = FALSE)
}
\arguments{
\item{parameter}{vector of true parameter (effects; group differences) values}
\item{looks}{integer vector of observation numbers at which posterior probabilities are computed}
\item{gendat}{a function of three arguments: true parameter value (scalar), sample size for first group, sample size for second group}
\item{fitter}{a function of two arguments: 0/1 group indicator vector and the dependent variable vector}
\item{nsim}{number of simulations (default is 1)}
\item{progress}{set to \code{TRUE} to send the current iteration number to the console}
}
\value{
a data frame with number of rows equal to the product of \code{nsim}, the length of \code{looks}, and the length of \code{parameter}.
}
\description{
Simulate Comparisons For Use in Sequential Clinical Trial Simulations
}
\details{
Simulates sequential clinical trials. Looks are done sequentially at observation numbers given in the vector \code{looks}, with the earliest possible look being at observation 2. For each true effect parameter value, simulation, and look, runs a function to compute the estimate of the parameter of interest along with its variance. For each simulation, data are first simulated for the last look, and these data are sequentially revealed for earlier looks. The user provides a function \code{gendat} that, given a true effect of \code{parameter} and the two sample sizes (for treatment groups 1 and 2), returns a list with vectors \code{y1} and \code{y2} containing simulated data. The user also provides a function \code{fitter} with arguments \code{x} (group indicator 0/1) and \code{y} (response variable) that returns a 2-vector containing the effect estimate and its variance. \code{parameter} is usually on the scale of a regression coefficient, e.g., a log odds ratio.
}
\examples{
if (requireNamespace("rms", quietly = TRUE)) {
  # Run 100 simulations, 5 looks, 2 true parameter values
  # Total simulation time: 2s
  lfit <- function(x, y) {
    f <- rms::lrm.fit(x, y)
    k <- length(coef(f))
    c(coef(f)[k], vcov(f)[k, k])
  }
  gdat <- function(beta, n1, n2) {
    # Cell probabilities for a 7-category ordinal outcome for the control group
    p <- c(2, 1, 2, 7, 8, 38, 42) / 100
    # Compute cell probabilities for the treated group
    p2 <- pomodm(p=p, odds.ratio=exp(beta))
    y1 <- sample(1 : 7, n1, p, replace=TRUE)
    y2 <- sample(1 : 7, n2, p2, replace=TRUE)
    list(y1=y1, y2=y2)
  }
  set.seed(1)
  est <- estSeqSim(c(0, log(0.7)), looks=c(50, 75, 95, 100, 200),
                   gendat=gdat, fitter=lfit, nsim=100)
  head(est)
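  # Editor's sketch (not in the original example); the column names used
  # here are an assumption -- check head(est) first before running:
  # with(est, tapply(est, list(parameter, look), var))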
  }
}
\seealso{
\code{gbayesSeqSim()}, \code{simMarkovOrd()}, \code{estSeqMarkovOrd()}
}
\author{
Frank Harrell
}

Hmisc/man/xtfrm.labelled.Rd

\name{xtfrm.labelled}
\alias{xtfrm.labelled}
\title{Auxiliary Function Method for Sorting and Ranking}
\description{
An auxiliary function method that works around a bug in how the implementation of \code{xtfrm} handles inheritance.
}
\usage{
\method{xtfrm}{labelled}(x)
}
\arguments{
\item{x}{any object of class \code{labelled}}
}
\seealso{
\code{\link{xtfrm}}
}

Hmisc/man/valueTags.Rd

\name{valueTags}
\alias{valueTags}
\alias{valueTags<-}
\alias{valueLabel}
\alias{valueLabel<-}
\alias{valueUnit}
\alias{valueUnit<-}
\alias{valueName}
\alias{valueName<-}
\title{Store Descriptive Information About an Object}
\description{
These functions get or set useful information about the contents of an object for later use.
}
\usage{
valueTags(x)
valueTags(x) <- value
valueLabel(x)
valueLabel(x) <- value
valueName(x)
valueName(x) <- value
valueUnit(x)
valueUnit(x) <- value
}
\arguments{
\item{x}{an object}
\item{value}{for \code{valueTags<-}, a named list of value tags; for the other replacement functions, a character vector of length 1, or \code{NULL}}
}
\value{
\code{valueTags} returns \code{NULL} or a named list with each of the named values \code{name}, \code{label}, \code{unit} set if they exist in the object. \code{valueTags<-} returns the updated object.

\code{valueName}, \code{valueLabel}, and \code{valueUnit} return \code{NULL} or a character vector of length 1. \code{valueName<-}, \code{valueLabel<-}, and \code{valueUnit<-} return \code{value}.
}
\details{
These functions store a short name for the contents, a longer label that is useful for display, and the units of the contents, also useful for display.

\code{valueTags} is an accessor, and \code{valueTags<-} is a replacement function for all of the value's information.

\code{valueName} is an accessor, and \code{valueName<-} is a replacement function for the value's name. This name is used when a plot or a latex table needs a short name and the variable name is not useful.

\code{valueLabel} is an accessor, and \code{valueLabel<-} is a replacement function for the value's label. The label is used in plots or latex tables when they need a descriptive name.

\code{valueUnit} is an accessor, and \code{valueUnit<-} is a replacement function for the value's unit. The unit is used to add unit information to the R output.
}
\seealso{
\code{\link{names}}, \code{\link{attributes}}
}
\examples{
age <- c(21,65,43)
y <- 1:3
valueLabel(age) <- "Age in Years"
plot(age, y, xlab=valueLabel(age))

x1 <- 1:10
x2 <- 10:1
valueLabel(x2) <- 'Label for x2'
valueUnit(x2) <- 'mmHg'
x2
x2[1:5]
dframe <- data.frame(x1, x2)
Label(dframe)

## In these examples of llist, note that labels are printed after
## variable names, because of print.labelled
a <- 1:3
b <- 4:6
valueLabel(b) <- 'B Label'
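# Editor's hypothetical continuation of the llist example above:
# llist(a, b)   # b's 'B Label' should display via print.labelled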
}
\author{Charles Dupont}
\keyword{attribute}
\keyword{misc}
\keyword{utilities}

Hmisc/man/histbackback.Rd

\name{histbackback}
\alias{histbackback}
\title{Back to Back Histograms}
\description{
Takes two vectors or a list with \code{x} and \code{y} components, and produces back to back histograms of the two datasets.
}
\usage{
histbackback(x, y, brks=NULL, xlab=NULL, axes=TRUE, probability=FALSE,
             xlim=NULL, ylab='', \dots)
}
\arguments{
\item{x,y}{either two vectors or a list given as \code{x} with two components. If the components have names, they will be used to label the axis (modification FEH).}
\item{brks}{vector of the desired breakpoints for the histograms.}
\item{xlab}{a vector of two character strings naming the two datasets.}
\item{axes}{logical flag stating whether or not to label the axes.}
\item{probability}{logical flag: if \code{TRUE}, then the x-axis corresponds to the units for a density. If \code{FALSE}, then the units are counts.}
\item{xlim}{x-axis limits. First value must be negative, as the left histogram is placed at negative x-values. Second value must be positive, for the right histogram. To make the limits symmetric, use e.g. \code{xlim=c(-20,20)}.}
\item{ylab}{label for y-axis. Default is no label.}
\item{...}{additional graphics parameters may be given.}
}
\value{
a list is returned invisibly with the following components:
\item{left}{the counts for the dataset plotted on the left.}
\item{right}{the counts for the dataset plotted on the right.}
\item{breaks}{the breakpoints used.}
}
\section{Side Effects}{
a plot is produced on the current graphics device.
}
\author{
Pat Burns \cr
Salomon Smith Barney \cr
London \cr
\email{pburns@dorado.sbi.com}
}
\seealso{
\code{\link{hist}}, \code{\link[lattice]{histogram}}
}
\examples{
options(digits=3)
set.seed(1)
histbackback(rnorm(20), rnorm(30))

fool <- list(x=rnorm(40), y=rnorm(40))
histbackback(fool)

age <- rnorm(1000,50,10)
sex <- sample(c('female','male'),1000,TRUE)
histbackback(split(age, sex))

agef <- age[sex=='female']; agem <- age[sex=='male']
histbackback(list(Female=agef, Male=agem), probability=TRUE,
             xlim=c(-.06,.06))
}
\keyword{dplot}
\keyword{hplot}
\keyword{distribution}
% Converted by Sd2Rd version 1.21.

Hmisc/man/minor.tick.Rd

\name{minor.tick}
\alias{minor.tick}
\title{Minor Tick Marks}
\description{
Adds minor tick marks to an existing plot. All minor tick marks that will fit on the axes will be drawn.
}
\usage{
minor.tick(nx=2, ny=2, tick.ratio=0.5, x.args = list(), y.args = list())
}
\arguments{
\item{nx}{number of intervals in which to divide the area between major tick marks on the X-axis. Set to 1 to suppress minor tick marks.}
\item{ny}{same as \code{nx} but for the Y-axis.}
\item{tick.ratio}{ratio of lengths of minor tick marks to major tick marks. The length of major tick marks is retrieved from \code{par("tck")}.}
\item{x.args}{additional arguments (e.g. \code{pos}, \code{lwd}) used by the \code{axis()} function when rendering the X-axis.}
\item{y.args}{same as \code{x.args} but for the Y-axis.}
}
\section{Side Effects}{
plots
}
\author{
Frank Harrell \cr
Department of Biostatistics \cr
Vanderbilt University \cr
\email{fh@fharrell.com} \cr
Earl Bellinger \cr
Max Planck Institute \cr
\email{earlbellinger@gmail.com} \cr
Viktor Horvath \cr
Brandeis University \cr
\email{vhorvath@brandeis.edu}
}
\seealso{
\code{\link{axis}}
}
\examples{
# Plot with default settings
plot(runif(20), runif(20))
minor.tick()

# Plot with arguments passed to axis()
plot(c(0,1), c(0,1), type = 'n', axes = FALSE, ann = FALSE)
# setting up a plot without axes and annotation
points(runif(20), runif(20))   # plotting data
axis(1, pos = 0.5, lwd = 2)    # showing X-axis at Y = 0.5 with formatting
axis(2, col = 2)               # formatted Y-axis
minor.tick(nx = 4, ny = 4, tick.ratio = 0.3,
           x.args = list(pos = 0.5, lwd = 2),  # X-minor tick format arguments
           y.args = list(col = 2))             # Y-minor tick format arguments
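# Editor's sketch (not in the original examples): nx = 1 suppresses X-axis
# minor ticks while still subdividing the Y-axis
minor.tick(nx = 1, ny = 5, tick.ratio = 0.4)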
}
\keyword{aplot}
\keyword{hplot}

Hmisc/man/print.char.list.Rd

\name{print.char.list}
\alias{print.char.list}
\title{Prints a list of lists in a visually readable format}
\description{
Takes a list that is composed of other lists and matrices and prints it in a visually readable format.
}
\usage{
\method{print}{char.list}(x, \dots, hsep = c("|"), vsep = c("-"), csep = c("+"), print.it = TRUE,
      rowname.halign = c("left", "centre", "right"),
      rowname.valign = c("top", "centre", "bottom"),
      colname.halign = c("centre", "left", "right"),
      colname.valign = c("centre", "top", "bottom"),
      text.halign = c("right", "centre", "left"),
      text.valign = c("top", "centre", "bottom"),
      rowname.width, rowname.height,
      min.colwidth = .Options$digits, max.rowheight = NULL,
      abbreviate.dimnames = TRUE, page.width = .Options$width,
      colname.width, colname.height, prefix.width,
      superprefix.width = prefix.width)
}
\arguments{
\item{x}{list object to be printed}
\item{\dots}{place for extra arguments to reside.}
\item{hsep}{character used to separate horizontal fields}
\item{vsep}{character used to separate vertical fields}
\item{csep}{character used where horizontal and vertical separators meet.}
\item{print.it}{should the value be printed to the console or returned as a string.}
\item{rowname.halign}{horizontal justification of row names.}
\item{rowname.valign}{vertical justification of row names.}
\item{colname.halign}{horizontal justification of column names.}
\item{colname.valign}{vertical justification of column names.}
\item{text.halign}{horizontal justification of cell text.}
\item{text.valign}{vertical justification of cell text.}
\item{rowname.width}{minimum width of row name strings.}
\item{rowname.height}{minimum height of row name strings.}
\item{min.colwidth}{minimum column width.}
\item{max.rowheight}{maximum row height.}
\item{abbreviate.dimnames}{should the row and column names be abbreviated.}
\item{page.width}{width of the page being printed on.}
\item{colname.width}{minimum width of the column names.}
\item{colname.height}{minimum height of the column names}
\item{prefix.width}{maximum width of the rowname columns}
\item{superprefix.width}{maximum width of the super rowname columns}
}
\value{
A string containing the formatted table representation of the list object.
}
\author{Charles Dupont}
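\examples{
# Editor's illustrative sketch, not part of the original help file; it
# assumes the method can be dispatched by giving a suitable list the
# class "char.list"
\dontrun{
x <- structure(list(matrix(1:4, 2, dimnames=list(c("r1","r2"), c("c1","c2")))),
               class = "char.list")
print(x, hsep = "|", vsep = "-", csep = "+")
}
}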
\keyword{print}
\keyword{list}

Hmisc/man/multLines.Rd

\name{multLines}
\alias{multLines}
\title{Plot Multiple Lines}
\description{
Plots multiple lines based on a vector \code{x} and a matrix \code{y}, and draws thin vertical lines connecting limits represented by columns of \code{y} beyond the first. It is assumed that either (1) the second and third columns of \code{y} represent lower and upper confidence limits, or (2) there is an even number of columns beyond the first and these represent ascending quantiles that are symmetrically arranged around 0.5.

If \code{options(grType='plotly')} is in effect, uses \code{plotly} graphics instead of \code{grid} or base graphics. For \code{plotly} you may want to set the list of possible colors, etc. using \code{pobj=plot_ly(colors=...)}. \code{lwd}, \code{lty}, and \code{lwd.vert} are ignored under \code{plotly}.
}
\usage{
multLines(x, y, pos = c('left', 'right'), col='gray', lwd=1, lty=1,
          lwd.vert = .85, lty.vert = 1, alpha = 0.4, grid = FALSE,
          pobj=plotly::plot_ly(), xlim, name=colnames(y)[1],
          legendgroup=name, showlegend=TRUE, ...)
}
\arguments{
\item{x}{a numeric vector}
\item{y}{a numeric matrix with number of rows equal to the number of \code{x} elements}
\item{pos}{when \code{pos='left'} the vertical lines are drawn, right to left, to the left of the point \code{(x, y[,1])}. Otherwise lines are drawn left to right, to the right of the point.}
\item{col}{a color used to connect \code{(x, y[,1])} pairs. The same color, but with transparency given by the \code{alpha} argument, is used to draw the vertical lines}
\item{lwd}{line width for main lines}
\item{lty}{line types for main lines}
\item{lwd.vert}{line width for vertical lines}
\item{lty.vert}{line type for vertical lines}
\item{alpha}{transparency}
\item{grid}{set to \code{TRUE} when using \code{grid}/\code{lattice}}
\item{pobj}{an already started \code{plotly} object to add to}
\item{xlim}{global x-axis limits (required if using \code{plotly})}
\item{name}{trace name if using \code{plotly}}
\item{legendgroup}{legend group name if using \code{plotly}}
\item{showlegend}{whether or not to show traces in the legend, if using \code{plotly}}
\item{\dots}{passed to \code{add_lines} or \code{add_segments} if using \code{plotly}}
}
\author{Frank Harrell}
\examples{
if (requireNamespace("plotly")) {
  x <- 1:4
  y <- cbind(x, x-3, x-2, x-1, x+1, x+2, x+3)
  plot(NA, NA, xlim=c(1,4), ylim=c(-2, 7))
  multLines(x, y, col='blue')
  multLines(x, y, col='red', pos='right')
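  # Editor's sketch (not in the original example): with exactly two columns
  # beyond the first, columns 2 and 3 are treated as lower and upper limits
  multLines(x, cbind(x, x-1, x+1), col='green')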
}
}
\keyword{hplot}

Hmisc/man/print.char.matrix.Rd

\name{print.char.matrix}
\alias{print.char.matrix}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Function to print a matrix with stacked cells}
\description{
Prints a data frame or matrix in stacked cells. Line break characters in a matrix element will result in a line break in that cell, but tab characters are not supported.
}
\usage{
\method{print}{char.matrix}(x, file = "", col.name.align = "cen", col.txt.align = "right",
    cell.align = "cen", hsep = "|", vsep = "-", csep = "+",
    row.names = TRUE, col.names = FALSE, append = FALSE,
    top.border = TRUE, left.border = TRUE, \dots)
}
\arguments{
\item{x}{a matrix or data frame}
\item{file}{name of file if file output is desired. If left empty, output will be to the screen}
\item{col.name.align}{if column names are used, they can be aligned right, left or centre. The default \code{"cen"} results in names centred between the sides of the columns they name. If the width of the text in the columns is less than the width of the name, \code{col.name.align} will have no effect. Other options are \code{"right"} and \code{"left"}.}
\item{col.txt.align}{how character columns are aligned. Options are the same as for \code{col.name.align}, with no effect when the width of the column is greater than its name.}
\item{cell.align}{how numbers are displayed in columns}
\item{hsep}{character string to use as horizontal separator, i.e. what separates columns}
\item{vsep}{character string to use as vertical separator, i.e. what separates rows. Length cannot be more than one.}
\item{csep}{character string to use where vertical and horizontal separators cross. If \code{hsep} is more than one character, \code{csep} will need to be the same length. There is no provision for multiple vertical separators.}
\item{row.names}{logical: are we printing the names of the rows?}
\item{col.names}{logical: are we printing the names of the columns?}
\item{append}{logical: if \code{file} is not \code{""}, are we appending to the file or overwriting?}
\item{top.border}{logical: do we want a border along the top above the columns?}
\item{left.border}{logical: do we want a border along the left of the first column?}
\item{\dots}{unused}
}
\details{
If any column of \code{x} is a mixture of character and numeric, the distinction between character and numeric columns will be lost. This is especially so if the matrix is of a form where you would not want to print the column names, the column information being in the rows at the beginning of the matrix.

Row names, if not specified in the making of the matrix, will simply be numbers. To prevent printing them, set \code{row.names = FALSE}.
}
\value{
No value is returned. The matrix or data frame will be printed to file or to the screen.
}
\author{Patrick Connolly \email{p.connolly@hortresearch.co.nz}}
\seealso{\code{write}, \code{write.table}}
\examples{
data(HairEyeColor)
print.char.matrix(HairEyeColor[ , , "Male"], col.names = TRUE)
print.char.matrix(HairEyeColor[ , , "Female"], col.txt.align = "left", col.names = TRUE)

z <- rbind(c("", "N", "y"),
           c("[ 1.34,40.3)\n[40.30,48.5)\n[48.49,58.4)\n[58.44,87.8]",
             " 50\n 50\n 50\n 50",
             "0.530\n0.489\n0.514\n0.507"),
           c("female\nmale", " 94\n106", "0.552\n0.473"),
           c("", "200", "0.510"))
dimnames(z) <- list(c("", "age", "sex", "Overall"), NULL)
print.char.matrix(z)
}
\keyword{print}
\keyword{array}

Hmisc/man/nstr.Rd

\name{nstr}
\alias{nstr}
\title{Creates a string of arbitrary length}
\description{
Creates a vector of strings in which each element consists of the corresponding string segment in \code{string} repeated \code{times} times.
}
\usage{
nstr(string, times)
}
\arguments{
\item{string}{character: vector of string segments to be repeated. Will be recycled if argument \code{times} is longer.}
\item{times}{integer: vector of the number of times to repeat the corresponding segment. Will be recycled if argument \code{string} is longer.}
}
\value{
returns a character vector the same length as the longest of the two arguments.
}
\author{Charles Dupont}
\note{
Will throw a warning if the length of the longer argument is not an even multiple of the shorter argument.
}
\seealso{
\code{\link{paste}}, \code{\link{rep}}
}
\examples{
nstr(c("a"), c(0,3,4))
nstr(c("a", "b", "c"), c(1,2,3))
nstr(c("a", "b", "c"), 4)
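# Editor's sketch (not in the original examples): build a horizontal rule
# and an indent string
nstr("-", 20)
nstr(" ", 4)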
}
\keyword{manip}
\keyword{character}
\keyword{utilities}

Hmisc/man/hdquantile.Rd

\name{hdquantile}
\alias{hdquantile}
\title{Harrell-Davis Distribution-Free Quantile Estimator}
\description{
Computes the Harrell-Davis (1982) quantile estimator and jackknife standard errors of quantiles. The quantile estimator is a weighted linear combination of order statistics in which the order statistics used in traditional nonparametric quantile estimators are given the greatest weight. In small samples the H-D estimator is more efficient than traditional ones, and the two methods are asymptotically equivalent. The H-D estimator is the limit of a bootstrap average as the number of bootstrap resamples becomes infinitely large.
}
\usage{
hdquantile(x, probs = seq(0, 1, 0.25), se = FALSE, na.rm = FALSE,
           names = TRUE, weights=FALSE)
}
\arguments{
\item{x}{a numeric vector}
\item{probs}{vector of quantiles to compute}
\item{se}{set to \code{TRUE} to also compute standard errors}
\item{na.rm}{set to \code{TRUE} to remove \code{NA}s from \code{x} before computing quantiles}
\item{names}{set to \code{FALSE} to prevent names attributes from being added to quantiles and standard errors}
\item{weights}{set to \code{TRUE} to return a \code{"weights"} attribute with the matrix of weights used in the H-D estimator corresponding to order statistics, with columns corresponding to quantiles.}
}
\details{
A Fortran routine is used to compute the jackknife leave-out-one quantile estimates. Standard errors are not computed for quantiles 0 or 1 (\code{NA}s are returned).
}
\value{
A vector of quantiles. If \code{se=TRUE} this vector will have an attribute \code{se} added to it, containing the standard errors. If \code{weights=TRUE}, it also has a \code{"weights"} attribute which is a matrix.
}
\references{
Harrell FE, Davis CE (1982): A new distribution-free quantile estimator. Biometrika 69:635-640.

Hutson AD, Ernst MD (2000): The exact bootstrap mean and variance of an L-estimator. J Roy Statist Soc B 62:89-94.
}
\author{Frank Harrell}
\seealso{\code{\link{quantile}}}
\examples{
set.seed(1)
x <- runif(100)
hdquantile(x, (1:3)/4, se=TRUE)

\dontrun{
# Compare jackknife standard errors with those from the bootstrap
library(boot)
boot(x, function(x,i) hdquantile(x[i], probs=(1:3)/4), R=400)
}
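# Editor's sketch (not in the original examples): inspect the matrix of
# order-statistic weights, one column per requested quantile
w <- hdquantile(x, c(.25, .5), weights=TRUE)
dim(attr(w, 'weights'))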
}
\keyword{univar}

Hmisc/man/plotCorrM.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotCorrM.r
\name{plotCorrM}
\alias{plotCorrM}
\title{plotCorrM}
\usage{
plotCorrM(
  r,
  what = c("plots", "data"),
  type = c("rectangle", "circle"),
  xlab = "",
  ylab = "",
  maxsize = 12,
  xangle = 0
)
}
\arguments{
\item{r}{correlation matrix}
\item{what}{specifies whether to return plots or the data frame used in making the plots}
\item{type}{specifies whether to use bottom-aligned rectangles (the default) or centered circles}
\item{xlab}{x-axis label for correlation matrix}
\item{ylab}{y-axis label for correlation matrix}
\item{maxsize}{maximum circle size if \code{type='circle'}}
\item{xangle}{angle for placing x-axis labels, defaulting to 0. Consider using \code{xangle=45} when labels are long.}
}
\value{
a list containing two \code{ggplot2} objects if \code{what='plots'}, or a data frame if \code{what='data'}
}
\description{
Plot Correlation Matrix and Correlation vs. Time Gap
}
\details{
Constructs two \code{ggplot2} graphics. The first is a half matrix of rectangles where the height of the rectangle is proportional to the absolute value of the correlation coefficient, with positive and negative coefficients shown in different colors. The second graphic is a variogram-like graph of correlation coefficients on the y-axis and absolute time gap on the x-axis, with a \code{loess} smoother added. The times are obtained from the correlation matrix's row and column names if these are numeric. If any names are not numeric, the times are taken as the integers 1, 2, 3, ... The two graphics are \code{ggplotly}-ready if you use \code{plotly::ggplotly(..., tooltip='label')}.
}
\examples{
set.seed(1)
r <- cor(matrix(rnorm(100), ncol=10))
g <- plotCorrM(r)
g[[1]]   # plot matrix
g[[2]]   # plot correlation vs gap time
# ggplotlyr(g[[2]])
# ggplotlyr uses ggplotly with tooltip='label' then removes
# txt: from hover text
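# Editor's sketch (not in the original examples): return the underlying
# plotting data frame instead of the graphics
head(plotCorrM(r, what='data'))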
}
\author{
Frank Harrell
}

Hmisc/man/sedit.Rd

\name{sedit}
\alias{sedit}
\alias{substring.location}
\alias{substring2}
\alias{substring2<-}
\alias{replace.substring.wild}
\alias{numeric.string}
\alias{all.digits}
\title{Character String Editing and Miscellaneous Character Handling Functions}
\description{
This suite of functions was written to implement many of the features of the UNIX \code{sed} program entirely within S (function \code{sedit}). The \code{substring.location} function returns the first and last position numbers that a sub-string occupies in a larger string. The \code{substring2<-} function does the opposite of the builtin function \code{substring}. It is named \code{substring2} because for S-Plus there is a built-in function \code{substring}, but it does not handle multiple replacements in a single string. \code{replace.substring.wild} edits character strings in the fashion of "change xxxxANYTHINGyyyy to aaaaANYTHINGbbbb", if the "ANYTHING" passes an optional user-specified \code{test} function. Here, the "yyyy" string is searched for from right to left, to handle balancing parentheses, etc. \code{numeric.string} and \code{all.digits} are two examples of \code{test} functions, to check, respectively, if each of a vector of strings is a legal numeric or if it contains only the digits 0-9. For the case where \code{old="*$"} or \code{"^*"}, or for \code{replace.substring.wild} with the same values of \code{old} or with \code{front=TRUE} or \code{back=TRUE}, \code{sedit} (if \code{wild.literal=FALSE}) and \code{replace.substring.wild} will edit the largest substring satisfying \code{test}.

\code{substring2} is just a copy of \code{substring} so that \code{substring2<-} will work.
}
\usage{
sedit(text, from, to, test, wild.literal=FALSE)
substring.location(text, string, restrict)
# substring(text, first, last) <- setto   # S-Plus only
replace.substring.wild(text, old, new, test, front=FALSE, back=FALSE)
numeric.string(string)
all.digits(string)
substring2(text, first, last)
substring2(text, first, last) <- value
}
\arguments{
\item{text}{a vector of character strings for \code{sedit, substring2, substring2<-}, or a single character string for \code{substring.location, replace.substring.wild}}
\item{from}{a vector of character strings to translate from, for \code{sedit}. A single asterisk wild card is allowed, meaning that any sequence of characters (subject to the \code{test} function, if any) may appear in place of the \code{"*"}. An element of \code{from} may begin with \code{"^"} to force the match to begin at the beginning of \code{text}, and an element of \code{from} can end with \code{"$"} to force the match to end at the end of \code{text}.}
\item{to}{a vector of character strings to translate to, for \code{sedit}. If a corresponding element in \code{from} had an \code{"*"}, the element in \code{to} may also have an \code{"*"}. Only single asterisks are allowed. If \code{to} is not the same length as \code{from}, the \code{rep} function is used to make it the same length.}
\item{string}{a single character string, for \code{substring.location}, \code{numeric.string}, \code{all.digits}}
\item{first}{a vector of integers specifying the first position to replace for \code{substring2<-}. \code{first} may also be a vector of character strings that are passed to \code{sedit} to use as patterns for replacing substrings with \code{setto}. See one of the last examples below.}
\item{last}{a vector of integers specifying the ending positions of the character substrings to be replaced. The default is to go to the end of the string. When \code{first} is character, \code{last} must be omitted.}
\item{setto}{a character string or vector of character strings used as replacements, in \code{substring2<-}}
\item{old}{a character string to translate from for \code{replace.substring.wild}. May be \code{"*$"} or \code{"^*"} or any string containing a single \code{"*"} but not beginning with \code{"^"} or ending with \code{"$"}.}
\item{new}{a character string to translate to for \code{replace.substring.wild}}
\item{test}{a function of a vector of character strings returning a logical vector whose elements are \code{TRUE} or \code{FALSE} according to whether that string element qualifies as the wild card string for \code{sedit, replace.substring.wild}}
\item{wild.literal}{set to \code{TRUE} to not treat asterisks as wild cards and to not look for \code{"^"} or \code{"$"} in \code{old}}
\item{restrict}{a vector of two integers for \code{substring.location} which specifies a range to which the search for matches should be restricted}
\item{front}{specifying \code{front = TRUE} and \code{old = "*"} is the same as specifying \code{old = "^*"}}
\item{back}{specifying \code{back = TRUE} and \code{old = "*"} is the same as specifying \code{old = "*$"}}
\item{value}{a character vector}
}
\value{
\code{sedit} returns a vector of character strings the same length as \code{text}. \code{substring.location} returns a list with components named \code{first} and \code{last}, each specifying a vector of character positions corresponding to matches. \code{replace.substring.wild} returns a single character string. \code{numeric.string} and \code{all.digits} return a single logical value.
}
\section{Side Effects}{
\code{substring2<-} modifies its first argument
}
\author{
Frank Harrell \cr
Department of Biostatistics \cr
Vanderbilt University School of Medicine \cr
\email{fh@fharrell.com}
}
\seealso{
\code{\link{grep}}, \code{\link{substring}}
}
\examples{
x <- 'this string'
substring2(x, 3, 4) <- 'IS'
x
substring2(x, 7) <- ''
x

substring.location('abcdefgabc', 'ab')
substring.location('abcdefgabc', 'ab', restrict=c(3,999))

replace.substring.wild('this is a cat','this*cat','that*dog')
replace.substring.wild('there is a cat','is a*', 'is not a*')
replace.substring.wild('this is a cat','is a*', 'Z')

qualify <- function(x) x==' 1.5 ' | x==' 2.5 '
replace.substring.wild('He won 1.5 million $','won*million', 'lost*million', test=qualify)
replace.substring.wild('He won 1 million $','won*million', 'lost*million', test=qualify)
replace.substring.wild('He won 1.2 million $','won*million', 'lost*million', test=numeric.string)

x <- c('a = b','c < d','hello')
sedit(x, c('=','he*o'), c('==','he*'))

sedit('x23', '*$', '[*]', test=numeric.string)
sedit('23xx', '^*', 'Y_{*} ', test=all.digits)

replace.substring.wild("abcdefabcdef", "d*f", "xy")

x <- "abcd"
substring2(x, "bc") <- "BCX"
x
substring2(x, "B*d") <- "B*D"
x
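# Editor's sketch (not in the original examples): each from/to pair is
# applied to every element of text
sedit(c('ab', 'cd'), from=c('a','d'), to=c('A','D'))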
Hmisc/man/smean.sd.Rd0000644000176200001440000000650313714234043014074 0ustar liggesusers\name{smean.sd} \alias{smean.cl.normal} \alias{smean.sd} \alias{smean.sdl} \alias{smean.cl.boot} \alias{smedian.hilow} \title{ Compute Summary Statistics on a Vector } \description{ A number of statistical summary functions are provided for use with \code{summary.formula} and \code{summarize} (as well as \code{tapply} and by themselves). \code{smean.cl.normal} computes 3 summary variables: the sample mean and lower and upper Gaussian confidence limits based on the t-distribution. \code{smean.sd} computes the mean and standard deviation. \code{smean.sdl} computes the mean plus or minus a constant times the standard deviation. \code{smean.cl.boot} is a very fast implementation of the basic nonparametric bootstrap for obtaining confidence limits for the population mean without assuming normality. These functions all delete NAs automatically. \code{smedian.hilow} computes the sample median and a selected pair of outer quantiles having equal tail areas. } \usage{ smean.cl.normal(x, mult=qt((1+conf.int)/2,n-1), conf.int=.95, na.rm=TRUE) smean.sd(x, na.rm=TRUE) smean.sdl(x, mult=2, na.rm=TRUE) smean.cl.boot(x, conf.int=.95, B=1000, na.rm=TRUE, reps=FALSE) smedian.hilow(x, conf.int=.95, na.rm=TRUE) } \arguments{ \item{x}{ for summary functions \code{smean.*}, \code{smedian.hilow}, a numeric vector from which NAs will be removed automatically } \item{na.rm}{ defaults to \code{TRUE} unlike built-in functions, so that by default \code{NA}s are automatically removed } \item{mult}{ for \code{smean.cl.normal} is the multiplier of the standard error of the mean to use in obtaining confidence limits of the population mean (default is appropriate quantile of the t distribution). For \code{smean.sdl}, \code{mult} is the multiplier of the standard deviation used in obtaining a coverage interval about the sample mean. The default is \code{mult=2} to use plus or minus 2 standard deviations. } \item{conf.int}{ for \code{smean.cl.normal} and \code{smean.cl.boot} specifies the confidence level (0-1) for interval estimation of the population mean. For \code{smedian.hilow}, \code{conf.int} is the coverage probability the outer quantiles should target. When the default, 0.95, is used, the lower and upper quantiles computed are 0.025 and 0.975.
} \item{B}{ number of bootstrap resamples for \code{smean.cl.boot} } \item{reps}{ set to \code{TRUE} to have \code{smean.cl.boot} return the vector of bootstrapped means as the \code{reps} attribute of the returned object } } \value{ a vector of summary statistics } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{summarize}}, \code{\link{summary.formula}} } \examples{ set.seed(1) x <- rnorm(100) smean.sd(x) smean.sdl(x) smean.cl.normal(x) smean.cl.boot(x) smedian.hilow(x, conf.int=.5) # 25th and 75th percentiles # Function to compute 0.95 confidence interval for the difference in two means # g is grouping variable bootdif <- function(y, g) { g <- as.factor(g) a <- attr(smean.cl.boot(y[g==levels(g)[1]], B=2000, reps=TRUE),'reps') b <- attr(smean.cl.boot(y[g==levels(g)[2]], B=2000, reps=TRUE),'reps') meandif <- diff(tapply(y, g, mean, na.rm=TRUE)) a.b <- quantile(b-a, c(.025,.975)) res <- c(meandif, a.b) names(res) <- c('Mean Difference','.025','.975') res } } \keyword{nonparametric} \keyword{htest} \concept{bootstrap} Hmisc/man/Lag.Rd0000644000176200001440000000147412243661443013075 0ustar liggesusers\name{Lag} \alias{Lag} \title{Lag a Numeric, Character, or Factor Vector} \description{ Shifts a vector \code{shift} elements later. Character or factor variables are padded with \code{""}, numerics with \code{NA}. The shift may be negative. } \usage{ Lag(x, shift = 1) } \arguments{ \item{x}{a vector} \item{shift}{integer specifying the number of observations to be shifted to the right. Negative values imply shifts to the left.} } \details{ Attributes of the original object are carried along to the new lagged one. } \value{ a vector like \code{x} } \author{Frank Harrell} \seealso{\code{\link{lag}}} \examples{ Lag(1:5,2) Lag(letters[1:4],2) Lag(factor(letters[1:4]),-2) # Find which observations are the first for a given subject id <- c('a','a','b','b','b','c') id != Lag(id) !duplicated(id) } \keyword{manip} Hmisc/man/showPsfrag.Rd0000644000176200001440000000174313714234044014511 0ustar liggesusers\name{showPsfrag} \alias{showPsfrag} \title{ Display image from psfrag LaTeX strings } \description{ \code{showPsfrag} is used to display (using ghostview) a postscript image that contained psfrag LaTeX strings, by building a small LaTeX script and running \command{latex} and \command{dvips}. } \usage{ showPsfrag(filename) } \arguments{ \item{filename}{ a character string or character vector specifying the file prefix. } } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr \email{fh@fharrell.com} } \references{ Grant MC, Carlisle (1998): The PSfrag System, Version 3. Full documentation is obtained by searching www.ctan.org for \file{pfgguide.ps}. } \seealso{ \code{\link{postscript}}, \code{\link{par}}, \code{\link{ps.options}}, \code{\link{mgp.axis.labels}}, \code{\link{pdf}}, \code{\link[lattice]{trellis.device}}, \code{\link{setTrellis}} } \keyword{hplot} \keyword{device} \concept{trellis} \concept{lattice} Hmisc/man/rcorr.Rd0000644000176200001440000000457713714234045013526 0ustar liggesusers\name{rcorr} \alias{rcorr} \alias{print.rcorr} \title{Matrix of Correlations and P-values} \description{ \code{rcorr} computes a matrix of Pearson's \code{r} or Spearman's \code{rho} rank correlation coefficients for all possible pairs of columns of a matrix. Missing values are deleted in pairs rather than deleting all rows of \code{x} having any missing variables.
Ranks are computed using efficient algorithms (see reference 2), using midranks for ties. } \usage{ rcorr(x, y, type=c("pearson","spearman")) \method{print}{rcorr}(x, \dots) } \arguments{ \item{x}{ a numeric matrix with at least 5 rows and at least 2 columns (if \code{y} is absent). For \code{print}, \code{x} is an object produced by \code{rcorr}. } \item{y}{ a numeric vector or matrix which will be concatenated to \code{x}. If \code{y} is omitted for \code{rcorr}, \code{x} must be a matrix. } \item{type}{ specifies the type of correlations to compute. Spearman correlations are the Pearson linear correlations computed on the ranks of non-missing elements, using midranks for ties. } \item{\dots}{argument for method compatibility.} } \value{ \code{rcorr} returns a list with elements \code{r}, the matrix of correlations, \code{n}, the matrix of number of observations used in analyzing each pair of variables, and \code{P}, the asymptotic P-values. Pairs with fewer than 2 non-missing values have the r values set to NA. The diagonals of \code{n} are the number of non-NAs for the single variable corresponding to that row and column. } \details{ Uses midranks in case of ties, as described by Hollander and Wolfe. P-values are approximated by using the \code{t} or \code{F} distributions. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \references{ Hollander M. and Wolfe D.A. (1973). Nonparametric Statistical Methods. New York: Wiley. Press WH, Flannery BP, Teukolsky SA, Vetterling WT (1988): Numerical Recipes in C. Cambridge: Cambridge University Press. } \seealso{ \code{\link{hoeffd}}, \code{\link{cor}}, \code{\link{combine.levels}}, \code{\link{varclus}}, \code{\link{dotchart3}}, \code{\link{impute}}, \code{\link{chisq.test}}, \code{\link{cut2}}. } \examples{ x <- c(-2, -1, 0, 1, 2) y <- c(4, 1, 0, 1, 4) z <- c(1, 2, 3, 4, NA) v <- c(1, 2, 3, 4, 5) rcorr(cbind(x,y,z,v)) } \keyword{nonparametric} \keyword{htest} \keyword{category} Hmisc/man/data.frame.create.modify.check.Rd0000644000176200001440000004122213663722465020204 0ustar liggesusers\name{data.frame.create.modify.check} \alias{data.frame.create.modify.check} \title{ Tips for Creating, Modifying, and Checking Data Frames } \description{ This help file contains a template for importing data to create an R data frame, correcting some problems resulting from the import and storing the data frame more efficiently, modifying the data frame (including better annotating it and changing the names of some of its variables), and checking and inspecting the data frame for reasonableness of the values of its variables and to describe patterns of missing data. Various built-in functions and functions in the Hmisc library are used. At the end some methods for creating data frames \dQuote{from scratch} within \R are presented. The examples below attempt to clarify the separation of operations that are done on a data frame as a whole, operations that are done on a small subset of its variables without attaching the whole data frame, and operations that are done on many variables after attaching the data frame in search position one. It also tries to clarify that for analyzing several separate variables using \R commands that do not support a \code{data} argument, it is helpful to attach the data frame in a search position later than position one. It is often useful to create, modify, and process datasets in the following order.
\enumerate{ \item{ Import external data into a data frame (if the raw data do not contain column names, provide these during the import if possible) } \item{ Make global changes to a data frame (e.g., changing variable names) } \item{ Change attributes or values of variables within a data frame } \item{ Do analyses involving the whole data frame (without attaching it)\cr (Data frame still in .Data) } \item{ Do analyses of individual variables (after attaching the data frame in search position two or later) } } } \details{ The examples below use the \code{FEV} dataset from \cite{Rosner 1995}. Almost any dataset would do. The jcetable data are taken from \cite{Galobardes, et al.} Presently, giving a variable the \code{"units"} attribute (using the \pkg{Hmisc} \code{\link{units}} function) only benefits the \pkg{Hmisc} \code{\link{describe}} function and the \pkg{rms} library's version of the \code{\link[rms]{Surv}} function. Variable labels defined with the Hmisc \code{\link{label}} function are used by \code{\link{describe}}, \code{\link{summary.formula}}, and many of the plotting functions in \pkg{Hmisc} and \pkg{rms}. } \references{ Alzola CF, Harrell FE (2006): \emph{An Introduction to S and the Hmisc and Design Libraries.} Chapters 3 and 4, \url{https://hbiostat.org/R/doc/sintro.pdf}. Galobardes, et al. (1998), \emph{J Clin Epi} 51:875-881. Rosner B (1995): \emph{Fundamentals of Biostatistics, 4th Edition. } New York: Duxbury Press. } \seealso{ \code{\link{scan}}, \code{\link{read.table}}, \code{\link{cleanup.import}}, \code{\link{sas.get}}, \code{\link{data.frame}}, \code{\link{attach}}, \code{\link{detach}}, \code{\link{describe}}, \code{\link{datadensity}}, \code{\link{plot.data.frame}}, \code{\link{hist.data.frame}}, \code{\link{naclus}}, \code{\link{factor}}, \code{\link{label}}, \code{\link{units}}, \code{\link{names}}, \code{\link{expand.grid}}, \code{\link{summary.formula}}, \code{\link{summary.data.frame}}, \code{\link{casefold}}, \code{\link{edit}}, \code{\link{page}}, \code{\link{plot.data.frame}}, \code{\link{Cs}}, \code{\link{combine.levels}}, \code{\link{upData}} } \examples{ \dontrun{ # First, we do steps that create or manipulate the data # frame in its entirety. For S-Plus, these are done with # .Data in search position one (the default at the # start of the session). # # ----------------------------------------------------------------------- # Step 1: Create initial draft of data frame # # We usually begin by importing a dataset from # another application. ASCII files may be imported # using the scan and read.table functions. SAS # datasets may be imported using the Hmisc sas.get # function (which will carry more attributes from # SAS than using File \dots Import) from the GUI # menus. But for most applications (especially # Excel), File \dots Import will suffice. If using # the GUI, it is often best to provide variable # names during the import process, using the Options # tab, rather than renaming all fields later. Of # course, if the data to be imported already have # field names (e.g., in Excel), let S use those # automatically. If using S-Plus, you can use a # command to execute File \dots Import, e.g.: import.data(FileName = "/windows/temp/fev.asc", FileType = "ASCII", DataFrame = "FEV") # Here we name the new data frame FEV rather than # fev, because we wanted to distinguish a variable # in the data frame named fev from the data frame # name.
For S-Plus the command will look # instead like the following: FEV <- importData("/tmp/fev.asc") # ----------------------------------------------------------------------- # Step 2: Clean up data frame / make it be more # efficiently stored # # Unless using sas.get to import your dataset # (sas.get already stores data efficiently), it is # usually a good idea to run the data frame through # the Hmisc cleanup.import function to change # numeric variables that are always whole numbers to # be stored as integers, the remaining numerics to # single precision, strange values from Excel to # NAs, and character variables that always contain # legal numeric values to numeric variables. # cleanup.import typically halves the size of the # data frame. If you do not specify any parameters # to cleanup.import, the function assumes that no # numeric variable needs more than 7 significant # digits of precision, so all non-integer-valued # variables will be converted to single precision. FEV <- cleanup.import(FEV) # ----------------------------------------------------------------------- # Step 3: Make global changes to the data frame # # A data frame has attributes that are "external" to # its variables. There are the vector of its # variable names ("names" attribute), the # observation identifiers ("row.names"), and the # "class" (whose value is "data.frame"). The # "names" attribute is the one most commonly in need # of modification. If we had wanted to change all # the variable names to lower case, we could have # specified lowernames=TRUE to the cleanup.import # invocation above, or type names(FEV) <- casefold(names(FEV)) # The upData function can also be used to change # variable names in two ways (see below). # To change names in a non-systematic way we use # other options. Under Windows/NT the most # straightforward approach is to change the names # interactively. Click on the data frame in the # left panel of the Object Browser, then in the # right pane click twice (slowly) on a variable. # Use the left arrow and other keys to edit the # name. Click outside that name field to commit the # change. You can also rename columns while in a # Data Sheet. To instead use programming commands # to change names, use something like: names(FEV)[6] <- 'smoke' # assumes you know the positions! names(FEV)[names(FEV)=='smoking'] <- 'smoke' names(FEV) <- edit(names(FEV)) # The last example is useful if you are changing # many names. But none of the interactive # approaches such as edit() are handy if you will be # re-importing the dataset after it is updated in # its original application. This problem can be # addressed by saving the new names in a permanent # vector in .Data: new.names <- names(FEV) # Then if the data are re-imported, you can type names(FEV) <- new.names # to rename the variables. # ----------------------------------------------------------------------- # Step 4: Delete unneeded variables # # To delete some of the variables, you can # right-click on variable names in the Object # Browser's right pane, then select Delete. You can # also set variables to have NULL values, which # causes the system to delete them. We don't need # to delete any variables from FEV but suppose we # did need to delete some from mydframe. mydframe$x1 <- NULL mydframe$x2 <- NULL mydframe[c('age','sex')] <- NULL # delete 2 variables mydframe[Cs(age,sex)] <- NULL # same thing # The last example uses the Hmisc short-cut quoting # function Cs. See also the drop parameter to upData.
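# As a hedged aside (not part of the original template), the same # deletions could be done in a single step with the upData drop # argument; mydframe and its variable names are hypothetical: mydframe <- upData(mydframe, drop=Cs(x1,x2,age,sex))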
# ----------------------------------------------------------------------- # Step 5: Make changes to individual variables # within the data frame # # After importing data, the resulting variables are # seldom self-documenting, so we commonly need to # change or enhance attributes of individual # variables within the data frame. # # If you are only changing a few variables, it is # efficient to change them directly without # attaching the entire data frame. FEV$sex <- factor(FEV$sex, 0:1, c('female','male')) FEV$smoke <- factor(FEV$smoke, 0:1, c('non-current smoker','current smoker')) units(FEV$age) <- 'years' units(FEV$fev) <- 'L' label(FEV$fev) <- 'Forced Expiratory Volume' units(FEV$height) <- 'inches' # When changing more than one or two variables it is # more convenient to change the data frame using the # Hmisc upData function. FEV2 <- upData(FEV, rename=c(smoking='smoke'), # omit if renamed above drop=c('var1','var2'), levels=list(sex =list(female=0,male=1), smoke=list('non-current smoker'=0, 'current smoker'=1)), units=list(age='years', fev='L', height='inches'), labels=list(fev='Forced Expiratory Volume')) # An alternative to levels=list(\dots) is for example # upData(FEV, sex=factor(sex,0:1,c('female','male'))). # # Note that we saved the changed data frame into a # new data frame FEV2. If we were confident of the # correctness of our changes we could have stored # the new data frame on top of the old one, under # the original name FEV. # ----------------------------------------------------------------------- # Step 6: Check the data frame # # The Hmisc describe function is perhaps the first # function that should be used on the new data # frame. It provides documentation of all the # variables, and its frequency tabulations, counts of # NAs, and 5 largest and smallest values are # helpful in detecting data errors. Typing # describe(FEV) will write the results to the # current output window. To put the results in a # new window that can persist, even upon exiting # S, we use the page function. The describe # output can be minimized to an icon but kept ready # for guiding later steps of the analysis. page(describe(FEV2), multi=TRUE) # multi=TRUE allows that window to persist while # control is returned to other windows # The new data frame is OK. Store it on top of the # old FEV and then use the graphical user interface # to delete FEV2 (click on it and hit the Delete # key) or type rm(FEV2) after the next statement. FEV <- FEV2 # Next, we can use a variety of other functions to # check and describe all of the variables. As we # are analyzing all or almost all of the variables, # this is best done without attaching the data # frame. Note that plot.data.frame plots inverted # CDFs for continuous variables and dot plots # showing frequency distributions of categorical # ones. summary(FEV) # basic summary function (summary.data.frame) plot(FEV) # plot.data.frame datadensity(FEV) # rug plots and freq. bar charts for all var. hist.data.frame(FEV) # for variables having > 2 values by(FEV, FEV$smoke, summary) # use basic summary function with stratification
options(width=80) summary(height ~ age + sex, data=FEV, fun=function(y)c(smean.sd(y), smedian.hilow(y,conf.int=.5))) # This computes mean height, S.D., median, outer quartiles fit <- lm(height ~ age*sex, data=FEV) summary(fit) # For this analysis we could also have attached the # data frame in search position 2. For other # analyses, it is mandatory to attach the data frame # unless FEV$ prefixes each variable name. # Important: DO NOT USE attach(FEV, 1) or # attach(FEV, pos=1, \dots) if you are only analyzing # and not changing the variables, unless you really # need to avoid conflicts with variables in search # position 1 that have the same names as the # variables in FEV. Attaching into search position # 1 will cause S-Plus to be more of a memory hog. attach(FEV) # Use e.g. attach(FEV[,Cs(age,sex)]) if you only # want to analyze a small subset of the variables # Use e.g. attach(FEV[FEV$sex=='male',]) to # analyze a subset of the observations summary(height ~ age + sex, fun=function(y)c(smean.sd(y), smedian.hilow(y,conf.int=.5))) fit <- lm(height ~ age*sex) # Run generic summary function on height and fev, # stratified by sex by(data.frame(height,fev), sex, summary) # Cross-classify into 4 sex x smoke groups by(FEV, list(sex,smoke), summary) # Plot 5 quantiles s <- summary(fev ~ age + sex + height, fun=function(y)quantile(y,c(.1,.25,.5,.75,.9))) plot(s, which=1:5, pch=c(1,2,15,2,1), #pch=c('=','[','o',']','='), main='A Discovery', xlab='FEV') # Use the nonparametric bootstrap to compute a # 0.95 confidence interval for the population mean fev smean.cl.boot(fev) # in Hmisc # Use the Statistics \dots Compare Samples \dots One Sample # keys to get a normal-theory-based C.I. Then do it # more manually. The following method assumes that # there are no NAs in fev sd <- sqrt(var(fev)) xbar <- mean(fev) xbar sd n <- length(fev) qt(.975,n-1) # prints 0.975 critical value of t dist. with n-1 d.f. xbar + c(-1,1)*sd/sqrt(n)*qt(.975,n-1) # prints confidence limits # Fit a linear model # fit <- lm(fev ~ other variables \dots) detach() # The last command is only needed if you want to # start operating on another data frame and you want # to get FEV out of the way. # ----------------------------------------------------------------------- # Creating data frames from scratch # # Data frames can be created from within S. To # create a small data frame containing ordinary # data, you can use something like dframe <- data.frame(age=c(10,20,30), sex=c('male','female','male'), stringsAsFactors=TRUE) # You can also create a data frame using the Data # Sheet. Create an empty data frame with the # correct variable names and types, then edit in the # data. dd <- data.frame(age=numeric(0),sex=character(0), stringsAsFactors=TRUE) # The sex variable will be stored as a factor, and # levels will be automatically added to it as you # define new values for sex in the Data Sheet's sex # column. # # When the data frame you need to create is defined # by systematically varying variables (e.g., all # possible combinations of values of each variable), # the expand.grid function is useful for quickly # creating the data. Then you can add # non-systematically-varying variables to the object # created by expand.grid, using programming # statements or editing the Data Sheet. This # process is useful for creating a data frame # representing all the values in a printed table. 
# In what follows we create a data frame # representing the combinations of values from an 8 # x 2 x 2 x 2 (event x method x sex x what) table, # and add a non-systematic variable percent to the # data. jcetable <- expand.grid( event=c('Wheezing at any time', 'Wheezing and breathless', 'Wheezing without a cold', 'Waking with tightness in the chest', 'Waking with shortness of breath', 'Waking with an attack of cough', 'Attack of asthma', 'Use of medication'), method=c('Mail','Telephone'), sex=c('Male','Female'), what=c('Sensitivity','Specificity')) jcetable$percent <- c(756,618,706,422,356,578,289,333, 576,421,789,273,273,212,212,212, 613,763,713,403,377,541,290,226, 613,684,632,290,387,613,258,129, 656,597,438,780,732,679,938,919, 714,600,494,877,850,703,963,987, 755,420,480,794,779,647,956,941, 766,423,500,833,833,604,955,986) / 10 # In jcetable, event varies most rapidly, then # method, then sex, and what. } } \keyword{data} \keyword{manip} \keyword{programming} \keyword{interface} \keyword{htest} \concept{overview} Hmisc/man/R2Measures.Rd0000644000176200001440000001267714225271705014371 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/R2Measures.r \name{R2Measures} \alias{R2Measures} \title{R2Measures} \usage{ R2Measures(lr, p, n, ess = NULL, padj = 1) } \arguments{ \item{lr}{likelihood ratio chi-square statistic} \item{p}{number of non-intercepts in the model that achieved \code{lr}} \item{n}{raw number of observations} \item{ess}{if a single number, is the effective sample size. If a vector of numbers, it is assumed to be the frequency tabulation of all distinct values of the outcome variable, from which the effective sample size is computed.} \item{padj}{set to 2 to use the classical adjusted R^2 penalty, 1 (the default) to subtract \code{p} from \code{lr}} } \value{ named vector of R2 measures. The notation for results is \code{R^2(p, n)} where the \code{p} component is empty for unadjusted estimates and \code{n} is the sample size used (actual sample size for first measures, effective sample size for remaining ones). For indexes that are not adjusted, only \code{n} appears. } \description{ Generalized R^2 Measures } \details{ Computes various generalized R^2 measures related to the Maddala-Cox-Snell (MCS) R^2 for regression models fitted with maximum likelihood. The original MCS R^2 is labeled \code{R2} in the result. This measure uses the raw sample size \code{n} and does not penalize for the number of free parameters, so it can be rewarded for overfitting. A measure adjusted for the number of fitted regression coefficients \code{p} uses the analogy to R^2 in linear models by computing \code{1 - exp(- lr / n) * (n-1)/(n-p-1)} if \code{padj=2}, which is approximately \code{1 - exp(- (lr - p) / n)}, the version used if \code{padj=1} (the default). The latter measure is appealing because the expected value of the likelihood ratio chi-square statistic \code{lr} is \code{p} under the global null hypothesis of no predictors being associated with the response variable. See \url{https://hbiostat.org/bib/r2.html} for more details. It is well known that in logistic regression the MCS R^2 cannot achieve a value of 1.0 even with a perfect model, which prompted Nagelkerke to divide the R^2 measure by its maximum attainable value. This is not necessarily the best recalibration of R^2 throughout its range.
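As a small numeric sketch of the formulas described above (the values of \code{lr}, \code{p}, and \code{n} are illustrative only, not taken from a real model fit): \preformatted{lr <- 30; p <- 5; n <- 200
1 - exp(- lr / n)                          # unadjusted MCS R^2
1 - exp(- (lr - p) / n)                    # padj=1 (default) penalty
1 - exp(- lr / n) * (n - 1) / (n - p - 1)  # padj=2, classical analogue
}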
An alternative is to use the formulas above but to replace the raw sample size \code{n} with the effective sample size, which for data with many ties can be significantly lower than the number of observations. As used in the \code{popower()} and \code{describe()} functions, in the context of a Wilcoxon test or the proportional odds model, the effective sample size is \code{n * (1 - f)} where \code{f} is the sum of cubes of the proportion of observations at each distinct value of the response variable. Whitehead derived this from an approximation to the variance of a log odds ratio in a proportional odds model. To obtain R^2 measures using the effective sample size, either provide \code{ess} as a single number specifying the effective sample size, or specify a vector of frequencies of distinct Y values from which the effective sample size will be computed. In the context of survival analysis, the single number effective sample size you may wish to specify is the number of uncensored observations. This is exactly correct when estimating the hazard rate from a simple exponential distribution or when using the Cox PH/log-rank test. For failure time distributions with a very high early hazard, censored observations contain enough information that the effective sample size is greater than the number of events. See Benedetti et al., 1982. If the effective sample size equals the raw sample size, measures involving the effective sample size are set to \code{NA}. } \examples{ x <- c(rep(0, 50), rep(1, 50)) y <- x # f <- lrm(y ~ x) # f # Nagelkerke R^2=1.0 # lr <- f$stats['Model L.R.'] # 1 - exp(- lr / 100) # Maddala-Cox-Snell (MCS) 0.75 lr <- 138.6267 # manually so don't need rms package R2Measures(lr, 1, 100, c(50, 50)) # 0.84 Effective n=75 R2Measures(lr, 1, 100, 50) # 0.94 # MCS requires unreasonable effective sample size = minimum outcome # frequency to get close to the 1.0 that Nagelkerke R^2 achieves } \references{ Smith TJ and McKenna CM (2013): A comparison of logistic regression pseudo R^2 indices. Multiple Linear Regression Viewpoints 39:17-26. \url{https://www.glmj.org/archives/articles/Smith_v39n2.pdf} Benedetti JK, et al (1982): Effective sample size for tests of censored survival data. Biometrika 69:343--349. Mittlbock M, Schemper M (1996): Explained variation for logistic regression. Stat in Med 15:1987-1997. Date, S: R-squared, adjusted R-squared and pseudo R-squared. \url{https://timeseriesreasoning.com/contents/r-squared-adjusted-r-squared-pseudo-r-squared/} UCLA: What are pseudo R-squareds? \url{https://stats.oarc.ucla.edu/other/mult-pkg/faq/general/faq-what-are-pseudo-r-squareds/} Allison P (2013): What's the best R-squared for logistic regression? \url{https://statisticalhorizons.com/r2logistic/} Menard S (2000): Coefficients of determination for multiple logistic regression analysis. The Am Statistician 54:17-24. Whitehead J (1993): Sample size calculations for ordered categorical data. Stat in Med 12:2257-2271. See errata (1994) 13:871 and letter to the editor by Julious SA, Campbell MJ (1996) 15:1065-1066 showing that for 2-category Y the Whitehead sample size formula agrees closely with the usual formula for comparing two proportions.
} \author{ Frank Harrell } Hmisc/man/scat1d.Rd0000644000176200001440000006050414275453631013555 0ustar liggesusers\name{scat1d} \alias{scat1d} \alias{jitter2} \alias{jitter2.default} \alias{jitter2.data.frame} \alias{datadensity} \alias{datadensity.data.frame} \alias{histSpike} \alias{histSpikeg} \alias{ecdfpM} \title{One-Dimensional Scatter Diagram, Spike Histogram, or Density} \description{ \code{scat1d} adds tick marks (bar codes, rug plot) on any of the four sides of an existing plot, corresponding with non-missing values of a vector \code{x}. This is used to show the data density. Can also place the tick marks along a curve by specifying y-coordinates to go along with the \code{x} values. If any two values of \code{x} are within \eqn{\code{eps}*w} of each other, where \code{eps} defaults to .001 and w is the span of the intended axis, values of \code{x} are jittered by adding a value uniformly distributed in \eqn{[-\code{jitfrac}*w, \code{jitfrac}*w]}, where \code{jitfrac} defaults to .008. Specifying \code{preserve=TRUE} invokes \code{jitter2} with a different logic of jittering. Allows plotting random sub-segments to handle very large \code{x} vectors (see \code{tfrac}). \code{jitter2} is a generic method for jittering, which does not add random noise. It retains unique values and ranks, and randomly spreads duplicate values at equidistant positions within limits of enclosing values. \code{jitter2} is especially useful for numeric variables with discrete values, like rating scales. Missing values are allowed and are returned. Currently implemented methods are \code{jitter2.default} for vectors and \code{jitter2.data.frame} which returns a data.frame with each numeric column jittered. \code{datadensity} is a generic method used to show data densities in more complex situations. Here, another \code{datadensity} method is defined for data frames. Depending on the \code{which} argument, some or all of the variables in a data frame will be displayed, with \code{scat1d} used to display continuous variables and, by default, bars used to display frequencies of categorical, character, or discrete numeric variables. For such variables, when the total length of value labels exceeds 200, only the first few characters from each level are used. By default, \code{datadensity.data.frame} will construct one axis (i.e., one strip) per variable in the data frame. Variable names appear to the left of the axes, and the number of missing values (if greater than zero) appear to the right of the axes. An optional \code{group} variable can be used for stratification, where the different strata are depicted using different colors. If the \code{q} vector is specified, the desired quantiles (over all \code{group}s) are displayed with solid triangles below each axis. When the sample size exceeds 2000 (this value may be modified using the \code{nhistSpike} argument), \code{datadensity} calls \code{histSpike} instead of \code{scat1d} to show the data density for numeric variables. This results in a histogram-like display that makes the resulting graphics file much smaller. In this case, \code{datadensity} uses the \code{minf} argument (see below) so that very infrequent data values will not be lost on the variable's axis, although this will slightly distort the histogram. \code{histSpike} is another method for showing a high-resolution data distribution that is particularly good for very large datasets (say \eqn{\code{n} > 1000}).
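For instance, a minimal base-graphics sketch (simulated data; the calls use only arguments shown in the usage below): \preformatted{set.seed(3)
x <- rnorm(2000)
plot(x, x + rnorm(2000))   # any existing scatterplot
scat1d(x)                  # data density (rug) on the top axis
histSpike(x, add=TRUE)     # spike histogram along the bottom axis
}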
By default, \code{histSpike} bins the continuous \code{x} variable into 100 equal-width bins and then computes the frequency counts within bins (if \code{n} does not exceed 10, no binning is done). If \code{add=FALSE} (the default), the function displays either proportions or frequencies as in a vertical histogram. Instead of bars, spikes are used to depict the frequencies. If \code{add=TRUE}, the function assumes you are adding small density displays that are intended to take up a small amount of space in the margins of the overall plot. The \code{frac} argument is used as with \code{scat1d} to determine the relative length of the whole plot that is used to represent the maximum frequency. No jittering is done by \code{histSpike}. \code{histSpike} can also graph a kernel density estimate for \code{x}, or add a small density curve to any of 4 sides of an existing plot. When \code{y} or \code{curve} is specified, the density or spikes are drawn with respect to the curve rather than the x-axis. \code{histSpikeg} is similar to \code{histSpike} but is for adding layers to a \code{ggplot2} graphics object or traces to a \code{plotly} object. \code{histSpikeg} can also add \code{lowess} curves to the plot. \code{ecdfpM} makes a \code{plotly} graph or series of graphs showing possibly superposed empirical cumulative distribution functions. } \usage{ scat1d(x, side=3, frac=0.02, jitfrac=0.008, tfrac, eps=ifelse(preserve,0,.001), lwd=0.1, col=par("col"), y=NULL, curve=NULL, bottom.align=FALSE, preserve=FALSE, fill=1/3, limit=TRUE, nhistSpike=2000, nint=100, type=c('proportion','count','density'), grid=FALSE, \dots) jitter2(x, \dots) \method{jitter2}{default}(x, fill=1/3, limit=TRUE, eps=0, presorted=FALSE, \dots) \method{jitter2}{data.frame}(x, \dots) datadensity(object, \dots) \method{datadensity}{data.frame}(object, group, which=c("all","continuous","categorical"), method.cat=c("bar","freq"), col.group=1:10, n.unique=10, show.na=TRUE, nint=1, naxes, q, bottom.align=nint>1, cex.axis=sc(.5,.3), cex.var=sc(.8,.3), lmgp=NULL, tck=sc(-.009,-.002), ranges=NULL, labels=NULL, \dots) # sc(a,b) means default to a if number of axes <= 3, b if >=50, use # linear interpolation within 3-50 histSpike(x, side=1, nint=100, bins=NULL, frac=.05, minf=NULL, mult.width=1, type=c('proportion','count','density'), xlim=range(x), ylim=c(0,max(f)), xlab=deparse(substitute(x)), ylab=switch(type,proportion='Proportion', count ='Frequency', density ='Density'), y=NULL, curve=NULL, add=FALSE, minimal=FALSE, bottom.align=type=='density', col=par('col'), lwd=par('lwd'), grid=FALSE, \dots) histSpikeg(formula=NULL, predictions=NULL, data, plotly=NULL, lowess=FALSE, xlim=NULL, ylim=NULL, side=1, nint=100, frac=function(f) 0.01 + 0.02*sqrt(f-1)/sqrt(max(f,2)-1), span=3/4, histcol='black', showlegend=TRUE) ecdfpM(x, group=NULL, what=c('F','1-F','f','1-f'), q=NULL, extra=c(0.025, 0.025), xlab=NULL, ylab=NULL, height=NULL, width=NULL, colors=NULL, nrows=NULL, ncols=NULL, ...) } \arguments{ \item{x}{ a vector of numeric data, or a data frame (for \code{jitter2} or \code{ecdfpM}) } \item{object}{ a data frame or list (even with unequal number of observations per variable, as long as \code{group} is not specified) } \item{side}{ axis side to use (1=bottom (default for \code{histSpike}), 2=left, 3=top (default for \code{scat1d}), 4=right) } \item{frac}{ fraction of smaller of vertical and horizontal axes for tick mark lengths. Can be negative to move tick marks outside of plot.
For \code{histSpike}, this is the relative y-direction length to be used for the largest frequency. When \code{scat1d} calls \code{histSpike}, it multiplies its \code{frac} argument by 2.5. For \code{histSpikeg}, \code{frac} is a function of \code{f}, the vector of all frequencies. The default function scales tick marks so that they are between 0.01 and 0.03 of the y range, linearly scaled in the square root of the frequency less one. } \item{jitfrac}{ fraction of axis for jittering. If \eqn{\code{jitfrac} \le 0}{\code{jitfrac} <= 0}, no jittering is done. If \code{preserve=TRUE}, the amount of jittering is independent of \code{jitfrac}. } \item{tfrac}{ Fraction of tick mark to actually draw. If \eqn{\code{tfrac}<1}, will draw a random fraction \code{tfrac} of the line segment at each point. This is useful for very large samples or ones with some very dense points. The default value is 1 if the number of non-missing observations \code{n} is less than 125, and \eqn{\max{(.1, 125/n)}} otherwise. } \item{eps}{ fraction of axis for determining overlapping points in \code{x}. For \code{preserve=TRUE} the default is 0 and original unique values are retained, bigger values of \code{eps} tend to bias observations from dense to sparse regions, but ranks are still preserved. } \item{lwd}{ line width for tick marks, passed to \code{segments} } \item{col}{ color for tick marks, passed to \code{segments} } \item{y}{ specify a vector the same length as \code{x} to draw tick marks along a curve instead of by one of the axes. The \code{y} values are often predicted values from a model. The \code{side} argument is ignored when \code{y} is given. If the curve is already represented as a table look-up, you may specify it using the \code{curve} argument instead. \code{y} may be a scalar to use a constant vertical placement. } \item{curve}{ a list containing elements \code{x} and \code{y} for which linear interpolation is used to derive \code{y} values corresponding to values of \code{x}. This results in tick marks being drawn along the curve. For \code{histSpike}, interpolated \code{y} values are derived for bin midpoints. } \item{minimal}{for \code{histSpike} set \code{minimal=TRUE} to draw a minimalist spike histogram with no y-axis. This works best when producing graphics images that are short, e.g., have a height of two inches. \code{add} is forced to be \code{FALSE} in this case so that a standalone graph is produced. Only base graphics are used.} \item{bottom.align}{ set to \code{TRUE} to have the bottoms of tick marks (for \code{side=1} or \code{side=3}) aligned at the y-coordinate. The default behavior is to center the tick marks. For \code{datadensity.data.frame}, \code{bottom.align} defaults to \code{TRUE} if \code{nint>1}. In other words, if you are only labeling the first and last axis tick mark, the \code{scat1d} tick marks are centered on the variable's axis. } \item{preserve}{ set to \code{TRUE} to invoke \code{jitter2} } \item{fill}{ maximum fraction of the axis filled by jittered values. If \code{d} are duplicated values between a lower value l and upper value u, then d will be spread within \eqn{\pm \code{fill}*\min{(u-d,d-l)}/2}{ +/- \code{fill}*min(u-d,d-l)/2}. } \item{limit}{ specifies a limit for maximum shift in jittered values. Duplicate values will be spread within \eqn{\pm\code{fill}*\min{(u-d,d-l)}/2}{ +/- \code{fill}*min(u-d,d-l)/2}. The default \code{TRUE} restricts jittering to the smallest \eqn{\min{(u-d,d-l)}/2} observed and results in equal amount of jittering for all d.
Setting to \code{FALSE} allows for locally different amount of jittering, using maximum space available. } \item{nhistSpike}{ If the number of observations exceeds or equals \code{nhistSpike}, \code{scat1d} will automatically call \code{histSpike} to draw the data density, to prevent the graphics file from being too large. } \item{type}{ used by or passed to \code{histSpike}. Set to \code{"count"} to display frequency counts rather than relative frequencies, or \code{"density"} to display a kernel density estimate computed using the \code{density} function. } \item{grid}{ set to \code{TRUE} if the \R \code{grid} package is in effect for the current plot } \item{nint}{ number of intervals to divide each continuous variable's axis for \code{datadensity}. For \code{histSpike}, is the number of equal-width intervals for which to bin \code{x}, and if instead \code{nint} is a character string (e.g.,\code{nint="all"}), the frequency tabulation is done with no binning. In other words, frequencies for all unique values of \code{x} are derived and plotted. For \code{histSpikeg}, if \code{x} has no more than \code{nint} unique values, all observed values are used, otherwise the data are rounded before tabulation so that there are no more than \code{nint} intervals. For \code{histSpike}, \code{nint} is ignored if \code{bins} is given. } \item{bins}{for \code{histSpike} specifies the actual cutpoints to use for binning \code{x}. The default is to use \code{nint} in conjunction with \code{xlim}.} \item{\dots}{ optional arguments passed to \code{scat1d} from \code{datadensity} or to \code{histSpike} from \code{scat1d}. For \code{histSpikep} are passed to the \code{lines} list to \code{add_trace}. For \code{ecdfpM} these arguments are passed to \code{add_lines}. } \item{presorted}{ set to \code{TRUE} to prevent from sorting for determining the order \eqn{l 4} are stored as S double precision numerics, which allow for the same precision as a \acronym{SAS} \preformatted{LENGTH} 8 variable. Set \code{force.single = TRUE} to store every numeric variable in single precision (7 digits of precision). This option is useful when the creator of the \acronym{SAS} dataset has failed to use a \preformatted{LENGTH} statement. R does not have single precision, so no attempt is made to convert to single if running R. } \item{dates}{ One of the character strings \code{"sas"}, \code{"yearfrac"}, \code{"yearfrac2"}, \code{"yymmdd"}. If a \acronym{SAS} variable has a date format (one of \code{"DATE"}, \code{"MMDDYY"}, \code{"YYMMDD"}, \code{"DDMMYY"}, \code{"YYQ"}, \code{"MONYY"}, \code{"JULIAN"}), it will be converted to the format specified by \code{dates} before being given to S. \code{"sas"} gives days from 1/1/1960 (from 1/1/1970 if using \code{chron}), \code{"yearfrac"} gives days from 1/1/1900 divided by 365.25, \code{"yearfrac2"} gives year plus fraction of current year, and \code{"yymmdd"} gives a 6 digit number \preformatted{YYMMDD} (year\%\%100, month, day). Note that \R will store these as numbers, not as character strings. If \code{dates="sas"} and a variable has one of the \acronym{SAS} date formats listed above, the variable will be given a class of \samp{date} to work with Terry Therneau's implementation of the \samp{date} class in S. If the \code{chron} package or \code{timeDate} function is available, these are used instead. } \item{keep.log}{ logical flag: if \code{FALSE}, delete the \acronym{SAS} log file upon completion. } \item{log.file}{ the name of the \acronym{SAS} log file. 
} \item{macro}{ the name of an S object in the current search path that contains the text of the \acronym{SAS} macro called by \R. The \R object is a character vector that can be edited using for example \code{sas.get.macro <- editor(sas.get.macro)}. } \item{data.frame.out}{ logical flag: if \code{TRUE}, the return value will be an S data frame, otherwise it will be a list. } \item{clean.up}{ logical flag: if \code{TRUE}, remove all temporary files when finished. You may want to keep these while debugging the \acronym{SAS} macro. Not needed for \R. } \item{quiet}{ logical flag: if \code{FALSE}, print the contents of the \acronym{SAS} log file if there has been an error. } \item{temp}{ the prefix to use for the temporary files. Two characters will be added to this, the resulting name must fit on your file system. } \item{sasprog}{ the name of the system command to invoke \acronym{SAS} } \item{uncompress}{ set to \code{TRUE} to automatically invoke the \acronym{UNIX} \command{gunzip} command (if \file{\var{member}.ssd01.gz} exists) or the \command{uncompress} command (if \file{\var{member}.ssd01.Z} exists) to uncompress the \acronym{SAS} dataset before proceeding. This assumes you have the file permissions to allow uncompressing in place. If the file is already uncompressed, this option is ignored. } \item{pos}{ by default, a list or data frame which contains all the variables is returned. If you specify \code{pos}, each individual variable is placed into a separate object (whose name is the name of the variable) using the \code{assign} function with the \code{pos} argument. For example, you can put each variable in its own file in a directory, which in some cases may save memory over attaching a data frame. } \item{code}{ a special missing value code (\samp{A} through \samp{Z} or \samp{\_}) to check against. If \code{code} is omitted, \code{is.special.miss} will return a \code{TRUE} for each observation that has any special missing value. } \item{defaultencoding}{ encoding to assume if the SAS dataset does not specify one. Defaults to "latin1". } \item{var.case}{default is to change case of SAS variable names to lower case. Specify alternatively \code{"upper"} or \code{"preserve"}.} \item{object}{ a variable in a data frame created by \code{sas.get} } \item{\dots}{ignored} } \value{ if \code{data.frame.out} is \code{TRUE}, the output will be a data frame resembling the \acronym{SAS} dataset. If \code{id} was specified, that column of the data frame will be used as the row names of the data frame. Each variable in the data frame or vector in the list will have the attributes \code{label} and \code{format} containing \acronym{SAS} labels and formats. Underscores in formats are converted to periods. Formats for character variables have \code{\$} placed in front of their names. If \code{formats} is \code{TRUE} and there are any appropriate format definitions in \code{format.library}, the returned object will have attribute \code{formats} containing lists named the same as the format names (with periods substituted for underscores and character formats prefixed by \code{\$}). Each of these lists has a vector called \code{values} and one called \code{labels} with the \preformatted{PROC FORMAT; VALUE ...} definitions. If \code{data.frame.out} is \code{FALSE}, the output will be a list of vectors, each containing a variable from the \acronym{SAS} dataset. If \code{id} was specified, that element of the list will be used as the \code{id} attribute of the entire list. 
} \section{Side Effects}{ if a \acronym{SAS} error occurs and \code{quiet} is \code{FALSE}, then the \acronym{SAS} log file will be printed under the control of the \command{less} pager. } \details{ If you specify \code{special.miss = TRUE} and there are no special missing values in the \acronym{SAS} dataset, the \acronym{SAS} step will bomb. For variables having a \preformatted{PROC FORMAT VALUE} format with some of the levels undefined, \code{sas.get} will interpret those values as \code{NA} if you are using \code{recode}. The \acronym{SAS} macro \file{sas_get} uses record lengths of up to 4096 in two places. If you are exporting records that are very long (because of a large number of variables and/or long character variables), you may want to edit these \preformatted{LRECL}s to quadruple them, for example. } \note{ You must be able to run \acronym{SAS} (by typing \command{sas}) on your system. If the S command \code{!sas} does not start \acronym{SAS}, then this function cannot work. If you are reading time or date-time variables, you will need to execute the command \code{library(chron)} to print those variables or the data frame if the \code{timeDate} function is not available. } \section{BACKGROUND}{ The references cited below explain the structure of \acronym{SAS} datasets and how they are stored under \acronym{UNIX}. See \emph{\acronym{SAS} Language} for a discussion of the \dQuote{subsetting if} statement. } \author{ Terry Therneau, Mayo Clinic \cr Frank Harrell, Vanderbilt University \cr Bill Dunlap, University of Washington and Insightful Corporation \cr Michael W. Kattan, Cleveland Clinic Foundation \cr Reinhold Koch (encoding) } \references{ \acronym{SAS} Institute Inc. (1990). \emph{\acronym{SAS} Language: Reference, Version 6.} First Edition. \acronym{SAS} Institute Inc., Cary, North Carolina. \acronym{SAS} Institute Inc. (1988). \acronym{SAS} Technical Report P-176, \emph{Using the \acronym{SAS} System, Release 6.03, under UNIX Operating Systems and Derivatives. } \acronym{SAS} Institute Inc., Cary, North Carolina. \acronym{SAS} Institute Inc. (1985). \emph{\acronym{SAS} Introductory Guide.} Third Edition. \acronym{SAS} Institute Inc., Cary, North Carolina.
} \seealso{ \code{\link{data.frame}}, \code{\link[Hmisc]{describe}}, \code{\link[Hmisc]{label}}, \code{\link[Hmisc]{upData}}, \code{\link[Hmisc:upData]{cleanup.import}} } \examples{ \dontrun{ sas.contents("saslib", "mice") # [1] "dose" "ld50" "strain" "lab_no" attr(, "n"): # [1] 117 mice <- sas.get("saslib", mem="mice", var=c("dose", "strain", "ld50")) plot(mice$dose, mice$ld50) nude.mice <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice", ifs="if strain='nude'") nude.mice.dl <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice", var=c("dose", "ld50"), ifs="if strain='nude'") # Get a dataset from current directory, recode PROC FORMAT; VALUE \dots # variables into factors with labels of the form "good(1)" "better(2)", # get special missing values, recode missing codes .D and .R into new # factor levels "Don't know" and "Refused to answer" for variable q1 d <- sas.get(".", "mydata", recode=2, special.miss=TRUE) attach(d) nl <- length(levels(q1)) lev <- c(levels(q1), "Don't know", "Refused") q1.new <- as.integer(q1) q1.new[is.special.miss(q1,"D")] <- nl+1 q1.new[is.special.miss(q1,"R")] <- nl+2 q1.new <- factor(q1.new, 1:(nl+2), lev) # Note: would like to use factor() in place of as.integer \dots but # factor in this case adds "NA" as a category level d <- sas.get(".", "mydata") sas.codes(d$x) # for PROC FORMATted variables returns original data codes d$x <- code.levels(d$x) # or attach(d); x <- code.levels(x) # This makes levels such as "good" "better" "best" into e.g. # "1:good" "2:better" "3:best", if the original SAS values were 1,2,3 # Retrieve the same variables from another dataset (or an update of # the original dataset) mydata2 <- sas.get('mydata2', var=names(d)) # This only works if none of the original SAS variable names contained _ mydata2 <- cleanup.import(mydata2) # will make true integer variables # Code from Don MacQueen to generate SAS dataset to test import of # date, time, date-time variables # data ssd.test; # d1='3mar2002'd ; # dt1='3mar2002 9:31:02'dt; # t1='11:13:45't; # output; # # d1='3jun2002'd ; # dt1='3jun2002 9:42:07'dt; # t1='11:14:13't; # output; # format d1 mmddyy10. dt1 datetime. t1 time.; # run; } } \keyword{interface} \keyword{manip} % Converted by Sd2Rd version 1.21. Hmisc/man/areg.Rd0000644000176200001440000002046213714234051013301 0ustar liggesusers\name{areg} \alias{areg} \alias{print.areg} \alias{predict.areg} \alias{plot.areg} \title{Additive Regression with Optimal Transformations on Both Sides using Canonical Variates} \description{ Expands continuous variables into restricted cubic spline bases and categorical variables into dummy variables and fits a multivariate equation using canonical variates. This finds optimum transformations that maximize \eqn{R^2}. Optionally, the bootstrap is used to estimate the covariance matrix of both left- and right-hand-side transformation parameters, and to estimate the bias in the \eqn{R^2} due to overfitting and compute the bootstrap optimism-corrected \eqn{R^2}. Cross-validation can also be used to get an unbiased estimate of \eqn{R^2} but this is not as precise as the bootstrap estimate. The bootstrap and cross-validation may also be used to get estimates of mean and median absolute error in predicted values on the original \code{y} scale. These two estimates are perhaps the best ones for gauging the accuracy of a flexible model, because it is difficult to compare \eqn{R^2} under different y-transformations, and because \eqn{R^2} allows for an out-of-sample recalibration (i.e., it only measures relative errors).
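For instance, a minimal sketch (simulated data; the argument values are illustrative) requesting bootstrap and 10-fold cross-validated estimates, with and without restricting the \code{y} transformation to be linear: \preformatted{set.seed(2)
x <- rnorm(300)
y <- exp(x + rnorm(300)/2)
f <- areg(x, y, nk=4, B=50, crossval=10)             # flexible y
g <- areg(x, y, ytype='l', nk=4, B=50, crossval=10)  # linear y
f; g   # compare bootstrap/cross-validated absolute errors
}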
Note that uncertainty about the proper transformation of \code{y} causes an enormous amount of model uncertainty. When the transformation for \code{y} is estimated from the data, a high variance in predicted values on the original \code{y} scale may result, especially if the true transformation is linear. Comparing bootstrap or cross-validated mean absolute errors with and without restricting the \code{y} transform to be linear (\code{ytype='l'}) may help the analyst choose the proper model complexity. } \usage{ areg(x, y, xtype = NULL, ytype = NULL, nk = 4, B = 0, na.rm = TRUE, tolerance = NULL, crossval = NULL) \method{print}{areg}(x, digits=4, \dots) \method{plot}{areg}(x, whichx = 1:ncol(x$x), \dots) \method{predict}{areg}(object, x, type=c('lp','fitted','x'), what=c('all','sample'), \dots) } \arguments{ \item{x}{ A single predictor or a matrix of predictors. Categorical predictors are required to be coded as integers (as \code{factor} does internally). For \code{predict}, \code{x} is a data matrix with the same integer codes that were originally used for categorical variables. } \item{y}{a \code{factor}, categorical, character, or numeric response variable} \item{xtype}{ a vector of one-letter character codes specifying how each predictor is to be modeled, in order of columns of \code{x}. The codes are \code{"s"} for smooth function (using restricted cubic splines), \code{"l"} for no transformation (linear), or \code{"c"} for categorical (to cause expansion into dummy variables). Default is \code{"s"} if \code{nk > 0} and \code{"l"} if \code{nk=0}. } \item{ytype}{same coding as for \code{xtype}. Default is \code{"s"} for a numeric variable with more than two unique values, \code{"l"} for a binary numeric variable, and \code{"c"} for a factor, categorical, or character variable.} \item{nk}{number of knots, 0 for linear, or 3 or more. Default is 4 which will fit 3 parameters to continuous variables (one linear term and two nonlinear terms)} \item{B}{number of bootstrap resamples used to estimate covariance matrices of transformation parameters. Default is no bootstrapping.} \item{na.rm}{set to \code{FALSE} if you are sure that observations with \code{NA}s have already been removed} \item{tolerance}{singularity tolerance. List source code for \code{lm.fit.qr.bare} for details.} \item{crossval}{set to a positive integer k to compute k-fold cross-validated R-squared (square of first canonical correlation) and mean and median absolute error of predictions on the original scale} \item{digits}{number of digits to use in formatting for printing} \item{object}{an object created by \code{areg}} \item{whichx}{integer or character vector specifying which predictors are to have their transformations plotted (default is all). The \code{y} transformation is always plotted.} \item{type}{tells \code{predict} whether to obtain predicted untransformed \code{y} (\code{type='lp'}, the default) or predicted \code{y} on the original scale (\code{type='fitted'}), or the design matrix for the right-hand side (\code{type='x'}).} \item{what}{When the \code{y}-transform is non-monotonic you may specify \code{what='sample'} to \code{predict} to obtain a random sample of \code{y} values on the original scale instead of a matrix of all \code{y}-inverses. See \code{\link{inverseFunction}}.} \item{\dots}{arguments passed to the plot function.} } \details{ \code{areg} is a competitor of \code{ace} in the \code{acepack} package. Transformations from \code{ace} are seldom smooth enough and are often overfitted.
With \code{areg} the complexity can be controlled with the \code{nk} parameter, and predicted values are easy to obtain because parametric functions are fitted. If one side of the equation has a categorical variable with more than two categories and the other side has a continuous variable not assumed to act linearly, larger sample sizes are needed to reliably estimate transformations, as it is difficult to optimally score categorical variables to maximize \eqn{R^2} against a simultaneously optimally transformed continuous variable. } \value{ a list of class \code{"areg"} containing many objects } \references{Breiman and Friedman, Journal of the American Statistical Association (September, 1985).} \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{\code{\link{cancor}},\code{\link[acepack]{ace}}, \code{\link{transcan}}} \examples{ set.seed(1) ns <- c(30,300,3000) for(n in ns) { y <- sample(1:5, n, TRUE) x <- abs(y-3) + runif(n) par(mfrow=c(3,4)) for(k in c(0,3:5)) { z <- areg(x, y, ytype='c', nk=k) plot(x, z$tx) title(paste('R2=',format(z$rsquared))) tapply(z$ty, y, range) a <- tapply(x,y,mean) b <- tapply(z$ty,y,mean) plot(a,b) abline(lsfit(a,b)) # Should get same result to within linear transformation if reverse x and y w <- areg(y, x, xtype='c', nk=k) plot(z$ty, w$tx) title(paste('R2=',format(w$rsquared))) abline(lsfit(z$ty, w$tx)) } } par(mfrow=c(2,2)) # Example where one category in y differs from others but only in variance of x n <- 50 y <- sample(1:5,n,TRUE) x <- rnorm(n) x[y==1] <- rnorm(sum(y==1), 0, 5) z <- areg(x,y,xtype='l',ytype='c') z plot(z) z <- areg(x,y,ytype='c') z plot(z) \dontrun{ # Examine overfitting when true transformations are linear par(mfrow=c(4,3)) for(n in c(200,2000)) { x <- rnorm(n); y <- rnorm(n) + x for(nk in c(0,3,5)) { z <- areg(x, y, nk=nk, crossval=10, B=100) print(z) plot(z) title(paste('n=',n)) } } par(mfrow=c(1,1)) # Underfitting when true transformation is quadratic but overfitting # when y is allowed to be transformed set.seed(49) n <- 200 x <- rnorm(n); y <- rnorm(n) + .5*x^2 #areg(x, y, nk=0, crossval=10, B=100) #areg(x, y, nk=4, ytype='l', crossval=10, B=100) z <- areg(x, y, nk=4) #, crossval=10, B=100) z # Plot x vs. predicted value on original scale. 
# Since the y-transform is not monotonic, there are multiple y-inverses
xx <- seq(-3.5,3.5,length=1000)
yhat <- predict(z, xx, type='fitted')
plot(x, y, xlim=c(-3.5,3.5))
for(j in 1:ncol(yhat)) lines(xx, yhat[,j], col=j)
# Plot a random sample of possible y inverses
yhats <- predict(z, xx, type='fitted', what='sample')
points(xx, yhats, pch=2)
}
# True transformation of x1 is quadratic, y is linear
n <- 200
x1 <- rnorm(n); x2 <- rnorm(n); y <- rnorm(n) + x1^2
z <- areg(cbind(x1,x2),y,xtype=c('s','l'),nk=3)
par(mfrow=c(2,2))
plot(z)
# y transformation is inverse quadratic but areg gets the same answer by
# making x1 quadratic
n <- 5000
x1 <- rnorm(n); x2 <- rnorm(n); y <- (x1 + rnorm(n))^2
z <- areg(cbind(x1,x2),y,nk=5)
par(mfrow=c(2,2))
plot(z)
# Overfit 20 predictors when no true relationships exist
n <- 1000
x <- matrix(runif(n*20),n,20)
y <- rnorm(n)
z <- areg(x, y, nk=5)  # add crossval=4 to expose the problem
# Test predict function
n <- 50
x <- rnorm(n)
y <- rnorm(n) + x
g <- sample(1:3, n, TRUE)
z <- areg(cbind(x,g),y,xtype=c('s','c'))
range(predict(z, cbind(x,g)) - z$linear.predictors)
}
\keyword{smooth}
\keyword{regression}
\keyword{multivariate}
\keyword{models}
\concept{bootstrap}
Hmisc/man/upData.Rd0000644000176200001440000003145114252142701013577 0ustar liggesusers\name{upData}
\alias{cleanup.import}
\alias{upData}
\alias{dataframeReduce}
\title{Update a Data Frame or Cleanup a Data Frame after Importing}
\description{
\code{cleanup.import} will correct errors and shrink the size of data frames.  By default, double precision numeric variables are changed to integer when they contain no fractional components.  Infinite values or values greater than 1e20 in absolute value are set to NA.  This solves problems of importing Excel spreadsheets that contain occasional character values for numeric columns, as S converts these to \code{Inf} without warning.  There is also an option to convert variable names to lower case and to add labels to variables.  The latter can be made easier by importing a CNTLOUT dataset created by SAS PROC FORMAT and using the \code{sasdict} option as shown in the example below.  \code{cleanup.import} can also transform character or factor variables to dates.

\code{upData} is a function facilitating the updating of a data frame without attaching it in search position one.  New variables can be added, old variables can be modified, variables can be removed or renamed, and \code{"labels"} and \code{"units"} attributes can be provided.  Observations can be subsetted.  Various checks are made for errors and inconsistencies, with warnings issued to help the user.  Levels of factor variables can be replaced, especially using the \code{list} notation of the standard \code{merge.levels} function.  Unless \code{force.single} is set to \code{FALSE}, \code{upData} also converts double precision vectors to integer if no fractional values are present in a vector.  \code{upData} is also used to process R workspace objects created by StatTransfer, which puts variable and value labels as attributes on the data frame rather than on each variable.  If such attributes are present, they are used to define all the labels and value labels (through conversion to factor variables) before any label changes take place, and \code{force.single} is set to a default of \code{FALSE}, as StatTransfer already does conversion to integer.

Variables having labels but not classed \code{"labelled"} (e.g., data imported using the \code{haven} package) have that class added to them by \code{upData}.

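% A hypothetical sketch of a typical upData call (variable names are
% illustrative only; all arguments used are documented below):
%   d <- upData(d,
%               rename = c(sbp = 'sysbp'),           # rename a variable
%               bmi    = wt / (ht / 100) ^ 2,        # derive a new variable
%               labels = c(bmi = 'Body Mass Index'),
%               units  = c(bmi = 'kg/m2'),
%               drop   = 'scratch')                  # remove a variable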
The \code{dataframeReduce} function removes variables from a data frame that are problematic for certain analyses.  Variables can be removed because the fraction of missing values exceeds a threshold, because they are character or categorical variables having too many levels, or because they are binary and have too small a prevalence in one of the two values.  Categorical variables can also have their levels combined when a level is of low prevalence.  A data frame listing the actions taken is returned as the \code{"info"} attribute of the main returned data frame.
}
\usage{
cleanup.import(obj, labels, lowernames=FALSE,
               force.single=TRUE, force.numeric=TRUE, rmnames=TRUE,
               big=1e20, sasdict, print, datevars=NULL, datetimevars=NULL,
               dateformat='\%F', fixdates=c('none','year'),
               autodate=FALSE, autonum=FALSE, fracnn=0.3,
               considerNA=NULL, charfactor=FALSE)

upData(object, \dots, subset, rename, drop, keep, labels, units, levels,
       force.single=TRUE, lowernames=FALSE, caplabels=FALSE,
       moveUnits=FALSE, charfactor=FALSE, print=TRUE, html=FALSE)

dataframeReduce(data, fracmiss=1, maxlevels=NULL, minprev=0, print=TRUE)
}
\arguments{
\item{obj}{a data frame or list}
\item{object}{a data frame or list}
\item{data}{a data frame}
\item{force.single}{By default, double precision variables are converted to single precision (in S-Plus only) unless \code{force.single=FALSE}.  \code{force.single=TRUE} will also convert vectors having only integer values to have a storage mode of integer, in R or S-Plus.}
\item{force.numeric}{Sometimes importing will cause a numeric variable to be changed to a factor vector.  By default, \code{cleanup.import} will check each factor variable to see if the levels contain only numeric values and \code{""}.  In that case, the variable will be converted to numeric, with \code{""} converted to NA.  Set \code{force.numeric=FALSE} to prevent this behavior.}
\item{rmnames}{set to \code{FALSE} to not have \code{cleanup.import} remove \code{names} or \code{.Names} attributes from variables}
\item{labels}{a character vector the same length as the number of variables in \code{obj}.  These character values are taken to be variable labels in the same order of variables in \code{obj}.  For \code{upData}, \code{labels} is a named list or named vector with variables in no specific order.}
\item{lowernames}{set this to \code{TRUE} to change variable names to lower case.  \code{upData} does this before applying any other changes, so variable names given inside arguments to \code{upData} need to be lower case if \code{lowernames==TRUE}.}
\item{big}{a value such that values larger than this in absolute value are set to missing by \code{cleanup.import}}
\item{sasdict}{the name of a data frame containing a raw imported SAS PROC CONTENTS CNTLOUT= dataset.  This is used to define variable names and to add attributes to the new data frame specifying the original SAS dataset name and label.}
\item{print}{set to \code{TRUE} or \code{FALSE} to force or prevent printing of the current variable number being processed.  By default, such messages are printed if the product of the number of variables and number of observations in \code{obj} exceeds 500,000.  For \code{dataframeReduce} set \code{print} to \code{FALSE} to suppress printing information about dropped or modified variables.  Similarly for \code{upData}.}
\item{datevars}{character vector of names (after \code{lowernames} is applied) of variables to consider as a factor or character vector containing dates in a format matching \code{dateformat}.
The default is \code{"\%F"} which uses the yyyy-mm-dd format.} \item{datetimevars}{character vector of names (after \code{lowernames} is applied) of variables to consider to be date-time variables, with date formats as described under \code{datevars} followed by a space followed by time in hh:mm:ss format. \code{chron} is used to store date-time variables. If all times in the variable are 00:00:00 the variable will be converted to an ordinary date variable.} \item{dateformat}{for \code{cleanup.import} is the input format (see \code{\link{strptime}})} \item{fixdates}{for any of the variables listed in \code{datevars} that have a \code{dateformat} that \code{cleanup.import} understands, specifying \code{fixdates} allows corrections of certain formatting inconsistencies before the fields are attempted to be converted to dates (the default is to assume that the \code{dateformat} is followed for all observation for \code{datevars}). Currently \code{fixdates='year'} is implemented, which will cause 2-digit or 4-digit years to be shifted to the alternate number of digits when \code{dateform} is the default \code{"\%F"} or is \code{"\%y-\%m-\%d"}, \code{"\%m/\%d/\%y"}, or \code{"\%m/\%d/\%Y"}. Two-digits years are padded with \code{20} on the left. Set \code{dateformat} to the desired format, not the exceptional format. } \item{autodate}{set to \code{TRUE} to have \code{cleanup.import} determine and automatically handle factor or character vectors that mainly contain dates of the form YYYY-mm-dd, mm/dd/YYYY, YYYY, or mm/YYYY, where the later two are imputed to, respectively, July 3 and the 15th of the month. Takes effect when the fraction of non-dates (of non-missing values) is less than \code{fracnn} to allow for some free text such as \code{"unknown"}. Attributes \code{special.miss} and \code{imputed} are created for the vector so that \code{describe()} will inform the user. Illegal values are converted to \code{NA}s and stored in the \code{special.miss} attribute.} \item{autonum}{set to \code{TRUE} to have \code{cleanup.import} examine (after \code{autodate}) character and factor variables to see if they are legal numerics exact for at most a fraction of \code{fracnn} of non-missing non-numeric values. Qualifying variables are converted to numeric, and illegal values set to \code{NA} and stored in the \code{special.miss} attribute to enhance \code{describe} output.} \item{fracnn}{see \code{autodate} and \code{autonum}} \item{considerNA}{for \code{autodate} and \code{autonum}, considers character values in the vector \code{considerNA} to be the same as \code{NA}. Leading and trailing white space and upper/lower case are ignored.} \item{charfactor}{set to \code{TRUE} to change character variables to factors if they have fewer than n/2 unique values. Null strings and blanks are converted to \code{NA}s.} \item{\dots}{ for \code{upData}, one or more expressions of the form \code{variable=expression}, to derive new variables or change old ones. } \item{subset}{an expression that evaluates to a logical vector specifying which rows of \code{object} should be retained. The expressions should use the original variable names, i.e., before any variables are renamed but after \code{lowernames} takes effect.} \item{rename}{ list or named vector specifying old and new names for variables. Variables are renamed before any other operations are done. 
For example, to rename variables \code{age} and \code{sex} to respectively \code{Age} and \code{gender}, specify \code{rename=list(age="Age", sex="gender")} or \code{rename=c(age=\dots)}.}
\item{drop}{a vector of variable names to remove from the data frame}
\item{keep}{a vector of variable names to keep, with all other variables dropped}
\item{units}{a named vector or list defining \code{"units"} attributes of variables, in no specific order}
\item{levels}{a named list defining \code{"levels"} attributes for factor variables, in no specific order.  The values in this list may be character vectors redefining \code{levels} (in order) or another list (see \code{merge.levels} if using S-Plus).}
\item{caplabels}{set to \code{TRUE} to capitalize the first letter of each word in each variable label}
\item{moveUnits}{set to \code{TRUE} to look for units of measurements in variable labels and move them to a \code{"units"} attribute.  If an expression in a label is enclosed in parentheses or brackets it is assumed to be units if \code{moveUnits=TRUE}.}
\item{html}{set to \code{TRUE} to print conversion information as html verbatim at 0.6 size.  The user will need to put \code{results='asis'} in a \code{knitr} chunk header to properly render this output.}
\item{fracmiss}{the maximum permissible proportion of \code{NA}s for a variable to be kept.  Default is to keep all variables no matter how many \code{NA}s are present.}
\item{maxlevels}{the maximum number of levels of a character or categorical or factor variable before the variable is dropped}
\item{minprev}{the minimum proportion of non-missing observations in a category for a binary variable to be retained, and the minimum relative frequency of a category before it will be combined with other small categories}
}
\value{a new data frame}
\author{
Frank Harrell, Vanderbilt University
}
\seealso{
\code{\link{sas.get}}, \code{\link{data.frame}}, \code{\link{describe}},
\code{\link{label}}, \code{\link{read.csv}}, \code{\link{strptime}},
\code{\link{POSIXct}}, \code{\link{Date}}
}
\examples{
\dontrun{
dat <- read.table('myfile.asc')
dat <- cleanup.import(dat)
}
dat <- data.frame(a=1:3, d=c('01/02/2004',' 1/3/04',''))
cleanup.import(dat, datevars='d', dateformat='\%m/\%d/\%y', fixdates='year')

dat <- data.frame(a=(1:3)/7, y=c('a','b1','b2'), z=1:3)
dat2 <- upData(dat, x=x^2, x=x-5, m=x/10, rename=c(a='x'), drop='z',
               labels=c(x='X', y='test'),
               levels=list(y=list(a='a',b=c('b1','b2'))))
dat2
describe(dat2)
dat <- dat2    # copy to original name and delete dat2 if OK
rm(dat2)
dat3 <- upData(dat, X=X^2, subset = x < (3/7)^2 - 5, rename=c(x='X'))

# Remove hard to analyze variables from a redundancy analysis of all
# variables in the data frame
d <- dataframeReduce(dat, fracmiss=.1, minprev=.05, maxlevels=5)
# Could run redun(~., data=d) at this point or include dataframeReduce
# arguments in the call to redun

# If you import a SAS dataset created by PROC CONTENTS CNTLOUT=x.datadict,
# the LABELs from this dataset can be added to the data.
# Let's also convert names to lower case for the main data file
\dontrun{
mydata2 <- cleanup.import(mydata2, lowernames=TRUE, sasdict=datadict)
}
}
\keyword{data}
\keyword{manip}
Hmisc/man/rMultinom.Rd0000644000176200001440000000166512243661443014362 0ustar liggesusers\name{rMultinom}
\alias{rMultinom}
\title{Generate Multinomial Random Variables with Varying Probabilities}
\description{
Given a matrix of multinomial probabilities where rows correspond to observations and columns to categories (and each row sums to 1), generates a matrix with the same number of rows as has \code{probs} and with \code{m} columns.  The columns represent multinomial cell numbers, and within a row the columns are all samples from the same multinomial distribution.  The code is a modification of that in the \code{impute.polyreg} function in the \code{MICE} package.
}
\usage{
rMultinom(probs, m)
}
\arguments{
\item{probs}{matrix of probabilities}
\item{m}{number of samples for each row of \code{probs}}
}
\value{
an integer matrix having \code{m} columns
}
\seealso{\code{\link{rbinom}}}
\examples{
set.seed(1)
w <- rMultinom(rbind(c(.1,.2,.3,.4),c(.4,.3,.2,.1)),200)
t(apply(w, 1, table)/200)
}
\keyword{distribution}
Hmisc/man/find.matches.Rd0000644000176200001440000002207313714234051014726 0ustar liggesusers\name{find.matches}
\alias{find.matches}
\alias{summary.find.matches}
\alias{print.find.matches}
\alias{matchCases}
\title{Find Close Matches}
\description{
Compares each row in \code{x} against all the rows in \code{y}, finding rows in \code{y} with all columns within a tolerance of the values in a given row of \code{x}.  The default tolerance \code{tol} is zero, i.e., an exact match is required on all columns.  For qualifying matches, a distance measure is computed.  This is the sum of squares of differences between \code{x} and \code{y} after scaling the columns.  The default scaling values are \code{tol}, and for columns with \code{tol=1} the scale values are set to 1.0 (since they are ignored anyway).  Matches (up to \code{maxmatch} of them) are stored and listed in order of increasing distance.
\cr
The \code{summary} method prints a frequency distribution of the number of matches per observation in \code{x}, the median of the minimum distances for all matches per \code{x}, as a function of the number of matches, and the frequency of selection of duplicate observations as those having the smallest distance.  The \code{print} method prints the entire \code{matches} and \code{distance} components of the result from \code{find.matches}.
\cr
\code{matchCases} finds all controls that match cases on a single variable \code{x} within a tolerance of \code{tol}.  This is intended for prospective cohort studies that use matching for confounder adjustment (even though regression models usually work better).
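% An illustrative sketch (hypothetical data) of matching each case to
% controls on a single variable within a tolerance of 5:
%   x.case    <- c(52, 60, 47);  id.case    <- c('c1','c2','c3')
%   x.control <- 40:70;          id.control <- paste0('k', 1:31)
%   y.control <- rbinom(31, 1, .3)        # outcome carried along
%   m <- matchCases(x.case, c(1,0,1), id.case,
%                   x.control, y.control, id.control, tol=5)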
}
\usage{
find.matches(x, y, tol=rep(0, ncol(y)), scale=tol, maxmatch=10)
\method{summary}{find.matches}(object, \dots)
\method{print}{find.matches}(x, digits, \dots)

matchCases(xcase,    ycase,    idcase=names(ycase),
           xcontrol, ycontrol, idcontrol=names(ycontrol),
           tol=NULL,
           maxobs=max(length(ycase),length(ycontrol))*10,
           maxmatch=20, which=c('closest','random'))
}
\arguments{
\item{x}{a numeric matrix or the result of \code{find.matches}}
\item{y}{a numeric matrix with same number of columns as \code{x}}
\item{xcase}{ }
\item{xcontrol}{vectors, not necessarily of the same length, specifying a numeric variable used to match cases and controls}
\item{ycase}{ }
\item{ycontrol}{vectors or matrices, not necessarily having the same number of rows, specifying a variable to carry along from cases and matching controls.  If you instead want to carry along rows from a data frame, let \code{ycase} and \code{ycontrol} be non-overlapping integer subscripts of the donor data frame.}
\item{tol}{a vector of tolerances with number of elements the same as the number of columns of \code{y}, for \code{find.matches}.  For \code{matchCases} is a scalar tolerance.}
\item{scale}{a vector of scaling constants with number of elements the same as the number of columns of \code{y}.}
\item{maxmatch}{maximum number of matches to allow.  For \code{matchCases}, maximum number of controls to match with a case (default is 20).  If more than \code{maxmatch} matching controls are available, a random sample without replacement of \code{maxmatch} controls is used (if \code{which="random"}).}
\item{object}{an object created by \code{find.matches}}
\item{digits}{number of digits to use in printing distances}
\item{idcase}{ }
\item{idcontrol}{vectors the same length as \code{xcase} and \code{xcontrol} respectively, specifying the id of cases and controls.  Defaults are integers specifying original element positions within each of cases and controls.}
\item{maxobs}{maximum number of cases and all matching controls combined (maximum dimension of data frame resulting from \code{matchCases}).  Default is ten times the maximum of the number of cases and number of controls.  \code{maxobs} is used to allocate space for the resulting data frame.}
\item{which}{set to \code{"closest"} (the default) to match cases with up to \code{maxmatch} controls that most closely match on \code{x}.  Set \code{which="random"} to use randomly chosen controls.  In either case, only those controls within \code{tol} on \code{x} are allowed to be used.}
\item{\dots}{unused}
}
\value{
\code{find.matches} returns a list of class \code{find.matches} with elements \code{matches} and \code{distance}.  Both elements are matrices with the number of rows equal to the number of rows in \code{x}, and with \code{k} columns, where \code{k} is the maximum number of matches (\code{<= maxmatch}) that occurred.  The elements of \code{matches} are row identifiers of \code{y} that match, with zeros if fewer than \code{maxmatch} matches are found (blanks if \code{y} had row names).  \code{matchCases} returns a data frame with variables \code{idcase} (id of case currently being matched), \code{type} (factor variable with levels \code{"case"} and \code{"control"}), \code{id} (id of case if case row, or id of matching case), and \code{y}.
}
\author{
Frank Harrell  \cr
Department of Biostatistics  \cr
Vanderbilt University  \cr
\email{fh@fharrell.com}
}
\references{
Ming K, Rosenbaum PR (2001): A note on optimal matching with variable controls using the assignment algorithm.
J Comp Graph Stat 10:455--463.

Cepeda MS, Boston R, Farrar JT, Strom BL (2003): Optimal matching with a variable number of controls vs. a fixed number of controls for a cohort study: trade-offs.  J Clin Epidemiology 56:230--237.  Note: These papers were not used for the functions here but probably should have been.
}
\seealso{
\code{\link{scale}}, \code{\link{apply}}
}
\examples{
y <- rbind(c(.1, .2),c(.11, .22), c(.3, .4), c(.31, .41), c(.32, 5))
x <- rbind(c(.09,.21), c(.29,.39))
y
x
w <- find.matches(x, y, maxmatch=5, tol=c(.05,.05))

set.seed(111)   # so can replicate results
x <- matrix(runif(500), ncol=2)
y <- matrix(runif(2000), ncol=2)
w <- find.matches(x, y, maxmatch=5, tol=c(.02,.03))
w$matches[1:5,]
w$distance[1:5,]
# Find first x with 3 or more y-matches
num.match <- apply(w$matches, 1, function(x)sum(x > 0))
j <- ((1:length(num.match))[num.match > 2])[1]
x[j,]
y[w$matches[j,],]
summary(w)

# For many applications would do something like this:
# attach(df1)
# x <- cbind(age, sex)  # Just do as.matrix(df1) if df1 has no factor objects
# attach(df2)
# y <- cbind(age, sex)
# mat <- find.matches(x, y, tol=c(5,0))  # exact match on sex, 5y on age

# Demonstrate matchCases
xcase     <- c(1,3,5,12)
xcontrol  <- 1:6
idcase    <- c('A','B','C','D')
idcontrol <- c('a','b','c','d','e','f')
ycase     <- c(11,33,55,122)
ycontrol  <- c(11,22,33,44,55,66)
matchCases(xcase, ycase, idcase,
           xcontrol, ycontrol, idcontrol, tol=1)

# If y is a binary response variable, the following code
# will produce a Mantel-Haenszel summary odds ratio that
# utilizes the matching.
# Standard variance formula will not work here because
# a control will match more than one case
# WARNING: The M-H procedure exemplified here is suspect
# because of the small strata and widely varying number
# of controls per case.

x <- c(1, 2, 3, 3, 3, 6, 7, 12,  1, 1:7)
y <- c(0, 0, 0, 1, 0, 1, 1, 1,   1, 0, 0, 0, 0, 1, 1, 1)
case <- c(rep(TRUE, 8), rep(FALSE, 8))
id <- 1:length(x)

m <- matchCases(x[case], y[case], id[case],
                x[!case], y[!case], id[!case], tol=1)
iscase <- m$type=='case'
# Note: the first tapply ensures that event indicators are
# sorted by case id.  The second actually does something.
event.case    <- tapply(m$y[iscase],  m$idcase[iscase],  sum)
event.control <- tapply(m$y[!iscase], m$idcase[!iscase], sum)
n.control     <- tapply(!iscase,      m$idcase, sum)
n             <- tapply(m$y,          m$idcase, length)
or <- sum(event.case * (n.control - event.control) / n) /
      sum(event.control * (1 - event.case) / n)
or

# Bootstrap this estimator by sampling with replacement from
# subjects.  Assumes id is unique when combining cases+controls
# (id was constructed this way above).  The following algorithm
# puts all sampled controls back with the cases to whom they were
# originally matched.
ids <- unique(m$id)
idgroups <- split(1:nrow(m), m$id)
B <- 50   # in practice use many more
ors <- numeric(B)

# Function to order w by ids, leaving unassigned elements zero
align <- function(ids, w) {
  z <- structure(rep(0, length(ids)), names=ids)
  z[names(w)] <- w
  z
}

for(i in 1:B) {
  j <- sample(ids, replace=TRUE)
  obs <- unlist(idgroups[j])
  u <- m[obs,]
  iscase <- u$type=='case'
  n.case <- align(ids, tapply(u$type, u$idcase,
                              function(v)sum(v=='case')))
  n.control <- align(ids, tapply(u$type, u$idcase,
                                 function(v)sum(v=='control')))
  event.case <- align(ids, tapply(u$y[iscase],
                                  u$idcase[iscase], sum))
  event.control <- align(ids, tapply(u$y[!iscase],
                                     u$idcase[!iscase], sum))
  n <- n.case + n.control
  # Remove sets having 0 cases or 0 controls in resample
  s <- n.case > 0 & n.control > 0
  denom <- sum(event.control[s] * (n.case[s] - event.case[s]) / n[s])
  or <- if(denom==0) NA else
    sum(event.case[s] * (n.control[s] - event.control[s]) / n[s]) / denom
  ors[i] <- or
}
describe(ors)
}
\keyword{math}
\keyword{multivariate}
\keyword{htest}
\concept{bootstrap}
\concept{matching}
\concept{epidemiology}
\concept{case-control}
Hmisc/man/csv.get.Rd0000644000176200001440000001105612327102642013732 0ustar liggesusers\name{csv.get}
\alias{csv.get}
\title{Read Comma-Separated Text Data Files}
\description{
Read comma-separated text data files, allowing optional translation to lower case for variable names after making them valid S names.  There is a facility for reading long variable labels as one of the rows.  If labels are not specified and a final variable name is not the same as that in the header, the original variable name is saved as a variable label.  Uses \code{read.csv} if the \code{data.table} package is not in effect, otherwise calls \code{fread}.
}
\usage{
csv.get(file, lowernames=FALSE, datevars=NULL, datetimevars=NULL,
        dateformat='\%F', fixdates=c('none','year'), comment.char="",
        autodates=TRUE, allow=NULL, charfactor=FALSE,
        sep=',', skip=0, vnames=NULL, labels=NULL, \dots)
}
\arguments{
\item{file}{the file name for import.}
\item{lowernames}{set this to \code{TRUE} to change variable names to lower case.}
\item{datevars}{character vector of names (after \code{lowernames} is applied) of variables to consider as a factor or character vector containing dates in a format matching \code{dateformat}.  The default is \code{"\%F"} which uses the yyyy-mm-dd format.}
\item{datetimevars}{character vector of names (after \code{lowernames} is applied) of variables to consider to be date-time variables, with date formats as described under \code{datevars} followed by a space followed by time in hh:mm:ss format.  \code{chron} is used to store such variables.  If all times in the variable are 00:00:00 the variable will be converted to an ordinary date variable.}
\item{dateformat}{for \code{cleanup.import} is the input format (see \code{\link{strptime}})}
\item{fixdates}{for any of the variables listed in \code{datevars} that have a \code{dateformat} that \code{cleanup.import} understands, specifying \code{fixdates} allows corrections of certain formatting inconsistencies before the fields are attempted to be converted to dates (the default is to assume that the \code{dateformat} is followed for all observations for \code{datevars}).  Currently \code{fixdates='year'} is implemented, which will cause 2-digit or 4-digit years to be shifted to the alternate number of digits when \code{dateformat} is the default \code{"\%F"} or is \code{"\%y-\%m-\%d"}, \code{"\%m/\%d/\%y"}, or \code{"\%m/\%d/\%Y"}.
Two-digit years are padded with \code{20} on the left.  Set \code{dateformat} to the desired format, not the exceptional format.}
\item{comment.char}{a character vector of length one containing a single character or an empty string.  Use '""' to turn off the interpretation of comments altogether.}
\item{autodates}{Set to \code{TRUE} to allow the function to guess which variables are dates}
\item{allow}{a vector of characters allowed by \R that should not be converted to periods in variable names.  By default, underscores in variable names are converted to periods as with \R before version 1.9.}
\item{charfactor}{set to \code{TRUE} to change character variables to factors if they have fewer than n/2 unique values.  Blanks and null strings are converted to \code{NA}s.}
\item{sep}{field separator, defaults to comma}
\item{skip}{number of records to skip before data start.  Required if \code{vnames} or \code{labels} is given.}
\item{vnames}{number of row containing variable names, default is one}
\item{labels}{number of row containing variable labels, default is no labels}
\item{\dots}{arguments to pass to \code{read.csv} other than \code{skip} and \code{sep}.}
}
\details{
\code{csv.get} reads comma-separated text data files, allowing optional translation to lower case for variable names after making them valid S names.  Original possibly non-legal names are taken to be variable labels if \code{labels} is not specified.  Character or factor variables containing dates can be converted to date variables.  \code{cleanup.import} is invoked to finish the job.
}
\value{a new data frame.}
\author{Frank Harrell, Vanderbilt University}
\seealso{
\code{\link{sas.get}}, \code{\link{data.frame}}, \code{\link{cleanup.import}},
\code{\link{read.csv}}, \code{\link{strptime}}, \code{\link{POSIXct}},
\code{\link{Date}}, \code{\link[data.table]{fread}}
}
\examples{
\dontrun{
dat <- csv.get('myfile.csv')

# Read a csv file with junk in the first row, variable names in the
# second, long variable labels in the third, and junk in the 4th row
dat <- csv.get('myfile.csv', vnames=2, labels=3, skip=4)
}
}
\keyword{manip}
\keyword{IO}
\keyword{file}
Hmisc/man/getZip.Rd0000644000176200001440000000143314005007710013616 0ustar liggesusers\name{getZip}
\alias{getZip}
\title{Open a Zip File From a URL}
\description{
Allows downloading and reading of a zip file containing one file
}
\usage{
getZip(url, password=NULL)
}
\arguments{
\item{url}{either a path to a local file or a valid URL.}
\item{password}{required to decode password-protected zip files}
}
\value{
Returns a file I/O pipe.
}
\details{
Allows downloading and reading of a zip file containing one file.  The file may be password protected.  If a password is needed then one will be requested unless given.

Note: to make a password-protected zip file z.zip, do zip -e z myfile
}
\seealso{
\code{\link{pipe}}
}
\examples{
\dontrun{
read.csv(getZip('http://test.com/z.zip'))
}
}
\author{Frank E. Harrell}
\keyword{file}
\keyword{IO}
\concept{compressed file}
Hmisc/man/cnvrt.coords.Rd0000755000176200001440000001214512243661443015016 0ustar liggesusers\name{cnvrt.coords}
\alias{cnvrt.coords}
\title{Convert between the 5 different coordinate systems on a graphical device}
\description{
Takes a set of coordinates in any of the 5 coordinate systems (usr, plt, fig, dev, or tdev) and returns the same points in all 5 coordinate systems.
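% A small sketch of the conversion: find where the user-coordinate
% point (2, 5) falls in normalized device coordinates (assumes a plot
% is already open):
%   plot(1:10)
%   p <- cnvrt.coords(2, 5, input='usr')
%   p$dev    # x and y of the point in [0,1] device coordinates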
}
\usage{
cnvrt.coords(x, y = NULL, input = c("usr", "plt", "fig", "dev", "tdev"))
}
\arguments{
\item{x}{Vector, Matrix, or list of x coordinates (or x and y coordinates), NA's allowed.}
\item{y}{y coordinates (if \code{x} is a vector), NA's allowed.}
\item{input}{Character scalar indicating the coordinate system of the input points.}
}
\details{
Every plot has 5 coordinate systems:

usr (User): the coordinate system of the data, this is shown by the tick marks and axis labels.

plt (Plot): Plot area, coordinates range from 0 to 1 with 0 corresponding to the x and y axes and 1 corresponding to the top and right of the plot area.  Margins of the plot correspond to plot coordinates less than 0 or greater than 1.

fig (Figure): Figure area, coordinates range from 0 to 1 with 0 corresponding to the bottom and left edges of the figure (including margins, label areas) and 1 corresponds to the top and right edges.  fig and dev coordinates will be identical if there is only 1 figure area on the device (layout, mfrow, or mfcol has not been used).

dev (Device): Device area, coordinates range from 0 to 1 with 0 corresponding to the bottom and left of the device region within the outer margins and 1 corresponding to the top and right of the region within the outer margins.  If the outer margins are all set to 0 then tdev and dev should be identical.

tdev (Total Device): Total Device area, coordinates range from 0 to 1 with 0 corresponding to the bottom and left edges of the device (piece of paper, window on screen) and 1 corresponds to the top and right edges.
}
\value{
A list with 5 components, each component is a list with vectors named x and y.  The 5 sublists are:
\item{usr}{The coordinates of the input points in usr (User) coordinates.}
\item{plt}{The coordinates of the input points in plt (Plot) coordinates.}
\item{fig}{The coordinates of the input points in fig (Figure) coordinates.}
\item{dev}{The coordinates of the input points in dev (Device) coordinates.}
\item{tdev}{The coordinates of the input points in tdev (Total Device) coordinates.}
}
\author{Greg Snow \email{greg.snow@imail.org}}
\note{
You must provide both x and y, but one of them may be \code{NA}.

This function is becoming deprecated with the new functions \code{grconvertX} and \code{grconvertY} in R version 2.7.0 and beyond.  These new functions use the correct coordinate system names and have more coordinate systems available; you should start using them instead.
}
\seealso{
\code{\link{par}} specifically 'usr', 'plt', and 'fig'.  Also 'xpd' for plotting outside of the plotting region and 'mfrow' and 'mfcol' for multi-figure plotting.
\code{\link{subplot}}, \code{grconvertX} and \code{grconvertY} in R 2.7.0 and later}
\examples{
old.par <- par(no.readonly=TRUE)

par(mfrow=c(2,2), xpd=NA)

# generate some sample data
tmp.x <- rnorm(25, 10, 2)
tmp.y <- rnorm(25, 50, 10)
tmp.z <- rnorm(25, 0, 1)

plot( tmp.x, tmp.y)

# draw a diagonal line across the plot area
tmp1 <- cnvrt.coords( c(0,1), c(0,1), input='plt' )
lines(tmp1$usr, col='blue')

# draw a diagonal line across the figure region
tmp2 <- cnvrt.coords( c(0,1), c(1,0), input='fig')
lines(tmp2$usr, col='red')

# save coordinate of point 1 and y value near top of plot for future plots
tmp.point1 <- cnvrt.coords(tmp.x[1], tmp.y[1])
tmp.range1 <- cnvrt.coords(NA, 0.98, input='plt')

# make a second plot and draw a line linking point 1 in each plot
plot(tmp.y, tmp.z)

tmp.point2 <- cnvrt.coords( tmp.point1$dev, input='dev' )
arrows( tmp.y[1], tmp.z[1], tmp.point2$usr$x, tmp.point2$usr$y,
        col='green')

# draw another plot and add rectangle showing same range in 2 plots
plot(tmp.x, tmp.z)
tmp.range2 <- cnvrt.coords(NA, 0.02, input='plt')
tmp.range3 <- cnvrt.coords(NA, tmp.range1$dev$y, input='dev')
rect( 9, tmp.range2$usr$y, 11, tmp.range3$usr$y, border='yellow')

# put a label just to the right of the plot and
# near the top of the figure region.
text( cnvrt.coords(1.05, NA, input='plt')$usr$x,
      cnvrt.coords(NA, 0.75, input='fig')$usr$y,
      "Label", adj=0)

par(mfrow=c(1,1))

## create a subplot within another plot (see also subplot)
plot(1:10, 1:10)

tmp <- cnvrt.coords( c( 1, 4, 6, 9), c(6, 9, 1, 4) )

par(plt = c(tmp$dev$x[1:2], tmp$dev$y[1:2]), new=TRUE)
hist(rnorm(100))

par(fig = c(tmp$dev$x[3:4], tmp$dev$y[3:4]), new=TRUE)
hist(rnorm(100))

par(old.par)
}
\keyword{dplot}
\keyword{aplot}
Hmisc/man/ggfreqScatter.Rd0000644000176200001440000001220214015203243015150 0ustar liggesusers\name{ggfreqScatter}
\alias{ggfreqScatter}
\title{Frequency Scatterplot}
\description{
Uses \code{ggplot2} to plot a scatterplot or dot-like chart for the case where there is a very large number of overlapping values.  This works for continuous and categorical \code{x} and \code{y}.  For continuous variables it serves the same purpose as hexagonal binning.  Counts for overlapping points are grouped into quantile groups, and levels of transparency and rainbow colors are used to provide count information.

Instead, you can specify \code{stick=TRUE} to not use color but to encode cell frequencies with the height of a black line y-centered at the middle of the bins.  Relative frequencies are not transformed, and the maximum cell frequency is shown in a caption.  Every point with at least a frequency of one is depicted with a full-height light gray vertical line, scaled to the above overall maximum frequency.  In this way the relative frequency is the proportion of these light gray lines that are black, and one can see points whose frequencies are too low to produce visible black lines.

The result can also be passed to \code{ggplotly}.  Actual cell frequencies are added to the hover text in that case using the \code{label} \code{ggplot2} aesthetic.
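% A minimal sketch (simulated data) of the interactive workflow noted
% above, using frequencies continuously for color (g=0) and showing
% cell frequencies in the hover text:
%   x <- round(rnorm(10000), 1); y <- round(rnorm(10000), 1)
%   g <- ggfreqScatter(x, y, g=0)
%   plotly::ggplotly(g, tooltip='label')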
}
\usage{
ggfreqScatter(x, y, by=NULL, bins=50, g=10, cuts=NULL,
              xtrans = function(x) x,
              ytrans = function(y) y,
              xbreaks = pretty(x, 10),
              ybreaks = pretty(y, 10),
              xminor  = NULL, yminor = NULL,
              xlab = as.character(substitute(x)),
              ylab = as.character(substitute(y)),
              fcolors = viridis::viridis(10), nsize=FALSE,
              stick=FALSE, html=FALSE, prfreq=FALSE, \dots)
}
\arguments{
\item{x}{x-variable}
\item{y}{y-variable}
\item{by}{an optional vector used to make separate plots for each distinct value using \code{facet_wrap()}}
\item{bins}{for continuous \code{x} or \code{y} is the number of bins to create by rounding.  Ignored for categorical variables.  If a 2-vector, the first element corresponds to \code{x} and the second to \code{y}.}
\item{g}{number of quantile groups to make for frequency counts.  Use \code{g=0} to use frequencies continuously for color coding.  This is recommended only when using \code{plotly}.}
\item{cuts}{instead of using \code{g}, specify \code{cuts} to provide the vector of cuts for categorizing frequencies for assignment to colors}
\item{xtrans,ytrans}{functions specifying transformations to be made before binning and plotting}
\item{xbreaks,ybreaks}{vectors of values to label on axis, on original scale}
\item{xminor,yminor}{values at which to put minor tick marks, on original scale}
\item{xlab,ylab}{axis labels.  If not specified and the variable has a \code{label}, that label will be used.}
\item{fcolors}{\code{colors} argument to pass to \code{scale_color_gradientn} to color code frequencies.  Use \code{fcolors=gray.colors(10, 0.75, 0)} to show gray scale, for example.  Another good choice is \code{fcolors=hcl.colors(10, 'Blue-Red')}.}
\item{nsize}{set to \code{TRUE} to not vary color or transparency but instead to size the symbols in relation to the number of points.  Best when both \code{x} and \code{y} are discrete.  \code{ggplot2} \code{size} is taken as the fourth root of the frequency.  If there are 15 or fewer unique frequencies, all the unique frequencies are used; otherwise \code{g} quantile groups of frequencies are used.}
\item{stick}{set to \code{TRUE} to not use colors but instead use varying-height black vertical lines to depict cell frequencies.}
\item{html}{set to \code{TRUE} to use html in axis labels instead of plotmath}
\item{prfreq}{set to \code{TRUE} to print the frequency distributions of the binned coordinate frequencies}
\item{\dots}{arguments to pass to \code{geom_point} such as \code{shape} and \code{size}}
}
\value{a \code{ggplot} object}
\author{Frank Harrell}
\seealso{\code{\link[Hmisc]{cut2}}}
\examples{
set.seed(1)
x <- rnorm(1000)
y <- rnorm(1000)
count <- sample(1:100, 1000, TRUE)
x <- rep(x, count)
y <- rep(y, count)
# color=alpha=NULL below makes loess smooth over all points
g <- ggfreqScatter(x, y) +   # might add g=0 if using plotly
     geom_smooth(aes(color=NULL, alpha=NULL), se=FALSE) +
     ggtitle("Using Deciles of Frequency Counts, 2500 Bins")
g
# plotly::ggplotly(g, tooltip='label')  # use plotly, hover text = freq. only
# Plotly makes it somewhat interactive, with hover text tooltips

# Instead use varying-height sticks to depict frequencies
ggfreqScatter(x, y, stick=TRUE) +
 labs(subtitle='Relative height of black lines to gray lines is proportional to cell frequency.
Note that points with even tiny frequency are visible (gray line with no visible black line).')

# Try with x categorical
x1 <- sample(c('cat', 'dog', 'giraffe'), length(x), TRUE)
ggfreqScatter(x1, y)

# Try with y categorical
y1 <- sample(LETTERS[1:10], length(x), TRUE)
ggfreqScatter(x, y1)

# Both categorical, larger point symbols, box instead of circle
ggfreqScatter(x1, y1, shape=15, size=7)
# Vary box size instead
ggfreqScatter(x1, y1, nsize=TRUE, shape=15)
}
\keyword{hplot}
\concept{grouping}
\concept{categorization}
\concept{discretization}
Hmisc/man/bpplot.Rd0000644000176200001440000000440313036501416013657 0ustar liggesusers\name{bpplot}
\alias{bpplot}
\title{Box-percentile plots}
\description{
Produces side-by-side box-percentile plots from several vectors or a list of vectors.
}
\usage{
bpplot(\dots, name=TRUE, main="Box-Percentile Plot",
       xlab="", ylab="", srtx=0, plotopts=NULL)
}
\arguments{
\item{...}{vectors or lists containing numeric components (e.g., the output of \code{split}).}
\item{name}{character vector of names for the groups.  Default is \code{TRUE} to put names on the x-axis.  Such names are taken from the data vectors or the \code{names} attribute of the first argument if it is a list.  Set \code{name} to \code{FALSE} to suppress names.  If a character vector is supplied the names in the vector are used to label the groups.}
\item{main}{main title for the plot.}
\item{xlab}{x axis label.}
\item{ylab}{y axis label.}
\item{srtx}{rotation angle for x-axis labels.  Default is zero.}
\item{plotopts}{a list of other parameters to send to \code{plot}}
}
\value{
There are no returned values
}
\section{Side Effects}{
A plot is created on the current graphics device.
}
\section{BACKGROUND}{
Box-percentile plots are similar to boxplots, except box-percentile plots supply more information about the univariate distributions.  At any height the width of the irregular "box" is proportional to the percentile of that height, up to the 50th percentile, and above the 50th percentile the width is proportional to 100 minus the percentile.  Thus, the width at any given height is proportional to the percent of observations that are more extreme in that direction.  As in boxplots, the median, 25th and 75th percentiles are marked with line segments across the box.
}
\author{
Jeffrey Banfield \cr
\email{umsfjban@bill.oscs.montana.edu} \cr
Modified by F. Harrell 30Jun97
}
\references{
Esty WW, Banfield J: The box-percentile plot.  J Statistical Software 8 No. 17, 2003.
}
\seealso{
\code{\link{panel.bpplot}}, \code{\link{boxplot}}, \code{\link{Ecdf}},
\code{\link[lattice:xyplot]{bwplot}}
}
\examples{
set.seed(1)
x1 <- rnorm(500)
x2 <- runif(500, -2, 2)
x3 <- abs(rnorm(500))-2
bpplot(x1, x2, x3)
g <- sample(1:2, 500, replace=TRUE)
bpplot(split(x2, g), name=c('Group 1','Group 2'))
rm(x1,x2,x3,g)
}
\keyword{nonparametric}
\keyword{hplot}
Hmisc/man/nin.Rd0000644000176200001440000000132412243661443013150 0ustar liggesusers\name{\%nin\%}
\alias{\%nin\%}
\title{Find Matching (or Non-Matching) Elements}
\description{
\code{\%nin\%} is a binary operator, which returns a logical vector indicating if there is a match or not for its left operand.  A true vector element indicates no match in the left operand; false indicates a match.
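% A short illustrative use: dropping rows whose group is in an
% exclusion list (hypothetical data):
%   d <- data.frame(grp = c('a','b','c','d'), y = 1:4)
%   d[d$grp \%nin\% c('b','d'), ]   # keeps the rows with grp 'a' and 'c'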
}
\usage{
x \%nin\% table
}
\arguments{
\item{x}{a vector (numeric, character, factor)}
\item{table}{a vector (numeric, character, factor), matching the mode of \code{x}}
}
\value{
vector of logical values with length equal to length of \code{x}.
}
\seealso{
\code{\link{match}} \code{\link{\%in\%}}
}
\examples{
c('a','b','c') \%nin\% c('a','b')
}
\keyword{manip}
\keyword{character}
Hmisc/man/symbol.freq.Rd0000644000176200001440000000352312243661443014630 0ustar liggesusers\name{symbol.freq}
\alias{symbol.freq}
\title{Graphic Representation of a Frequency Table}
\description{
This function can be used to represent contingency tables graphically.  Frequency counts are represented as the heights of "thermometers" by default; you can also specify \code{symbol='circle'} to the function.  There is an option to include marginal frequencies, which are plotted on a halved scale so as to not overwhelm the plot.  If you do not ask for marginal frequencies to be plotted using \code{marginals=T}, \code{symbol.freq} will ask you to click the mouse where a reference symbol is to be drawn to assist in reading the scale of the frequencies.

\code{label} attributes, if present, are used for x- and y-axis labels.  Otherwise, names of calling arguments are used.
}
\usage{
symbol.freq(x, y, symbol = c("thermometer", "circle"),
            marginals = FALSE, orig.scale = FALSE,
            inches = 0.25, width = 0.15, subset, srtx = 0, ...)
}
\arguments{
\item{x}{first variable to cross-classify}
\item{y}{second variable}
\item{symbol}{specify \code{"thermometer"} (the default) or \code{"circle"}}
\item{marginals}{set to \code{TRUE} to add marginal frequencies (scaled by half) to the plot}
\item{orig.scale}{set to \code{TRUE} when the first two arguments are numeric variables; this uses their original values for x and y coordinates}
\item{inches}{see \code{\link{symbols}}}
\item{width}{see \code{thermometers} option in \code{symbols}}
\item{subset}{the usual subsetting vector}
\item{srtx}{rotation angle for x-axis labels}
\item{\dots}{other arguments to pass to \code{symbols}}
}
\author{Frank Harrell}
\seealso{\code{\link{symbols}}}
\examples{
\dontrun{
getHdata(titanic)
attach(titanic)
age.tertile <- cut2(titanic$age, g=3)
symbol.freq(age.tertile, pclass, marginals=T, srtx=45)
detach(2)
}}
\keyword{hplot}
Hmisc/man/summaryRc.Rd0000644000176200001440000001302213714234042014342 0ustar liggesusers\name{summaryRc}
\alias{summaryRc}
\title{Graphical Summarization of Continuous Variables Against a Response}
\description{
\code{summaryRc} is a continuous version of \code{\link{summary.formula}} with \code{method='response'}.  It uses the \code{\link{plsmo}} function to compute the possibly stratified \code{\link{lowess}} nonparametric regression estimates, and plots them along with the data density, with selected quantiles of the overall distribution (over strata) of each \code{x} shown as arrows on top of the graph.  All the \code{x} variables must be numeric and continuous or nearly continuous.
}
\usage{
summaryRc(formula, data=NULL, subset=NULL,
          na.action=NULL, fun = function(x) x,
          na.rm = TRUE, ylab=NULL, ylim=NULL, xlim=NULL,
          nloc=NULL, datadensity=NULL,
          quant = c(0.05, 0.1, 0.25, 0.5, 0.75, 0.90, 0.95),
          quantloc=c('top','bottom'), cex.quant=.6, srt.quant=0,
          bpplot = c('none', 'top', 'top outside', 'top inside', 'bottom'),
          height.bpplot=0.08,
          trim=NULL, test = FALSE, vnames = c('labels', 'names'), \dots)
}
\arguments{
\item{formula}{An \R formula with additive effects.
The \code{formula} may contain one or more invocations of the \code{stratify} function whose arguments are defined below.  This causes the entire analysis to be stratified by cross-classifications of the combined list of stratification factors.  This stratification will be reflected as separate \code{lowess} curves.}
\item{data}{name or number of a data frame.  Default is the current frame.}
\item{subset}{a logical vector or integer vector of subscripts used to specify the subset of data to use in the analysis.  The default is to use all observations in the data frame.}
\item{na.action}{function for handling missing data in the input data.  The default is a function defined here called \code{na.retain}, which keeps all observations for processing, with missing variables or not.}
\item{fun}{function for transforming \code{lowess} estimates.  Default is the identity function.}
\item{na.rm}{\code{TRUE} (the default) to exclude \code{NA}s before passing data to \code{fun} to compute statistics, \code{FALSE} otherwise.}
\item{ylab}{\code{y}-axis label.  Default is the label attribute of the \code{y} variable, or its name.}
\item{ylim}{\code{y}-axis limits.  By default each graph is scaled on its own.}
\item{xlim}{a list with elements named as the variable names appearing on the \code{x}-axis, with each element being a 2-vector specifying lower and upper limits.  Any variable not appearing in the list will have its limits computed and possibly \code{trim}med.}
\item{nloc}{location for the sample size.  Specify \code{nloc=FALSE} to suppress, or \code{nloc=list(x=,y=)} where \code{x,y} are relative coordinates in the data window.  Default position is in the largest empty space.}
\item{datadensity}{see \code{\link{plsmo}}.  Defaults to \code{TRUE} if there is a \code{stratify} variable, \code{FALSE} otherwise.}
\item{quant}{vector of quantiles to use for summarizing the marginal distribution of each \code{x}.  These must be numbers between 0 and 1 inclusive.  Use \code{NULL} to omit quantiles.}
\item{quantloc}{specify \code{quantloc='bottom'} to place quantiles at the bottom of each plot rather than the default}
\item{cex.quant}{character size for writing which quantiles are represented.  Set to \code{0} to suppress quantile labels.}
\item{srt.quant}{angle for text for quantile labels}
\item{bpplot}{if not \code{'none'} will draw an extended box plot at the location given by \code{bpplot}, and quantiles discussed above will be suppressed.  Specifying \code{bpplot='top'} is the same as specifying \code{bpplot='top inside'}.}
\item{height.bpplot}{height in inches of the horizontal extended box plot}
\item{trim}{The default is to plot from the 10th smallest to the 10th largest \code{x} if the number of non-NAs exceeds 200, otherwise to use the entire range of \code{x}.  Specify another quantile to use other limits, e.g., \code{trim=0.01} will use the first and last percentiles.}
\item{test}{Set to \code{TRUE} to plot test statistics (not yet implemented).}
\item{vnames}{By default, plots are usually labeled with variable labels (see the \code{label} and \code{sas.get} functions).  To use the shorter variable names, specify \code{vnames="names"}.
}
\item{...}{arguments passed to \code{\link{plsmo}}}
}
\value{no value is returned}
\author{
Frank Harrell\cr
Department of Biostatistics\cr
Vanderbilt University\cr
\email{fh@fharrell.com}
}
\seealso{
\code{\link{plsmo}}, \code{\link{stratify}}, \code{\link{label}},
\code{\link{formula}}, \code{\link{panel.bpplot}}
}
\examples{
options(digits=3)
set.seed(177)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
age <- rnorm(500, 50, 5)
bp  <- rnorm(500, 120, 7)
units(age) <- 'Years'; units(bp) <- 'mmHg'
label(bp) <- 'Systolic Blood Pressure'
L <- .5*(sex == 'm') + 0.1 * (age - 50)
y <- rbinom(500, 1, plogis(L))
par(mfrow=c(1,2))
summaryRc(y ~ age + bp)
# For x limits use 1st and 99th percentiles to frame extended box plots
summaryRc(y ~ age + bp, bpplot='top', datadensity=FALSE, trim=.01)
summaryRc(y ~ age + bp + stratify(sex),
          label.curves=list(keys='lines'), nloc=list(x=.1, y=.05))
y2 <- rbinom(500, 1, plogis(L + .5))
Y <- cbind(y, y2)
summaryRc(Y ~ age + bp + stratify(sex),
          label.curves=list(keys='lines'), nloc=list(x=.1, y=.05))
}
\keyword{hplot}
Hmisc/man/rm.boot.Rd0000644000176200001440000005555314275453214013763 0ustar liggesusers\name{rm.boot}
\alias{rm.boot}
\alias{plot.rm.boot}
\title{Bootstrap Repeated Measurements Model}
\description{
For a dataset containing a time variable, a scalar response variable, and an optional subject identification variable, obtains least squares estimates of the coefficients of a restricted cubic spline function or a linear regression in time after adjusting for subject effects through the use of subject dummy variables.  Then the fit is bootstrapped \code{B} times, either by treating time and subject ID as fixed (i.e., conditioning the analysis on them) or as random variables.  For the former, the residuals from the original model fit are used as the basis of the bootstrap distribution.  For the latter, samples are taken jointly from the time, subject ID, and response vectors to obtain unconditional distributions.

If a subject \code{id} variable is given, the bootstrap sampling will be based on samples with replacement from subjects rather than from individual data points.  In other words, either none or all of a given subject's data will appear in a bootstrap sample.  This cluster sampling takes into account any correlation structure that might exist within subjects, so that confidence limits are corrected for within-subject correlation.  Assuming that ordinary least squares estimates, which ignore the correlation structure, are consistent (which is almost always true) and efficient (which would not be true for certain correlation structures or for datasets in which the number of observation times vary greatly from subject to subject), the resulting analysis will be a robust, efficient repeated measures analysis for the one-sample problem.

Predicted values of the fitted models are evaluated by default at a grid of 100 equally spaced time points ranging from the minimum to maximum observed time points.  Predictions are for the average subject effect.  Pointwise confidence intervals are optionally computed separately for each of the points on the time grid.  However, simultaneous confidence regions that control the level of confidence for the entire regression curve lying within a band are often more appropriate, as they allow the analyst to draw conclusions about nuances in the mean time response profile that were not stated a priori.

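% A minimal sketch (simulated repeated measures; illustrative only) of
% the cluster bootstrap described above:
%   set.seed(1)
%   id   <- rep(1:20, each=10)            # 20 subjects, 10 times each
%   time <- rep(seq(0, 12, length=10), 20)
%   y    <- .1 * time + rnorm(20)[id] + rnorm(200, 0, .5)
%   f <- rm.boot(time, y, id, B=100)      # resamples subjects, not points
%   plot(f)                               # simultaneous confidence band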
The method of \cite{Tibshirani (1997)} is used to easily obtain simultaneous confidence sets for the set of coefficients of the spline or linear regression function as well as the average intercept parameter (over subjects).  Here one computes the objective criterion (here both the -2 log likelihood evaluated at the bootstrap estimate of beta but with respect to the original design matrix and response vector, and the sum of squared errors in predicting the original response vector) for the original fit as well as for all of the bootstrap fits.  The confidence set of the regression coefficients is the set of all coefficients that are associated with objective function values that are less than or equal to, say, the 0.95 quantile of the vector of \eqn{B + 1} objective function values.  For the coefficients satisfying this condition, predicted curves are computed at the time grid, and minima and maxima of these curves are computed separately at each time point to derive the final simultaneous confidence band.

By default, the log likelihoods that are computed for obtaining the simultaneous confidence band assume independence within subject.  This will cause problems unless such log likelihoods have very high rank correlation with the log likelihood allowing for dependence.  To allow for correlation or to estimate the correlation function, see the \code{cor.pattern} argument below.
}
\usage{
rm.boot(time, y, id=seq(along=time), subset,
        plot.individual=FALSE,
        bootstrap.type=c('x fixed','x random'),
        nk=6, knots, B=500, smoother=supsmu,
        xlab, xlim, ylim=range(y),
        times=seq(min(time), max(time), length=100),
        absorb.subject.effects=FALSE,
        rho=0, cor.pattern=c('independent','estimate'), ncor=10000,
        \dots)
\method{plot}{rm.boot}(x, obj2, conf.int=.95,
     xlab=x$xlab, ylab=x$ylab,
     xlim, ylim=x$ylim,
     individual.boot=FALSE,
     pointwise.band=FALSE,
     curves.in.simultaneous.band=FALSE,
     col.pointwise.band=2,
     objective=c('-2 log L','sse','dep -2 log L'), add=FALSE, ncurves,
     multi=FALSE, multi.method=c('color','density'),
     multi.conf   =c(.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.95,.99),
     multi.density=c( -1,90,80,70,60,50,40,30,20,10,  7,  4),
     multi.col    =c(  1, 8,20, 5, 2, 7,15,13,10,11,  9, 14),
     subtitles=TRUE, \dots)
}
\arguments{
\item{time}{numeric time vector}
\item{y}{continuous numeric response vector of length the same as \code{time}.  Subjects having multiple measurements have the measurements strung out.}
\item{x}{an object returned from \code{rm.boot}}
\item{id}{subject ID variable.  If omitted, it is assumed that each time-response pair is measured on a different subject.}
\item{subset}{subset of observations to process if not all the data}
\item{plot.individual}{set to \code{TRUE} to plot nonparametrically smoothed time-response curves for each subject}
\item{bootstrap.type}{specifies whether to treat the time and subject ID variables as fixed or random}
\item{nk}{number of knots in the restricted cubic spline function fit.  The number of knots may be 0 (denoting linear regression) or an integer greater than 2 in which k knots results in \eqn{k - 1} regression coefficients excluding the intercept.  The default is 6 knots.}
\item{knots}{vector of knot locations.  May be specified if \code{nk} is omitted.}
\item{B}{number of bootstrap repetitions.  Default is 500.}
\item{smoother}{a smoothing function that is used if \code{plot.individual=TRUE}.  Default is \code{\link{supsmu}}.}
\item{xlab}{label for x-axis.
Default is \code{"units"} attribute of the original \code{time} variable, or \code{"Time"} if no such attribute was defined using the \code{units} function. } \item{xlim}{ specifies x-axis plotting limits. Default is to use range of times specified to \code{rm.boot}. } \item{ylim}{ for \code{rm.boot} this is a vector of y-axis limits used if \code{plot.individual=TRUE}. It is also passed along for later use by \code{plot.rm.boot}. For \code{plot.rm.boot}, \code{ylim} can be specified, to override the value stored in the object stored by \code{rm.boot}. The default is the actual range of \code{y} in the input data. } \item{times}{ a sequence of times at which to evaluated fitted values and confidence limits. Default is 100 equally spaced points in the observed range of \code{time}. } \item{absorb.subject.effects}{ If \code{TRUE}, adjusts the response vector \code{y} before re-sampling so that the subject-specific effects in the initial model fit are all zero. Then in re-sampling, subject effects are not used in the models. This will downplay one of the sources of variation. This option is used mainly for checking for consistency of results, as the re-sampling analyses are simpler when \code{absort.subject.effects=TRUE}. } \item{rho}{ The log-likelihood function that is used as the basis of simultaneous confidence bands assumes normality with independence within subject. To check the robustness of this assumption, if \code{rho} is not zero, the log-likelihood under multivariate normality within subject, with constant correlation \code{rho} between any two time points, is also computed. If the two log-likelihoods have the same ranks across re-samples, alllowing the correlation structure does not matter. The agreement in ranks is quantified using the Spearman rank correlation coefficient. The \code{\link{plot}} method allows the non-zero intra-subject correlation log-likelihood to be used in deriving the simultaneous confidence band. Note that this approach does assume homoscedasticity. } \item{cor.pattern}{ More generally than using an equal-correlation structure, you can specify a function of two time vectors that generates as many correlations as the length of these vectors. For example, \code{cor.pattern=function(time1,time2) 0.2^(abs(time1-time2)/10)} would specify a dampening serial correlation pattern. \code{cor.pattern} can also be a list containing vectors \code{x} (a vector of absolute time differences) and \code{y} (a corresponding vector of correlations). To estimate the correlation function as a function of absolute time differences within subjects, specify \code{cor.pattern="estimate"}. The products of all possible pairs of residuals (or at least up to \code{ncor} of them) within subjects will be related to the absolute time difference. The correlation function is estimated by computing the sample mean of the products of standardized residuals, stratified by absolute time difference. The correlation for a zero time difference is set to 1 regardless of the \code{\link{lowess}} estimate. NOTE: This approach fails in the presence of large subject effects; correcting for such effects removes too much of the correlation structure in the residuals. } \item{ncor}{ the maximum number of pairs of time values used in estimating the correlation function if \code{cor.pattern="estimate"} } \item{\dots}{ other arguments to pass to \code{smoother} if \code{plot.individual=TRUE} } \item{obj2}{ a second object created by \code{rm.boot} that can also be passed to \code{plot.rm.boot}. 
This is used for two-sample problems for which the time profiles are allowed to differ between the two groups. The bootstrapped predicted y values for the second fit are subtracted from the fitted values for the first fit so that the predicted mean response for group 1 minus the predicted mean response for group 2 is what is plotted. The confidence bands that are plotted are also for this difference. For the simultaneous confidence band, the objective criterion is taken to be the sum of the objective criteria (-2 log L or sum of squared errors) for the separate fits for the two groups. The \code{times} vectors must have been identical for both calls to \code{rm.boot}, although \code{NA}s can be inserted by the user in one or both of the time vectors in the \code{rm.boot} objects so as to suppress certain sections of the difference curve from being plotted. } \item{conf.int}{ the confidence level to use in constructing simultaneous, and optionally pointwise, bands. Default is 0.95. } \item{ylab}{ label for y-axis. Default is the \code{"label"} attribute of the original \code{y} variable, or \code{"y"} if no label was assigned to \code{y} (using the \code{label} function, for example). } \item{individual.boot}{ set to \code{TRUE} to plot the first 100 bootstrap regression fits } \item{pointwise.band}{ set to \code{TRUE} to draw a pointwise confidence band in addition to the simultaneous band } \item{curves.in.simultaneous.band}{ set to \code{TRUE} to draw all bootstrap regression fits that had a sum of squared errors (obtained by predicting the original \code{y} vector from the original \code{time} vector and \code{id} vector) that was less than or equal to the \code{conf.int} quantile of all bootstrapped models (plus the original model). This will show how the point by point max and min were computed to form the simultaneous confidence band. } \item{col.pointwise.band}{ color for the pointwise confidence band. Default is \samp{2}, which defaults to red for default Windows S-PLUS setups. } \item{objective}{ the default is to use the -2 times log of the Gaussian likelihood for computing the simultaneous confidence region. If neither \code{cor.pattern} nor \code{rho} was specified to \code{rm.boot}, the independent homoscedastic Gaussian likelihood is used. Otherwise the dependent homoscedastic likelihood is used according to the specified or estimated correlation pattern. Specify \code{objective="sse"} to instead use the sum of squared errors. } \item{add}{ set to \code{TRUE} to add curves to an existing plot. If you do this, titles and subtitles are omitted. } \item{ncurves}{ when using \code{individual.boot=TRUE} or \code{curves.in.simultaneous.band=TRUE}, you can plot a random sample of \code{ncurves} of the fitted curves instead of plotting up to \code{B} of them. } \item{multi}{ set to \code{TRUE} to draw multiple simultaneous confidence bands shaded with different colors. Confidence levels vary over the values in the \code{multi.conf} vector. } \item{multi.method}{ specifies the method of shading when \code{multi=TRUE}. Default is to use colors, with the default colors chosen so that when the graph is printed under S-Plus for Windows 4.0 to an HP LaserJet printer, the confidence regions are naturally ordered by darkness of gray-scale. Regions closer to the point estimates (i.e., the center) are darker. Specify \code{multi.method="density"} to instead use densities of lines drawn per inch in the confidence regions, with all regions drawn with the default color.
The \code{\link{polygon}} function is used to shade the regions. } \item{multi.conf}{ vector of confidence levels, in ascending order. Default is to use 12 confidence levels ranging from 0.05 to 0.99. } \item{multi.density}{ vector of densities in lines per inch corresponding to \code{multi.conf}. As is the convention in the \code{\link{polygon}} function, a density of -1 indicates a solid region. } \item{multi.col}{ vector of colors corresponding to \code{multi.conf}. See \code{multi.method} for rationale. } \item{subtitles}{ set to \code{FALSE} to suppress drawing subtitles for the plot } } \value{ an object of class \code{rm.boot} is returned by \code{rm.boot}. The principal object stored in the returned object is a matrix of regression coefficients for the original fit and all of the bootstrap repetitions (object \code{Coef}), along with vectors of the corresponding -2 log likelihoods and sums of squared errors. The original fit object from \code{lm.fit.qr} is stored in \code{fit}. For this fit, a cell means model is used for the \code{id} effects. \code{plot.rm.boot} returns a list containing the vector of times used for plotting along with the overall fitted values, lower and upper simultaneous confidence limits, and optionally the pointwise confidence limits. } \details{ Observations having missing \code{time} or \code{y} are excluded from the analysis. As most repeated measurement studies consider the times as design points, the fixed covariable case is the default. Bootstrapping the residuals from the initial fit assumes that the model is correctly specified. Even if the covariables are fixed, doing an unconditional bootstrap is still appropriate, and for large sample sizes unconditional confidence intervals are only slightly wider than conditional ones. For moderate to small sample sizes, the \code{bootstrap.type="x random"} method can be fairly conservative. If not all subjects have the same number of observations (after deleting observations containing missing values) and if \code{bootstrap.type="x fixed"}, bootstrapped residual vectors may have a length m that is different from the number of original observations n. If \eqn{m > n} for a bootstrap repetition, the first n elements of the randomly drawn residuals are used. If \eqn{m < n}, the residual vector is appended with a random sample with replacement of length \eqn{n - m} from itself. A warning message is issued if this happens. If the number of time points per subject varies, the bootstrap results for \code{bootstrap.type="x fixed"} can still be invalid, as this method assumes that a vector (over subjects) of all residuals can be added to the original yhats, and a varying number of points will cause mis-alignment. For \code{bootstrap.type="x random"} in the presence of significant subject effects, the analysis is approximate as the subjects used in any one bootstrap fit will not be the entire list of subjects. The average (over subjects used in the bootstrap sample) intercept is used from that bootstrap sample as a predictor of average subject effects in the overall sample. Once the bootstrap coefficient matrix is stored by \code{rm.boot}, \code{plot.rm.boot} can be run multiple times with different options (e.g., different confidence levels). See \code{\link[rms]{bootcov}} in the \pkg{rms} library for a general approach to handling repeated measurement data for ordinary linear models, binary and ordinal models, and survival models, using the unconditional bootstrap.
\code{\link[rms]{bootcov}} does not handle bootstrapping residuals. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \references{ Feng Z, McLerran D, Grizzle J (1996): A comparison of statistical methods for clustered data analysis with Gaussian error. Stat in Med 15:1793--1806. Tibshirani R, Knight K (1997): Model search and inference by bootstrap "bumping". Technical Report, Department of Statistics, University of Toronto. \cr \url{http://statweb.stanford.edu/~tibs/}. Presented at the Joint Statistical Meetings, Chicago, August 1996. Efron B, Tibshirani R (1993): An Introduction to the Bootstrap. New York: Chapman and Hall. Diggle PJ, Verbyla AP (1998): Nonparametric estimation of covariance structure in longitudinal data. Biometrics 54:401--415. Chapman IM, Hartman ML, et al (1997): Effect of aging on the sensitivity of growth hormone secretion to insulin-like growth factor-I negative feedback. J Clin Endocrinol Metab 82:2996--3004. Li Y, Wang YG (2008): Smooth bootstrap methods for analysis of longitudinal data. Stat in Med 27:937--953. (potential improvements to cluster bootstrap; not implemented here) } \seealso{ \code{\link{rcspline.eval}}, \code{\link{lm}}, \code{\link{lowess}}, \code{\link{supsmu}}, \code{\link[rms]{bootcov}}, \code{\link{units}}, \code{\link{label}}, \code{\link{polygon}}, \code{\link{reShape}} } \examples{ # Generate multivariate normal responses with equal correlations (.7) # within subjects and no correlation between subjects # Simulate realizations from a piecewise linear population time-response # profile with large subject effects, and fit using a 6-knot spline # Estimate the correlation structure from the residuals, as a function # of the absolute time difference # Function to generate n p-variate normal variates with mean vector u and # covariance matrix S # Slight modification of function written by Bill Venables # See also the built-in function rmvnorm mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) { Z <- matrix(rnorm(n * p), p, n) t(u + t(chol(S)) \%*\% Z) } n <- 20 # Number of subjects sub <- .5*(1:n) # Subject effects # Specify functional form for time trend and compute non-stochastic component times <- seq(0, 1, by=.1) g <- function(times) 5*pmax(abs(times-.5),.3) ey <- g(times) # Generate multivariate normal errors for 20 subjects at 11 times # Assume equal correlations of rho=.7, independent subjects nt <- length(times) rho <- .7 set.seed(19) errors <- mvrnorm(n, p=nt, S=diag(rep(1-rho,nt))+rho) # Note: first random number seed used gave rise to mean(errors)=0.24! # Add E[Y], error components, and subject effects y <- matrix(rep(ey,n), ncol=nt, byrow=TRUE) + errors + matrix(rep(sub,nt), ncol=nt) # String out data into long vectors for times, responses, and subject ID y <- as.vector(t(y)) times <- rep(times, n) id <- sort(rep(1:n, nt)) # Show lowess estimates of time profiles for individual subjects f <- rm.boot(times, y, id, plot.individual=TRUE, B=25, cor.pattern='estimate', smoother=lowess, bootstrap.type='x fixed', nk=6) # In practice use B=400 or 500
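# Added illustrative sketch, not part of the original example: the essence
# of the simultaneous-band computation described above (the "bumping"
# method of Tibshirani and Knight 1997).  Here 'obj' and 'pred' are
# hypothetical stand-ins for the vector of B+1 objective criteria and the
# (B+1) x length(times) matrix of predicted curves that plot.rm.boot
# computes internally:
# keep  <- obj <= quantile(obj, 0.95)
# lower <- apply(pred[keep, , drop=FALSE], 2, min)
# upper <- apply(pred[keep, , drop=FALSE], 2, max)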
# This will compute a dependent-structure log-likelihood in addition # to one assuming independence. By default, the dep. structure # objective will be used by the plot method (could have specified rho=.7) # NOTE: Estimating the correlation pattern from the residuals does not # work in cases such as this one where there are large subject effects # Plot fits for a random sample of 10 of the 25 bootstrap fits plot(f, individual.boot=TRUE, ncurves=10, ylim=c(6,8.5)) # Plot pointwise and simultaneous confidence regions plot(f, pointwise.band=TRUE, col.pointwise=1, ylim=c(6,8.5)) # Plot population response curve at average subject effect ts <- seq(0, 1, length=100) lines(ts, g(ts)+mean(sub), lwd=3) \dontrun{ # # Handle a 2-sample problem in which curves are fitted # separately for males and females and we wish to estimate the # difference in the time-response curves for the two sexes. # The objective criterion will be taken by plot.rm.boot as the # total of the two sums of squared errors for the two models # knots <- rcspline.eval(c(time.f,time.m), nk=6, knots.only=TRUE) # Use same knots for both sexes, and use a times vector that # uses a range of times that is included in the measurement # times for both sexes # tm <- seq(max(min(time.f),min(time.m)), min(max(time.f),max(time.m)),length=100) f.female <- rm.boot(time.f, bp.f, id.f, knots=knots, times=tm) f.male <- rm.boot(time.m, bp.m, id.m, knots=knots, times=tm) plot(f.female) plot(f.male) # The following plots female minus male response, with # a sequence of shaded confidence bands for the difference plot(f.female,f.male,multi=TRUE) # Do 1000 simulated analyses to check simultaneous coverage # probability. Use a null regression model with Gaussian errors n.per.pt <- 30 n.pt <- 10 null.in.region <- 0 for(i in 1:1000) { y <- rnorm(n.pt*n.per.pt) time <- rep(1:n.per.pt, n.pt) # Add the following line and add ,id=id to rm.boot to use clustering # id <- sort(rep(1:n.pt, n.per.pt)) # Because we are ignoring patient id, this simulation is effectively # using 1 point from each of 300 patients, with times 1,2,3,...,30 f <- rm.boot(time, y, B=500, nk=5, bootstrap.type='x fixed') g <- plot(f, ylim=c(-1,1), pointwise=FALSE) null.in.region <- null.in.region + all(g$lower<=0 & g$upper>=0) prn(c(i=i,null.in.region=null.in.region)) } # Simulation Results: 905/1000 simultaneous confidence bands # fully contained the horizontal line at zero } } \keyword{regression} \keyword{multivariate} \keyword{htest} \keyword{hplot} \concept{bootstrap} \concept{repeated measures} \concept{longitudinal data} Hmisc/man/getRs.Rd0000644000176200001440000000573014324312027013446 0ustar liggesusers\name{getRs} \alias{getRs} \title{Interact with github rscripts Project} \description{ The github rscripts project at \url{https://github.com/harrelfe/rscripts} contains R scripts that are primarily analysis templates for teaching with RStudio. This function allows the user to print an organized list of available scripts, to download a script and \code{source()} it into the current session (the default), to download a script and load it into an RStudio script editor window, to list scripts whose major category contains a given string (ignoring case), or to list all major and minor categories. If \code{options(localHfiles=TRUE)} the scripts are read from local directory \code{~/R/rscripts} instead of from github. } \usage{ getRs(file=NULL, guser='harrelfe', grepo='rscripts', gdir='raw/master', dir=NULL, browse=c('local', 'browser'), cats=FALSE, put=c('source', 'rstudio')) } \arguments{ \item{file}{a character string containing a script file name.
Omit \code{file} to obtain a list of available scripts with major and minor categories.} \item{guser}{GitHub user name, default is \code{'harrelfe'}} \item{grepo}{GitHub repository name, default is \code{'rscripts'}} \item{gdir}{GitHub directory under which to find retrievable files} \item{dir}{directory under \code{grepo} in which to find files} \item{browse}{When showing the rscripts contents directory, the default is to list in tabular form in the console. Specify \code{browse='browser'} to open the online contents in a web browser.} \item{cats}{Leave at the default (\code{FALSE}) to list whole contents or download a script. Specify \code{cats=TRUE} to list major and minor categories available. Specify a character string to list all scripts whose major category contains the string (ignoring case).} \item{put}{Leave at the default (\code{'source'}) to \code{source()} the file. This is useful when the file just defines a function you want to use in the session. Use \code{put='rstudio'} to load the file into the RStudio script editor window using the \code{rstudioapi} \code{navigateToFile} function. If RStudio is not running, \code{file.edit()} is used instead.} } \value{a data frame or list, depending on arguments} \author{Frank Harrell and Cole Beck} \seealso{\code{\link{download.file}}} \examples{ \dontrun{ getRs() # list available scripts scripts <- getRs() # likewise, but store in an object that can easily # be viewed on demand in RStudio getRs('introda.r') # download introda.r and put in script editor getRs(cats=TRUE) # list available major and minor categories categories <- getRs(cats=TRUE) # likewise but store results in a list for later viewing getRs(cats='reg') # list all scripts in a major category containing 'reg' getRs('importREDCap.r') # source() to define a function # source() a new version of the Hmisc package's cut2 function: getRs('cut2.s', grepo='Hmisc', dir='R') } } \keyword{interface} Hmisc/man/pc1.Rd0000644000176200001440000000177512243661443013051 0ustar liggesusers\name{pc1} \alias{pc1} \title{First Principal Component} \description{ Given a numeric matrix which may or may not contain \code{NA}s, \code{pc1} standardizes the columns to have mean 0 and variance 1 and computes the first principal component using \code{\link{prcomp}}. The proportion of variance explained by this component is printed, and so are the coefficients of the original (not scaled) variables. These coefficients may be applied to the raw data to obtain the first PC. } \usage{ pc1(x, hi) } \arguments{ \item{x}{numeric matrix} \item{hi}{if specified, the first PC is scaled so that its maximum value is \code{hi} and its minimum value is zero} } \value{ The vector containing the value of the first PC for each observation. An attribute \code{"coef"} is attached to this vector. \code{"coef"} contains the raw-variable coefficients. } \author{Frank Harrell} \seealso{\code{\link{prcomp}}} \examples{ set.seed(1) x1 <- rnorm(100) x2 <- x1 + rnorm(100) w <- pc1(cbind(x1,x2)) attr(w,'coef') } \keyword{multivariate} Hmisc/man/spss.get.Rd0000644000176200001440000000443313023765545014144 0ustar liggesusers\name{spss.get} \alias{spss.get} \title{Enhanced Importing of SPSS Files} \description{ \code{spss.get} invokes the \code{read.spss} function in the \pkg{foreign} package to read an SPSS file, with a default output format of \code{"data.frame"}. The \code{label} function is used to attach labels to individual variables instead of to the data frame as done by \code{read.spss}.
By default, integer-valued variables are converted to a storage mode of integer unless \code{force.single=FALSE}. Date variables are converted to \R \code{Date} variables. By default, underscores in names are converted to periods. } \usage{ spss.get(file, lowernames=FALSE, datevars = NULL, use.value.labels = TRUE, to.data.frame = TRUE, max.value.labels = Inf, force.single=TRUE, allow=NULL, charfactor=FALSE, reencode = NA) } \arguments{ \item{file}{input SPSS save file. May be a file on the WWW, indicated by \code{file} starting with \code{'http://'} or \code{'https://'}.} \item{lowernames}{set to \code{TRUE} to convert variable names to lower case} \item{datevars}{vector of variable names containing dates to be converted to \R internal format} \item{use.value.labels}{see \code{\link[foreign]{read.spss}}} \item{to.data.frame}{see \code{\link[foreign]{read.spss}}; default is \code{TRUE} for \code{spss.get}} \item{max.value.labels}{see \code{\link[foreign]{read.spss}}} \item{force.single}{set to \code{FALSE} to prevent integer-valued variables from being converted from storage mode \code{double} to \code{integer}} \item{allow}{a vector of characters allowed by \R that should not be converted to periods in variable names. By default, underscores in variable names are converted to periods as with \R before version 1.9.} \item{charfactor}{set to \code{TRUE} to change character variables to factors if they have fewer than n/2 unique values. Blanks and null strings are converted to \code{NA}s.} \item{reencode}{see \code{\link[foreign]{read.spss}}} } \value{ a data frame or list } \author{Frank Harrell} \seealso{\code{\link[foreign]{read.spss}},\code{\link{cleanup.import}},\code{\link{sas.get}}} \examples{ \dontrun{ w <- spss.get('/tmp/my.sav', datevars=c('birthdate','deathdate')) } } \keyword{interface} \keyword{manip} \keyword{file} \concept{SPSS data file} Hmisc/man/Save.Rd0000644000176200001440000000301012753125074013255 0ustar liggesusers\name{Save} \alias{Save} \alias{Load} \title{Facilitate Use of save and load to Remote Directories} \description{ These functions are slightly enhanced versions of \code{save} and \code{load} that allow a target directory to be specified using \code{options(LoadPath="pathname")}. If the \code{LoadPath} option is not set, the current working directory is used. } \usage{ # options(LoadPath='mypath') Save(object, name=deparse(substitute(object)), compress=TRUE) Load(object) } \arguments{ \item{object}{the name of an object, usually a data frame. It must not be quoted.} \item{name}{an optional name to assign to the object and to use as the file name prefix, if a name different from that of \code{object} is desired} \item{compress}{see \code{\link{save}}. Default is \code{TRUE} which corresponds to \code{gzip}.} } \details{ \code{Save} creates a temporary version of the object under the name given by the user, so that \code{save} will internalize this name. Then subsequent \code{Load} or \code{load} will cause an object of the original name to be created in the global environment. The name of the \R data file is assumed to be the name of the object (or the value of \code{name}) appended with \code{".rda"}.
} \author{Frank Harrell} \seealso{\code{\link{save}}, \code{\link{load}}} \examples{ \dontrun{ d <- data.frame(x=1:3, y=11:13) options(LoadPath='../data/rda') Save(d) # creates ../data/rda/d.rda Load(d) # reads ../data/rda/d.rda Save(d, 'D') # creates object D and saves it in .../D.rda } } \keyword{data} \keyword{file} \keyword{utilities} Hmisc/man/makeNstr.Rd0000644000176200001440000000153112243661443014150 0ustar liggesusers\name{makeNstr} \alias{makeNstr} \title{ Create a String that is a Repeat of a Substring } \description{ Takes a character and creates a string that is the character repeated \code{len} times. } \usage{ makeNstr(char, len) } \arguments{ \item{char}{ character to be repeated } \item{len}{ number of times to repeat \code{char}. } } \value{ A string that is \code{char} repeated \code{len} times. } \author{ Charles Dupont } \seealso{ \code{\link[base]{paste}}, \code{\link[base]{rep}} } \examples{ makeNstr(" ", 5) \dontshow{ if(makeNstr(" ", 5) != "     ") stop("makeNstr failed test") } } \keyword{manip} \keyword{character} \concept{string} \concept{concat} Hmisc/man/ynbind.Rd0000644000176200001440000000416112255611621013645 0ustar liggesusers\name{ynbind} \alias{ynbind} \alias{[.ynbind} \alias{pBlock} \alias{[.pBlock} \title{Combine Variables in a Matrix} \description{ \code{ynbind} column binds a series of related yes/no variables, allowing for a final argument \code{label} used to label the panel created for the group. \code{label}s for individual variables are collected into a vector attribute \code{"labels"} for the result; original variable names are used in place of labels for those variables without labels. A positive response is taken to be \code{y, yes, present} (ignoring case) or a \code{logical} \code{TRUE} value. By default, the columns are sorted in ascending order of the overall proportion of positives. A subsetting method is provided for objects of class \code{"ynbind"}. \code{pBlock} creates a matrix similarly labeled, from a general set of variables (without special handling of binaries), and sets to \code{NA} any observation not in \code{subset} so that when that block of variables is analyzed it will be only for that subset.
} \author{Frank Harrell} \seealso{\code{\link{summaryP}}} \examples{ x1 <- c('yEs', 'no', 'UNKNOWN', NA) x2 <- c('y', 'n', 'no', 'present') label(x2) <- 'X2' X <- ynbind(x1, x2, label='x1-2') X[1:3,] pBlock(x1, x2, subset=2:3, label='x1-2') } \keyword{misc} \keyword{utilities} Hmisc/man/rlegend.Rd0000644000176200001440000000366512243661443014012 0ustar liggesusers\name{rlegend} \alias{rlegend} \alias{rlegendg} \title{Special Version of legend for R} \description{ \code{rlegend} is a version of \code{\link{legend}} for \R that implements \code{plot=FALSE}, adds \code{grid=TRUE}, and defaults \code{lty}, \code{lwd}, \code{pch} to \code{NULL} and checks for \code{length>0} rather than \code{missing()}, so it's easier to deal with non-applicable parameters. But when \pkg{grid} is in effect, the preferred function to use is \code{rlegendg}, which calls the \pkg{lattice} \code{\link{draw.key}} function. } \usage{ rlegend(x, y, legend, fill, col = "black", lty = NULL, lwd = NULL, pch = NULL, angle = NULL, density = NULL, bty = "o", bg = par("bg"), pt.bg = NA, cex = 1, xjust = 0, yjust = 1, x.intersp = 1, y.intersp = 1, adj = 0, text.width = NULL, merge = do.lines && has.pch, trace = FALSE, ncol = 1, horiz = FALSE, plot = TRUE, grid = FALSE, \dots) rlegendg(x, y, legend, col=pr$col[1], lty=NULL, lwd=NULL, pch=NULL, cex=pr$cex[1], other=NULL) } \arguments{ \item{x,y,legend,fill,col,lty,lwd,pch,angle,density,bty,bg,pt.bg,cex,xjust, yjust,x.intersp,y.intersp,adj,text.width,merge,trace,ncol,horiz}{ see \code{\link{legend}} } \item{plot}{set to \code{FALSE} to suppress drawing the legend. This is used to compute the size needed when the legend is drawn with a later call to \code{rlegend}. } \item{grid}{set to \code{TRUE} if the \pkg{grid} package is in effect} \item{\dots}{see \code{\link{legend}}} \item{other}{ a list containing other arguments to pass to \code{draw.key}. See the help file for \code{\link{xyplot}}. } } \value{ a list with elements \code{rect} and \code{text}. \code{rect} has elements \code{w, h, left, top} with size/position information. } \author{Frank Harrell and R-Core} \seealso{ \code{\link{legend}}, \code{\link[lattice]{draw.key}}, \code{\link{xyplot}} } \keyword{aplot} Hmisc/man/somers2.Rd0000644000176200001440000000377313714234043013764 0ustar liggesusers\name{somers2} \alias{somers2} \title{ Somers' Dxy Rank Correlation } \description{ Computes Somers' Dxy rank correlation between a variable \code{x} and a binary (0-1) variable \code{y}, and the corresponding receiver operating characteristic curve area \code{c}. Note that \code{Dxy = 2(c-0.5)}. \code{somers2} allows for a \code{weights} variable, which specifies frequencies to associate with each observation. } \usage{ somers2(x, y, weights=NULL, normwt=FALSE, na.rm=TRUE) } \arguments{ \item{x}{ typically a predictor variable. \code{NA}s are allowed. } \item{y}{ a numeric outcome variable coded \code{0-1}. \code{NA}s are allowed. } \item{weights}{ a numeric vector of observation weights (usually frequencies). Omit or specify a zero-length vector to do an unweighted analysis. } \item{normwt}{ set to \code{TRUE} to make \code{weights} sum to the actual number of non-missing observations. } \item{na.rm}{ set to \code{FALSE} to suppress checking for NAs. }} \value{ a vector with the named elements \code{C}, \code{Dxy}, \code{n} (number of non-missing pairs), and \code{Missing}. Uses the formula \code{C = (mean(rank(x)[y == 1]) - (n1 + 1)/2)/(n - n1)}, where \code{n1} is the frequency of \code{y=1}.
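As a small worked example of this formula: with \code{x=1:4} and \code{y=c(0,0,1,1)}, \code{rank(x)} is \code{1:4}, \code{mean(rank(x)[y == 1])} is 3.5, and with \code{n=4} and \code{n1=2} this gives \code{C = (3.5 - 1.5)/(4 - 2) = 1}, reflecting perfect separation.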
} \details{ The \code{rcorr.cens} function, although slower than \code{somers2} for large sample sizes, can also be used to obtain Dxy for non-censored binary \code{y}, and it has the advantage of computing the standard deviation of the correlation index. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \seealso{ \code{\link{rcorr.cens}}, \code{\link{rank}}, \code{\link{wtd.rank}} } \examples{ set.seed(1) predicted <- runif(200) dead <- sample(0:1, 200, TRUE) roc.area <- somers2(predicted, dead)["C"] # Check weights x <- 1:6 y <- c(0,0,1,0,1,1) f <- c(3,2,2,3,2,1) somers2(x, y) somers2(rep(x, f), rep(y, f)) somers2(x, y, f) } \keyword{nonparametric} \concept{logistic regression model} \concept{predictive accuracy} Hmisc/man/src.Rd0000644000176200001440000000135212243661443013154 0ustar liggesusers\name{src} \alias{src} \title{Source a File from the Current Working Directory} \description{ \code{src} concatenates \code{".s"} to its argument, quotes the result, and \code{source}s in the file. It sets \code{options(last.source)} to this file name so that \code{src()} can be issued to re-\code{source} the file when it is edited. } \usage{ src(x) } \arguments{ \item{x}{an unquoted file name aside from \code{".s"}. This base file name must be a legal S name.} } \section{Side Effects}{ Sets system option \code{last.source} } \author{Frank Harrell} \seealso{\code{\link{source}}} \examples{ \dontrun{ src(myfile) # source("myfile.s") src() # re-source myfile.s } } \keyword{file} \keyword{programming} \keyword{utilities} Hmisc/man/latexCheckOptions.Rd0000644000176200001440000000124513211012065015777 0ustar liggesusers\name{latexCheckOptions} \alias{latexCheckOptions} \title{Check whether the options for latex functions have been specified.} \description{ Check whether the options for latex functions have been specified. If any of\cr \code{options()[c("latexcmd","dviExtension","xdvicmd")]} are \code{NULL}, an error message is displayed. } \usage{ latexCheckOptions(...) } \arguments{ \item{\dots}{ Any arguments are ignored. } } \value{ If any \code{NULL} options are detected, the invisible text of the error message. If all three options have non-\code{NULL} values, NULL. } \author{ Richard M. Heiberger } \seealso{ \code{\link[Hmisc]{latex}} } \keyword{ utilities} Hmisc/man/bystats.Rd0000644000176200001440000001340313714234051014051 0ustar liggesusers\name{bystats} \alias{bystats} \alias{print.bystats} \alias{latex.bystats} \alias{bystats2} \alias{print.bystats2} \alias{latex.bystats2} \title{ Statistics by Categories } \description{ For any number of cross-classification variables, \code{bystats} returns a matrix with the sample size, number missing \code{y}, and \code{fun(non-missing y)}, with the cross-classifications designated by rows. Uses Harrell's modification of the \code{interaction} function to produce cross-classifications. The default \code{fun} is \code{mean}, and if \code{y} is binary, the mean is labeled as \code{Fraction}. There is a \code{print} method as well as a \code{latex} method for objects created by \code{bystats}. \code{bystats2} handles the special case in which there are 2 classification variables, and places the first one in rows and the second in columns. The \code{print} method for \code{bystats2} uses the \code{print.char.matrix} function to organize statistics for cells into boxes.
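For example, \code{bystats(cholesterol, sex, race, fun=median)} produces one row per sex-race cross-classification plus a final row summarizing all observations combined.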
} \usage{ bystats(y, \dots, fun, nmiss, subset) \method{print}{bystats}(x, \dots) \method{latex}{bystats}(object, title, caption, rowlabel, \dots) bystats2(y, v, h, fun, nmiss, subset) \method{print}{bystats2}(x, abbreviate.dimnames=FALSE, prefix.width=max(nchar(dimnames(x)[[1]])), \dots) \method{latex}{bystats2}(object, title, caption, rowlabel, \dots) } \arguments{ \item{y}{ a binary, logical, or continuous variable or a matrix or data frame of such variables. If \code{y} is a data frame it is converted to a matrix. If \code{y} is a data frame or matrix, computations are done on subsets of the rows of \code{y}, and you should specify \code{fun} so as to be able to operate on the matrix. For matrix \code{y}, any column with a missing value causes the entire row to be considered missing, and the row is not passed to \code{fun}. } \item{...}{ For \code{bystats}, one or more classification variables separated by commas. For \code{print.bystats}, options passed to \code{print.default} such as \code{digits}. For \code{latex.bystats} and \code{latex.bystats2}, options passed to \code{latex.default} such as \code{digits}. If you pass \code{cdec} to \code{latex.default}, keep in mind that the first one or two positions (depending on \code{nmiss}) should have zeros since these correspond with frequency counts. } \item{v}{ vertical variable for \code{bystats2}. Will be converted to \code{factor}. } \item{h}{ horizontal variable for \code{bystats2}. Will be converted to \code{factor}. } \item{fun}{ a function to compute on the non-missing \code{y} for a given subset. You must specify \code{fun=} in front of the function name or definition. \code{fun} may return a single number or a vector or matrix of any length. Matrix results are rolled out into a vector, with names preserved. When \code{y} is a matrix, a common \code{fun} is \code{function(y) apply(y, 2, ff)} where \code{ff} is the name of a function which operates on one column of \code{y}. } \item{nmiss}{ A column containing a count of missing values is included if \code{nmiss=TRUE} or if there is at least one missing value. } \item{subset}{ a vector of subscripts or logical values indicating the subset of data to analyze } \item{abbreviate.dimnames}{set to \code{TRUE} to abbreviate \code{dimnames} in output} \item{prefix.width}{see \code{\link{print.char.matrix}}} \item{title}{ \code{title} to pass to \code{latex.default}. Default is the first word of the character string version of the first calling argument. } \item{caption}{ caption to pass to \code{latex.default}. Default is the \code{heading} attribute from the object produced by \code{bystats}. } \item{rowlabel}{ \code{rowlabel} to pass to \code{latex.default}. Default is the \code{byvarnames} attribute from the object produced by \code{bystats}. For \code{bystats2} the default is \code{""}. } \item{x}{an object created by \code{bystats} or \code{bystats2}} \item{object}{an object created by \code{bystats} or \code{bystats2}} } \value{ for \code{bystats}, a matrix with row names equal to the classification labels and column names \code{N, Missing, funlab}, where \code{funlab} is determined from \code{fun}. A row is added to the end with the summary statistics computed on all observations combined. The class of this matrix is \code{bystats}. For \code{bystats2}, returns a 3-dimensional array with the last dimension corresponding to statistics being computed. The class of the array is \code{bystats2}. } \section{Side Effects}{ \code{latex} produces a \code{.tex} file.
} \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{interaction}}, \code{\link{cut}}, \code{\link{cut2}}, \code{\link{latex}}, \code{\link{print.char.matrix}}, \code{\link{translate}} } \examples{ \dontrun{ bystats(sex==2, county, city) bystats(death, race) bystats(death, cut2(age,g=5), race) bystats(cholesterol, cut2(age,g=4), sex, fun=median) bystats(cholesterol, sex, fun=quantile) bystats(cholesterol, sex, fun=function(x)c(Mean=mean(x),Median=median(x))) latex(bystats(death,race,nmiss=FALSE,subset=sex=="female"), digits=2) f <- function(y) c(Hazard=sum(y[,2])/sum(y[,1])) # f() gets the hazard estimate for right-censored data from exponential dist. bystats(cbind(d.time, death), race, sex, fun=f) bystats(cbind(pressure, cholesterol), age.decile, fun=function(y) c(Median.pressure =median(y[,1]), Median.cholesterol=median(y[,2]))) y <- cbind(pressure, cholesterol) bystats(y, age.decile, fun=function(y) apply(y, 2, median)) # same result as last one bystats(y, age.decile, fun=function(y) apply(y, 2, quantile, c(.25,.75))) # The last one computes separately the 0.25 and 0.75 quantiles of 2 vars. latex(bystats2(death, race, sex, fun=table)) } } \keyword{category} \concept{grouping} Hmisc/man/mApply.Rd0000644000176200001440000000522513714234051013625 0ustar liggesusers\name{mApply} \concept{apply for matrix} \concept{apply for vector} \alias{mApply} \title{Apply a Function to Rows of a Matrix or Vector} \description{ \code{mApply} is like \code{tapply} except that the first argument can be a matrix or a vector, and the output is cleaned up if \code{simplify=TRUE}. It uses code adapted from Tony Plate (\email{tplate@blackmesacapital.com}) to operate on grouped submatrices. As \code{mApply} can be much faster than using \code{by}, it is often worth the trouble of converting a data frame to a numeric matrix for processing by \code{mApply}. \code{asNumericMatrix} will do this, and \code{matrix2dataFrame} will convert a numeric matrix back into a data frame. } \usage{ mApply(X, INDEX, FUN, \dots, simplify=TRUE, keepmatrix=FALSE) } \arguments{ \item{X}{ a vector or matrix capable of being operated on by the function specified as the \code{FUN} argument } \item{INDEX}{ list of factors, each of the same number of rows as 'X' has. } \item{FUN}{ the function to be applied. In the case of functions like '+', '\%*\%', etc., the function name must be quoted. } \item{\dots}{ optional arguments to 'FUN'. } \item{simplify}{ set to 'FALSE' to suppress simplification of the result into an array, matrix, etc. } \item{keepmatrix}{set to \code{TRUE} to keep result as a matrix even if \code{simplify} is \code{TRUE}, in the case of only one stratum } } \value{ For \code{mApply}, the returned value is a vector, matrix, or list. If \code{FUN} returns more than one number, the result is an array if \code{simplify=TRUE} and is a list otherwise. If a matrix is returned, its rows correspond to unique combinations of \code{INDEX}. If \code{INDEX} is a list with more than one vector, \code{FUN} returns more than one number, and \code{simplify=FALSE}, the returned value is a list that is an array with the first dimension corresponding to the last vector in \code{INDEX}, the second dimension corresponding to the next to last vector in \code{INDEX}, etc., and the elements of the list-array correspond to the values computed by \code{FUN}. In this situation the returned value is a regular array if \code{simplify=TRUE}.
The order of dimensions is as previously but the additional (last) dimension corresponds to values computed by \code{FUN}. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{asNumericMatrix}}, \code{\link{matrix2dataFrame}}, \code{\link{tapply}}, \code{\link{sapply}}, \code{\link{lapply}}, \code{\link{mapply}}, \code{\link{by}}. } \examples{ require(datasets, TRUE) a <- mApply(iris[,-5], iris$Species, mean) } \keyword{iteration} \keyword{category} Hmisc/man/ggMisc.Rd0000644000176200001440000000246313714232600013572 0ustar liggesusers\name{colorFacet} \alias{colorFacet} \alias{arrGrob} \alias{print.arrGrob} \title{Miscellaneous ggplot2 and grid Helper Functions} \description{ These functions are used on \code{ggplot2} objects or as layers when building a \code{ggplot2} object, and to facilitate use of \code{gridExtra}. \code{colorFacet} colors the thin rectangles used to separate panels created by \code{facet_grid} (and probably by \code{facet_wrap}). A better approach may be found at \url{https://stackoverflow.com/questions/28652284/}. \code{arrGrob} is a front-end to \code{gridExtra::arrangeGrob} that allows for proper printing. See \url{https://stackoverflow.com/questions/29062766/store-output-from-gridextragrid-arrange-into-an-object/}. The \code{arrGrob} \code{print} method calls \code{grid::grid.draw}. } \usage{ colorFacet(g, col = adjustcolor("blue", alpha.f = 0.3)) arrGrob(\dots) \method{print}{arrGrob}(x, \dots) } \arguments{ \item{g}{a \code{ggplot2} object that used faceting} \item{col}{color for facet separator rectangles} \item{\dots}{passed to \code{arrangeGrob}} \item{x}{an object created by \code{arrGrob}} } \author{Sandy Muspratt} \examples{ \dontrun{ s <- summaryP(age + sex ~ region + treatment) colorFacet(ggplot(s)) # prints directly # arrGrob is called by rms::ggplot.Predict and others } } \keyword{hplot} Hmisc/man/binconf.Rd0000644000176200001440000000447712243661443014012 0ustar liggesusers\name{binconf} \alias{binconf} \title{ Confidence Intervals for Binomial Probabilities } \description{ Produces 1-alpha confidence intervals for binomial probabilities. } \usage{ binconf(x, n, alpha=0.05, method=c("wilson","exact","asymptotic","all"), include.x=FALSE, include.n=FALSE, return.df=FALSE) } \arguments{ \item{x}{ vector containing the number of "successes" for binomial variates } \item{n}{ vector containing the numbers of corresponding observations } \item{alpha}{ probability of a type I error, so confidence coefficient = 1-alpha } \item{method}{ character string specifying which method to use. The "all" method only works when x and n are length 1. The "exact" method uses the F distribution to compute exact (based on the binomial cdf) intervals; the "wilson" interval is score-test-based; and the "asymptotic" is the text-book, asymptotic normal interval. Following Agresti and Coull, the Wilson interval is to be preferred and so is the default.
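Explicitly, the Wilson \eqn{1-\alpha}{1-alpha} limits are \deqn{\frac{\hat{p} + z^{2}/(2n) \pm z\sqrt{\hat{p}(1-\hat{p})/n + z^{2}/(4n^{2})}}{1 + z^{2}/n}}{(phat + z^2/(2n) +/- z sqrt(phat(1-phat)/n + z^2/(4n^2))) / (1 + z^2/n)} where \eqn{z} is the \eqn{1-\alpha/2}{1-alpha/2} quantile of the standard normal distribution.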
} \author{ Rollin Brant, Modified by Frank Harrell and \cr Brad Biggerstaff \cr Centers for Disease Control and Prevention \cr National Center for Infectious Diseases \cr Division of Vector-Borne Infectious Diseases \cr P.O. Box 2087, Fort Collins, CO, 80522-2087, USA \cr \email{bkb5@cdc.gov} } \references{ A. Agresti and B.A. Coull, Approximate is better than "exact" for interval estimation of binomial proportions, \emph{American Statistician,} \bold{52}:119--126, 1998. R.G. Newcombe, Logit confidence intervals and the inverse sinh transformation, \emph{American Statistician,} \bold{55}:200--202, 2001. L.D. Brown, T.T. Cai and A. DasGupta, Interval estimation for a binomial proportion (with discussion), \emph{Statistical Science,} \bold{16}:101--133, 2001. } \examples{ binconf(0:10,10,include.x=TRUE,include.n=TRUE) binconf(46,50,method="all") } \keyword{category} \keyword{htest} Hmisc/man/ggplotlyr.Rd0000644000176200001440000000174714020163005014402 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ggplotlyr.r \name{ggplotlyr} \alias{ggplotlyr} \title{ggplotlyr} \usage{ ggplotlyr(ggobject, tooltip = "label", remove = "txt: ", ...) } \arguments{ \item{ggobject}{an object produced by \code{ggplot}} \item{tooltip}{attribute specified to \code{ggplot} to hold hover text} \item{remove}{extraneous text to remove from hover text. Default is set to assume \code{tooltip='label'} and assumed the user specified \code{aes(..., label=txt)}. If you instead specified \code{aes(..., label=myvar)} use \code{remove='myvar: '}.} \item{...}{other arguments passed to \code{ggplotly}} } \value{ a \code{plotly} object } \description{ Render \code{plotly} Graphic from a \code{ggplot2} Object } \details{ Uses \code{plotly::ggplotly()} to render a \code{plotly} graphic with a specified tooltip attribute, removing extraneous text that \code{ggplotly} puts in hover text when \code{tooltip='label'} } \author{ Frank Harrell } Hmisc/man/popower.Rd0000644000176200001440000002636414323265325014066 0ustar liggesusers\name{popower} \alias{multEventChart} \alias{popower} \alias{posamsize} \alias{print.popower} \alias{print.posamsize} \alias{pomodm} \alias{simPOcuts} \alias{propsPO} \alias{propsTrans} \title{Power and Sample Size for Ordinal Response} \description{ \code{popower} computes the power for a two-tailed two-sample comparison of ordinal outcomes under the proportional odds ordinal logistic model. The power is the same as that of the Wilcoxon test but with ties handled properly. \code{posamsize} computes the total sample size needed to achieve a given power. Both functions compute the efficiency of the design compared with a design in which the response variable is continuous. \code{print} methods exist for both functions. Any of the input arguments may be vectors, in which case a vector of powers or sample sizes is returned. These functions use the methods of Whitehead (1993). \code{pomodm} is a function that assists in translating odds ratios to differences in mean or median on the original scale. \code{simPOcuts} simulates simple unadjusted two-group comparisons under a PO model to demonstrate the natural sampling variability that causes estimated odds ratios to vary over cutoffs of Y.
\code{propsPO} uses \code{\link{ggplot2}} to plot a stacked bar chart of proportions stratified by a grouping variable (and optionally a stratification variable), with an optional additional graph showing what the proportions would be had proportional odds held and an odds ratio been applied to the proportions in a reference group. If the result is passed to \code{ggplotly}, customized tooltip hover text will appear. \code{propsTrans} uses \code{\link{ggplot2}} to plot all successive transition proportions. \code{formula} has the state variable on the left hand side, the first right-hand variable is time, and the second right-hand variable is a subject ID variable. \code{multEventChart} uses \code{\link{ggplot2}} to plot event charts showing state transitions, accounting for absorbing states/events. It is based on code written by Lucy D'Agostino McGowan posted at \url{https://livefreeordichotomize.com/posts/2020-05-21-survival-model-detective-1/}. } \usage{ popower(p, odds.ratio, n, n1, n2, alpha=0.05) \method{print}{popower}(x, \dots) posamsize(p, odds.ratio, fraction=.5, alpha=0.05, power=0.8) \method{print}{posamsize}(x, \dots) pomodm(x=NULL, p, odds.ratio=1) simPOcuts(n, nsim=10, odds.ratio=1, p) propsPO(formula, odds.ratio=NULL, ref=NULL, data=NULL, ncol=NULL, nrow=NULL ) propsTrans(formula, data=NULL, labels=NULL, arrow='\u2794', maxsize=12, ncol=NULL, nrow=NULL) multEventChart(formula, data=NULL, absorb=NULL, sortbylast=FALSE, colorTitle=label(y), eventTitle='Event', palette='OrRd', eventSymbols=c(15, 5, 1:4, 6:10), timeInc=min(diff(unique(x))/2)) } \arguments{ \item{p}{ a vector of marginal cell probabilities which must add up to one. For \code{popower} and \code{posamsize}, the \code{i}th element specifies the probability that a patient will be in response level \code{i}, averaged over the two treatment groups. For \code{pomodm} and \code{simPOcuts}, \code{p} is the vector of cell probabilities to be translated under a given odds ratio. For \code{simPOcuts}, if \code{p} has names, those names are taken as the ordered distinct Y-values. Otherwise Y-values are taken as the integers 1, 2, ... up to the length of \code{p}. } \item{odds.ratio}{ the odds ratio to be able to detect. It doesn't matter which group is in the numerator. For \code{propsPO}, \code{odds.ratio} is a function of the grouping (right hand side) variable value. The value of the function specifies the odds ratio to apply to the reference group to get all other groups' expected proportions were proportional odds to hold against the first group. Normally the function should return 1.0 when its \code{x} argument corresponds to the \code{ref} group. For \code{pomodm} and \code{simPOcuts}, it is the odds ratio to apply to convert the given cell probabilities.} \item{n}{ total sample size for \code{popower}. You must specify either \code{n} or \code{n1} and \code{n2}. If you specify \code{n}, \code{n1} and \code{n2} are set to \code{n/2}. For \code{simPOcuts}, \code{n} is a single number equal to the combined sample size of the two groups. } \item{n1}{for \code{popower}, the number of subjects in treatment group 1} \item{n2}{for \code{popower}, the number of subjects in group 2} \item{nsim}{number of simulated studies to create by \code{simPOcuts}} \item{alpha}{type I error} \item{x}{an object created by \code{popower} or \code{posamsize}, or a vector of data values given to \code{pomodm} that corresponds to the vector \code{p} of probabilities.
If \code{x} is omitted for \code{pomodm}, the \code{odds.ratio} will be applied and the new vector of individual probabilities will be returned. Otherwise if \code{x} is given to \code{pomodm}, a 2-vector with the mean and median \code{x} after applying the odds ratio is returned.} \item{fraction}{ for \code{posamsize}, the fraction of subjects that will be allocated to group 1 } \item{power}{ for \code{posamsize}, the desired power (default is 0.8) } \item{formula}{an R formula expression for \code{propsPO} where the outcome categorical variable is on the left hand side and the grouping variable is on the right. It is assumed that the left hand variable is either already a factor or will have its levels in the right order for an ordinal model when it is converted to factor. For \code{multEventChart} the left hand variable is a categorical status variable, the first right hand side variable represents time, and the second right side variable is a unique subject ID. One line is produced per subject.} \item{ref}{for \code{propsPO} specifies the reference group (value of the right hand side \code{formula} variable) to use in computing the proportions from which proportions in the other groups are translated, under the proportional odds assumption.} \item{data}{a data frame or \code{data.table}} \item{labels}{for \code{propsTrans} is an optional character vector corresponding to y=1,2,3,... that is used to construct \code{plotly} hover text as a \code{label} attribute in the \code{ggplot2} aesthetic. Used when y is integer on the axes but you want long labels in the hover text.} \item{arrow}{character to use as the arrow symbol for transitions in \code{propsTrans}. The default is the dingbat heavy wide-headed rightwards arrow.} \item{nrow,ncol}{see \code{\link[ggplot2]{facet_wrap}}} \item{maxsize}{maximum symbol size} \item{\dots}{unused} \item{absorb}{character vector specifying the subset of levels of the left hand side variable that are absorbing states such as death or hospital discharge} \item{sortbylast}{set to \code{TRUE} to sort the subjects by the severity of the status at the last time point} \item{colorTitle}{label for legend for status} \item{eventTitle}{label for legend for \code{absorb}} \item{palette}{a single character string specifying the \code{\link[ggplot2]{scale_fill_brewer}} color palette} \item{eventSymbols}{vector of symbol codes. Default for first two symbols is a solid square and an open diamond.} \item{timeInc}{time increment for the x-axis. Default is 1/2 the shortest gap between any two distinct times in the data.} } \value{ a list containing \code{power}, \code{eff} (relative efficiency), and \code{approx.se} (approximate standard error of log odds ratio) for \code{popower}, or containing \code{n} and \code{eff} for \code{posamsize}. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \references{ Whitehead J (1993): Sample size calculations for ordered categorical data. Stat in Med 12:2257--2271. Julious SA, Campbell MJ (1996): Letter to the Editor. Stat in Med 15: 1065--1066. Shows accuracy of formula for binary response case.
} \seealso{ \code{\link{simRegOrd}}, \code{\link{bpower}}, \code{\link{cpower}}, \code{\link[rms]{impactPO}} } \examples{ # For a study of back pain (none, mild, moderate, severe) here are the # expected proportions (averaged over 2 treatments) that will be in # each of the 4 categories: p <- c(.1,.2,.4,.3) popower(p, 1.2, 1000) # OR=1.2, total n=1000 posamsize(p, 1.2) popower(p, 1.2, 3148) # If p was the vector of probabilities for group 1, here's how to # compute the average over the two groups: # p2 <- pomodm(p=p, odds.ratio=1.2) # pavg <- (p + p2) / 2 # Compare power to test for proportions for binary case, # proportion of events in control group of 0.1 p <- 0.1; or <- 0.85; n <- 4000 popower(c(1 - p, p), or, n) # 0.338 bpower(p, odds.ratio=or, n=n) # 0.320 # Add more categories, starting with 0.1 in middle p <- c(.8, .1, .1) popower(p, or, n) # 0.543 p <- c(.7, .1, .1, .1) popower(p, or, n) # 0.67 # Continuous scale with final level having prob. 0.1 p <- c(rep(1 / n, 0.9 * n), 0.1) popower(p, or, n) # 0.843 # Compute the mean and median x after shifting the probability # distribution by an odds ratio under the proportional odds model x <- 1 : 5 p <- c(.05, .2, .2, .3, .25) # For comparison make up a sample that looks like this X <- rep(1 : 5, 20 * p) c(mean=mean(X), median=median(X)) pomodm(x, p, odds.ratio=1) # still have to figure out the right median pomodm(x, p, odds.ratio=0.5) # Show variation of odds ratios over possible cutoffs of Y even when PO # truly holds. Run 5 simulations for a total sample size of 300. # The two groups have 150 subjects each. s <- simPOcuts(300, nsim=5, odds.ratio=2, p=p) round(s, 2)
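# Added sketch (not in the original examples): how power increases with
# the detectable odds ratio for the 5-level outcome p defined above,
# using a total sample size of 300 split equally between groups
ors <- c(1.25, 1.5, 1.75, 2)
sapply(ors, function(or) popower(p, or, 300)$power)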
# An ordinal outcome with levels a, b, c, d, e is measured at 3 times # Show the proportion of values in each outcome category stratified by time # Then compute what the proportions would be had the proportions at times # 2 and 3 been the proportions at time 1 modified by two odds ratios set.seed(1) d <- expand.grid(time=1:3, reps=1:30) d$y <- sample(letters[1:5], nrow(d), replace=TRUE) propsPO(y ~ time, data=d, odds.ratio=function(time) c(1, 2, 4)[time]) # To show with plotly, save previous result as object p and then: # plotly::ggplotly(p, tooltip='label') # Add a stratification variable and don't consider an odds ratio d <- expand.grid(time=1:5, sex=c('female', 'male'), reps=1:30) d$y <- sample(letters[1:5], nrow(d), replace=TRUE) propsPO(y ~ time + sex, data=d) # may add nrow= or ncol= # Show all successive transition proportion matrices d <- expand.grid(id=1:30, time=1:10) d$state <- sample(LETTERS[1:4], nrow(d), replace=TRUE) propsTrans(state ~ time + id, data=d) pt1 <- data.frame(pt=1, day=0:3, status=c('well', 'well', 'sick', 'very sick')) pt2 <- data.frame(pt=2, day=c(1,2,4,6), status=c('sick', 'very sick', 'coma', 'death')) pt3 <- data.frame(pt=3, day=1:5, status=c('sick', 'very sick', 'sick', 'very sick', 'discharged')) pt4 <- data.frame(pt=4, day=c(1:4, 10), status=c('well', 'sick', 'very sick', 'well', 'discharged')) d <- rbind(pt1, pt2, pt3, pt4) d$status <- factor(d$status, c('discharged', 'well', 'sick', 'very sick', 'coma', 'death')) label(d$day) <- 'Day' multEventChart(status ~ day + pt, data=d, absorb=c('death', 'discharged'), colorTitle='Status', sortbylast=TRUE) + theme_classic() + theme(legend.position='bottom') } \keyword{htest} \keyword{category} \concept{power} \concept{study design} \concept{ordinal logistic model} \concept{ordinal response} \concept{proportional odds model} Hmisc/man/prnz.Rd0000644000176200001440000000110512255333620013346 0ustar liggesusers\name{prnz} \alias{prn} \title{ Print an Object with its Name } \description{ Prints an object with its name and with an optional descriptive text string. This is useful for annotating analysis output files and for debugging. } \usage{ prn(x, txt, file) } \arguments{ \item{x}{any object} \item{txt}{optional text string} \item{file}{optional file name. By default, writes to console. \code{append=TRUE} is assumed.} } \section{Side Effects}{ prints } \seealso{ \code{\link{print}}, \code{\link{cat}} } \examples{ x <- 1:5 prn(x) # prn(fit, 'Full Model Fit') } \keyword{print} Hmisc/man/event.chart.Rd0000644000176200001440000010013512243661443014605 0ustar liggesusers\name{event.chart} \alias{event.chart} \title{ Flexible Event Chart for Time-to-Event Data } \description{ Creates an event chart on the current graphics device. Also, allows the user to plot a legend on the plot area or on a separate page. Contains features useful for plotting data with time-to-event outcomes which arise in a variety of studies including randomized clinical trials and non-randomized cohort studies. This function can use as input a matrix or a data frame, although greater utility and ease of use will be seen with a data frame.
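For example, with a data frame \code{d} whose columns are all event dates, \code{event.chart(d)} plots one line per subject using every column; \code{event.chart(d, subset.c=c('randdate','event1'))} (illustrative column names) would restrict the chart to two event types.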
} \usage{ event.chart(data, subset.r = 1:dim(data)[1], subset.c = 1:dim(data)[2], sort.by = NA, sort.ascending = TRUE, sort.na.last = TRUE, sort.after.subset = TRUE, y.var = NA, y.var.type = "n", y.jitter = FALSE, y.jitter.factor = 1, y.renum = FALSE, NA.rm = FALSE, x.reference = NA, now = max(data[, subset.c], na.rm = TRUE), now.line = FALSE, now.line.lty = 2, now.line.lwd = 1, now.line.col = 1, pty = "m", date.orig = c(1, 1, 1960), titl = "Event Chart", y.idlabels = NA, y.axis = "auto", y.axis.custom.at = NA, y.axis.custom.labels = NA, y.julian = FALSE, y.lim.extend = c(0, 0), y.lab = ifelse(is.na(y.idlabels), "", as.character(y.idlabels)), x.axis.all = TRUE, x.axis = "auto", x.axis.custom.at = NA, x.axis.custom.labels = NA, x.julian = FALSE, x.lim.extend = c(0, 0), x.scale = 1, x.lab = ifelse(x.julian, "Follow-up Time", "Study Date"), line.by = NA, line.lty = 1, line.lwd = 1, line.col = 1, line.add = NA, line.add.lty = NA, line.add.lwd = NA, line.add.col = NA, point.pch = 1:length(subset.c), point.cex = rep(0.6, length(subset.c)), point.col = rep(1, length(subset.c)), point.cex.mult = 1., point.cex.mult.var = NA, extra.points.no.mult = rep(NA, length(subset.c)), legend.plot = FALSE, legend.location = "o", legend.titl = titl, legend.titl.cex = 3, legend.titl.line = 1, legend.point.at = list(x = c(5, 95), y = c(95, 30)), legend.point.pch = point.pch, legend.point.text = ifelse(rep(is.data.frame(data), length(subset.c)), names(data[, subset.c]), subset.c), legend.cex = 2.5, legend.bty = "n", legend.line.at = list(x = c(5, 95), y = c(20, 5)), legend.line.text = names(table(as.character(data[, line.by]), exclude = c("", "NA"))), legend.line.lwd = line.lwd, legend.loc.num = 1, \dots) } \arguments{ \item{data}{ a matrix or data frame with rows corresponding to subjects and columns corresponding to variables. Note that for a data frame or matrix containing multiple time-to-event data (e.g., time to recurrence, time to death, and time to last follow-up), one column is required for each specific event. } \item{subset.r}{ subset of rows of original matrix or data frame to place in event chart. Logical arguments may be used here (e.g., \code{treatment.arm == 'a'}, if the data frame, data, has been attached to the search directory; otherwise, \code{data$treatment.arm == "a"}). } \item{subset.c}{ subset of columns of original matrix or data frame to place in event chart; if working with a data frame, a vector of data frame variable names may be used for subsetting purposes (e.g., \code{c('randdate', 'event1')}). } \item{sort.by}{ column(s) or data frame variable name(s) with which to sort the chart's output. The default is \code{NA}, thereby resulting in a chart sorted by original row number. } \item{sort.ascending}{ logical flag (which takes effect only if the argument \code{sort.by} is utilized). If \code{TRUE} (default), sorting is done in ascending order; if \code{FALSE}, descending order. } \item{sort.na.last}{ logical flag (which takes effect only if the argument \code{sort.by} is utilized). If \code{TRUE} (default), \code{NA} values are considered as last values in ordering. } \item{sort.after.subset}{ logical flag (which takes effect only if the argument sort.by is utilized). If \code{FALSE}, sorting data (via \code{sort.by} specified variables or columns) will be performed prior to row subsetting (via \code{subset.r}); if \code{TRUE} (default), row subsetting of original data will be done before sorting.
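For example, with \code{subset.r=1:10} and \code{sort.by='event1'} (an illustrative column name), the default \code{sort.after.subset=TRUE} sorts only the ten subsetted rows by \code{event1}, whereas \code{FALSE} sorts the full data first and then takes rows 1 through 10 of the sorted data.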
} \item{y.var}{ variable name or column number of original matrix or data frame with which to scale y-axis. Default is \code{NA}, which will result in equally spaced lines on y-axis (based on original data or sorted data if requested by \code{sort.by}). Otherwise, location of lines on y-axis will be dictated by specified variable or column. Examples of specified variables may be date of an event or a physiological covariate. Any observation which has a missing value for the \code{y.var} variable will not appear on the graph. } \item{y.var.type}{ type of variable specified in \code{y.var} (which will only take effect if argument \code{y.var} is utilized). If \code{"d"}, specified variable is a date (either numeric julian date or an S-Plus dates object); if \code{"n"}, specified variable is numeric (e.g., systolic blood pressure level) although not a julian date. } \item{y.jitter}{ logical flag (which takes effect only if the argument \code{y.var} is utilized). Due to potential ties in \code{y.var} variable, \code{y.jitter} (when \code{TRUE}) will jitter the data to allow discrimination between observations at the possible cost of producing slightly inaccurate dates or covariate values; if \code{FALSE} (the default), no jittering will be performed. The \code{y.jitter} algorithm assumes a uniform distribution of observations across the range of \code{y.var}. The algorithm is as follows: \code{ size.jitter <- ( diff(range(y.var)) / (2 * (length(y.var) - 1)) ) * y.jitter.factor } The default of \code{y.jitter.factor} is 1. The entire product is then used as an argument into \code{runif}: \code{y.var <- y.var + runif(length(y.var), -size.jitter, size.jitter)} } \item{y.jitter.factor}{ an argument used with the \code{y.jitter} function to scale the range of added noise. Default is 1. } \item{y.renum}{ logical flag. If \code{TRUE}, subset observations are listed on y-axis from 1 to \code{length(subset.r)}; if \code{FALSE} (default), subset observations are listed on y-axis in original form. As an example, if \code{subset.r = 301:340} and \code{y.renum == TRUE}, y-axis will be shown as 1 through 40. However, if \code{y.renum == FALSE}, y-axis will be shown as 301 through 340. The above examples assume the following argument, \code{NA.rm}, is set to \code{FALSE}. } \item{NA.rm}{ logical flag. If \code{TRUE}, subset observations which have \code{NA} for each variable specified in \code{subset.c} will not have an entry on the y-axis. Also, if the following argument, \code{x.reference}, is specified, observations with missing \code{x.reference} values will also not have an entry on the y-axis. If \code{FALSE} (default), user can identify those observations which do have \code{NA} for every variable specified in \code{subset.c} (or, if \code{x.reference} is specified, also those observations which are missing only the \code{x.reference} value); this can easily be done by examining the resulting y-axis and recognizing the observations without any plotting symbols. } \item{x.reference}{ column of original matrix or data frame with which to reference the x-axis. That is, if specified, all columns specified in \code{subset.c} will have \code{x.reference} subtracted from them. An example may be to see the timing of events before and after treatment or to see time-to-event after entry into study. The event times will be aligned using the \code{x.reference} argument as the reference point. } \item{now}{ the \dQuote{now} date which will be used for top of y-axis when creating the Goldman eventchart (see reference below).
Default is \code{max(data[, subset.c], na.rm = TRUE)}. } \item{now.line}{ logical flag. A feature utilized by the Goldman Eventchart. When \code{x.reference} is specified as the start of follow-up and \code{y.var = x.reference}, then the Goldman chart can be created. This argument, if \code{TRUE}, will cause the plot region to be square, and will draw a line with a slope of -1 from the top of the y-axis to the right end of the x-axis. Essentially, it denotes end of current follow-up period for looking at the time-to-event data. Default is \code{FALSE}. } \item{now.line.lty}{ line type of \code{now.line}. } \item{now.line.lwd}{ line width of \code{now.line}. } \item{now.line.col}{ color of \code{now.line}. } \item{pty}{ graph option, \code{pty='m'} is the default; use \code{pty='s'} for the square looking Goldman's event chart. } \item{date.orig}{ date of origin to consider if dates are in julian, SAS, or S-Plus dates object format; default is January 1, 1960 (which is the default origin used by both S-Plus and SAS). Utilized when either \code{y.julian = FALSE} or \code{x.julian = FALSE}. } \item{titl}{ title for event chart. Default is 'Event Chart'. } \item{y.idlabels}{ column or data frame variable name used for y-axis labels. For example, if \code{c('pt.no')} is specified, patient ID (stored in \code{pt.no}) will be seen on y-axis labels instead of sequence specified by \code{subset.r}. This argument takes precedence over both \code{y.axis = 'auto'} and \code{y.axis = 'custom'} (see below). NOTE: Program will issue warning if this argument is specified and if \code{is.na(y.var) == FALSE}; \code{y.idlabels} will not be used in this situation. Also, attempting to plot too many patients on a single event chart will cause undesirable plotting of \code{y.idlabels}. } \item{y.axis}{ character string specifying whether program will control labelling of y-axis (with argument \code{"auto"}), or if user will control labelling (with argument \code{"custom"}). If \code{"custom"} is chosen, user must specify location and text of labels using \code{y.axis.custom.at} and \code{y.axis.custom.labels} arguments, respectively, listed below. This argument will not be utilized if \code{y.idlabels} is specified. } \item{y.axis.custom.at}{ user-specified vector of y-axis label locations. Must be used when \code{y.axis = "custom"}; will not be used otherwise. } \item{y.axis.custom.labels}{ user-specified vector of y-axis labels. Must be used when \code{y.axis = "custom"}; will not be used otherwise. } \item{y.julian}{ logical flag (which will only be considered if \code{y.axis == "auto"} and \code{!is.na(y.var) & y.var.type == "d"}). If \code{FALSE} (default), will convert julian numeric dates or S-Plus dates objects into \dQuote{mm/dd/yy} format for the y-axis labels. If \code{TRUE}, dates will be printed in julian (numeric) format. } \item{y.lim.extend}{ two-dimensional vector representing the number of units that the user wants to increase \code{ylim} on bottom and top of y-axis, respectively. Default is \code{c(0,0)}. This argument will not take effect if the Goldman chart is utilized. } \item{y.lab}{ single label to be used for entire y-axis. Default will be the variable name or column number of \code{y.idlabels} (if non-missing) and blank otherwise. } \item{x.axis.all}{ logical flag. If \code{TRUE} (default), lower and upper limits of x-axis will be based on all observations (rows) in matrix or data frame.
If \code{FALSE}, lower and upper limits will be based only on those observations specified by \code{subset.r} (either before or after sorting depending on specification of \code{sort.by} and value of \code{sort.after.subset}). } \item{x.axis}{ character string specifying whether program will control labelling of x-axis (with argument \code{"auto"}), or if user will control labelling (with argument \code{"custom"}). If \code{"custom"} is chosen, user must specify location and text of labels using \code{x.axis.custom.at} and \code{x.axis.custom.labels} arguments, respectively, listed below. } \item{x.axis.custom.at}{ user-specified vector of x-axis label locations. Must be used when \code{x.axis == "custom"}; will not be used otherwise. } \item{x.axis.custom.labels}{ user-specified vector of x-axis labels. Must be used when \code{x.axis == "custom"}; will not be used otherwise. } \item{x.julian}{ logical flag (which will only be considered if \code{x.axis == "auto"}). If \code{FALSE} (default), will convert julian dates or S-Plus dates objects into \dQuote{mm/dd/yy} format for the x-axis labels. If \code{TRUE}, dates will be printed in julian (numeric) format. NOTE: This argument should remain \code{TRUE} if \code{x.reference} is specified. } \item{x.lim.extend}{ two-dimensional vector representing the number of time units (usually in days) that the user wants to increase \code{xlim} on left-hand side and right-hand side of x-axis, respectively. Default is \code{c(0,0)}. This argument will not take effect if the Goldman chart is utilized. } \item{x.scale}{ a factor whose reciprocal multiplies the original units of the x-axis. For example, if the original data frame is in units of days, \code{x.scale = 365} will result in units of years (notwithstanding leap years). Default is 1. } \item{x.lab}{ single label to be used for entire x-axis. Default will be \dQuote{Study Date} if \code{x.julian = FALSE} and \dQuote{Follow-up Time} if \code{x.julian = TRUE}. } \item{line.by}{ column or data frame variable name for plotting unique lines by unique values of vector (e.g., specify \code{c('arm')} to plot unique lines by treatment arm). Can take at most one column or variable name. Default is \code{NA} which produces identical lines for each patient. } \item{line.lty}{ vector of line types corresponding to ascending order of \code{line.by} values. If \code{line.by} is specified, the vector should be the length of the number of unique values of \code{line.by}. If \code{line.by} is \code{NA}, only \code{line.lty[1]} will be used. The default is 1. } \item{line.lwd}{ vector of line widths corresponding to ascending order of \code{line.by} values. If \code{line.by} is specified, the vector should be the length of the number of unique values of \code{line.by}. If \code{line.by} is \code{NA}, only \code{line.lwd[1]} will be used. The default is 1. } \item{line.col}{ vector of line colors corresponding to ascending order of \code{line.by} values. If \code{line.by} is specified, the vector should be the length of the number of unique values of \code{line.by}. If \code{line.by} is \code{NA}, only \code{line.col[1]} will be used. The default is 1. } \item{line.add}{ a 2xk matrix with k = number of pairs of additional line segments to add.
For example, if it is of interest to draw additional line segments connecting events one and two, two and three, and four and five (possibly with different colors), an appropriate \code{line.add} argument would be \code{matrix(c('first.event','second.event','second.event','third.event', 'fourth.event','fifth.event'), 2, 3)}. One line segment would be drawn between \code{first.event} and \code{second.event}, a second line segment would be drawn between \code{second.event} and \code{third.event}, and a third line segment would be drawn between \code{fourth.event} and \code{fifth.event}. Different line types, widths and colors can be specified (in arguments listed just below). The conventions used for \code{subset.c} and \code{line.add} must match (i.e., column name must be used for both or column number must be used for both). If \code{line.add != NA}, length of \code{line.add.lty}, \code{line.add.lwd}, and \code{line.add.col} must be the same as number of pairs of additional line segments to add. NOTE: The drawing of the original default line may be suppressed (with \code{line.col = 0}), and \code{line.add} can be used to do all the line plotting for the event chart. } \item{line.add.lty}{ a kx1 vector corresponding to the columns of \code{line.add}; specifies the line types for the k line segments. } \item{line.add.lwd}{ a kx1 vector corresponding to the columns of \code{line.add}; specifies the line widths for the k line segments. } \item{line.add.col}{ a kx1 vector corresponding to the columns of \code{line.add}; specifies the line colors for the k line segments. } \item{point.pch}{ vector of \code{pch} values for points representing each event. If similar events are listed in multiple columns (e.g., regular visits or a recurrent event), repeated \code{pch} values may be listed in the vector (e.g., \code{c(2,4,rep(183,3))}). If \code{length(point.pch) < length(subset.c)}, \code{point.pch} will be repeated until lengths are equal; a warning message will verify this condition. } \item{point.cex}{ vector of size of points representing each event. If \code{length(point.cex) < length(subset.c)}, \code{point.cex} will be repeated until lengths are equal; a warning message will verify this condition. } \item{point.col}{ vector of colors of points representing each event. If \code{length(point.col) < length(subset.c)}, \code{point.col} will be repeated until lengths are equal; a warning message will verify this condition. } \item{point.cex.mult}{ a single number (may be non-integer), which is the base multiplier for the value of the \code{cex} of the plotted points, when interest lies in a variable size allowed for certain points, as a function of the quantity of the variable(s) in the dataset specified in the \code{point.cex.mult.var} argument; multiplied by original \code{point.cex} value and then the value of interest (for an individual) from the \code{point.cex.mult.var} argument; used only when non-\code{NA} arguments are provided to \code{point.cex.mult.var}; default is 1. } \item{point.cex.mult.var}{ vector of variables to be used in determining what \code{point.cex.mult} is multiplied by for determining size of plotted points from (possibly a subset of) \code{subset.c} variables, when interest lies in a variable size allowed for certain points, as a function of the level of some variable(s) in the dataset; default is \code{NA}.
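For instance, in the AIDS examples below, \code{point.cex.mult = 0.00002, point.cex.mult.var = 'viralload'} makes the plotted size of each patient's AIDS-diagnosis symbol proportional to that patient's viral load.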
} \item{extra.points.no.mult}{ vector of variables in the dataset to ignore for purposes of using \code{point.cex.mult}; for example, for some variables there may be interest in allowing a variable size for the plotted points, whereas for other variables (e.g., dropout time) there may be no interest in such manipulation; the vector should be the same size as the number of variables specified in \code{subset.c}, with \code{NA} entries where variable point size is of interest and the variable name (or location in \code{subset.c}) specified when the variable point size is not of interest; in this latter case, the associated argument in \code{point.cex} is instead used as the point \code{cex}; used only when non-\code{NA} arguments are provided to \code{point.cex.mult.var}; default is \code{NA}. } \item{legend.plot}{ logical flag; if \code{TRUE}, a legend will be plotted. Location of legend will be based on specification of \code{legend.location} along with values of other arguments listed below. Default is \code{FALSE} (i.e., no legend plotting). } \item{legend.location}{ will be used only if \code{legend.plot = TRUE}. If \code{"o"} (default), a one-page legend will precede the output of the chart. The user will need to hit \kbd{enter} in order for the event chart to be displayed. This feature is possible due to the \bold{\code{dev.ask}} option. If \code{"i"}, an internal legend will be placed in the plot region based on \code{legend.point.at}. If \code{"l"}, a legend will be placed in the plot region using the locator option. Legend will map points to events (via column names, by default) and, if \code{line.by} is specified, lines to groups (based on levels of \code{line.by}). } \item{legend.titl}{ title for the legend; default is title to be used for main plot. Only used when \code{legend.location = "o"}. } \item{legend.titl.cex}{ size of text for legend title. Only used when \code{legend.location = "o"}. } \item{legend.titl.line}{ line location of legend title dictated by \code{mtext} function with \code{outer = FALSE} option; default is 1.0. Only used when \code{legend.location = "o"}. } \item{legend.point.at}{ location of upper left and lower right corners of legend area to be utilized for describing events via points and text. } \item{legend.point.pch}{ vector of \code{pch} values for points representing each event in the legend. Default is \code{point.pch}. } \item{legend.point.text}{ text to be used for describing events; the default is set up for a data frame, as it will print the names of the columns specified by \code{subset.c}. } \item{legend.cex}{ size of text for points and event descriptions. Default is 2.5 which is set up for \code{legend.location = "o"}. A much smaller \code{cex} is recommended (possibly 0.75) for use with \code{legend.location = "i"} or \code{legend.location = "l"}. } \item{legend.bty}{ option to put a box around the legend(s); default is to have no box (\code{legend.bty = "n"}). Option \code{legend.bty = "o"} will produce a legend box. } \item{legend.line.at}{ if \code{line.by} was specified (with \code{legend.location = "o"} or \code{legend.location = "i"}), this argument will dictate the location of the upper left and lower right corners of legend area to be utilized for describing the different \code{line.by} values (e.g., \code{treatment.arm}). The default is set up for \code{legend.location = "o"}.
} \item{legend.line.text}{ text to be used for describing \code{line.by} values; the default is the names of the unique non-missing \code{line.by} values as produced from the table function. } \item{legend.line.lwd}{ vector of line widths corresponding to \code{line.by} values. } \item{legend.loc.num}{ number used for \code{locator} argument when \code{legend.location = "l"}. If 1 (default), user is to locate only the top left corner of the legend box. If 2, user is to locate both the top left corner and the lower right corner. This will be done twice when \code{line.by} is specified (once for points and once for lines). } \item{...}{ additional \code{par} arguments for use in main plot. } } \section{Side Effects}{ an event chart is created on the current graphics device. If \code{legend.plot = TRUE} and \code{legend.location = 'o'}, a one-page legend will precede the event chart. Please note that \code{par} parameters on completion of function will be reset to \code{par} parameters existing prior to start of function. } \details{ If you want to put, say, two event charts side-by-side in a plot region, you should not set up \code{par(mfrow=c(1,2))} before running the first plot. Instead, you should add the argument \code{mfg=c(1,1,1,2)} to the first plot call followed by the argument \code{mfg=c(1,2,1,2)} to the second plot call. If dates in the original data frame are in a specialized form (e.g., mm/dd/yy) of mode CHARACTER, the user must convert those columns to become class dates or julian numeric mode (see \code{\link{Date}} for more information). For example, in a data frame called \code{testdata}, with specialized dates in columns 4 through 10, the following code could be used: \code{as.numeric(dates(testdata[,4:10]))}. This will convert the columns to numeric julian dates based on the function's default origin of January 1, 1960. If original dates are in class dates or julian form, no extra work is necessary. In survival analysis, the data typically come in two columns: one column containing survival time and the other containing censoring indicator or event code. The \code{event.convert} function converts this type of data into multiple columns of event times, one column for each event type, suitable for the \code{event.chart} function. } \author{ J. Jack Lee and Kenneth R. Hess \cr Department of Biostatistics \cr University of Texas \cr M.D. Anderson Cancer Center \cr Houston, TX 77030 \cr \email{jjlee@mdanderson.org}, \email{khess@mdanderson.org} Joel A. Dubin \cr Department of Statistics \cr University of Waterloo \cr \email{jdubin@uwaterloo.ca} } \references{ Lee J.J., Hess, K.R., Dubin, J.A. (2000). Extensions and applications of event charts. \emph{The American Statistician,} \bold{54:1}, 63--70. Dubin, J.A., Lee, J.J., Hess, K.R. (1997). The Utility of Event Charts. \emph{Proceedings of the Biometrics Section, American Statistical Association}. Dubin, J.A., Muller H-G, Wang J-L (2001). Event history graphs for censored survival data. \emph{Statistics in Medicine,} \bold{20:} 2951--2964. Goldman, A.I. (1992). EVENTCHARTS: Visualizing Survival and Other Timed-Events Data. \emph{The American Statistician,} \bold{46:1}, 13--18. } \seealso{ \code{\link{event.history}}, \code{\link{Date}} } \examples{ # The sample data set is an augmented CDC AIDS dataset (ASCII) # which is used in the examples in the help file. This dataset is # described in Kalbfleisch and Lawless (JASA, 1989). # Here, we have included only children 4 years old and younger.
# We have also added a new field, dethdate, which # represents a fictitious death date for each patient. There was # no recording of death date on the original dataset. In addition, we have # added a fictitious viral load reading (copies/ml) for each patient at time of AIDS diagnosis, # noting viral load was also not part of the original dataset. # # All dates are julian with julian=0 being # January 1, 1960, and julian=14000 being 14000 days beyond # January 1, 1960 (i.e., May 1, 1998). cdcaids <- data.frame( age=c(4,2,1,1,2,2,2,4,2,1,1,3,2,1,3,2,1,2,4,2,2,1,4,2,4,1,4,2,1,1,3,3,1,3), infedate=c( 7274,7727,7949,8037,7765,8096,8186,7520,8522,8609,8524,8213,8455,8739, 8034,8646,8886,8549,8068,8682,8612,9007,8461,8888,8096,9192,9107,9001, 9344,9155,8800,8519,9282,8673), diagdate=c( 8100,8158,8251,8343,8463,8489,8554,8644,8713,8733,8854,8855,8863,8983, 9035,9037,9132,9164,9186,9221,9224,9252,9274,9404,9405,9433,9434,9470, 9470,9472,9489,9500,9585,9649), diffdate=c( 826,431,302,306,698,393,368,1124,191,124,330,642,408,244,1001,391,246, 615,1118,539,612,245,813,516,1309,241,327,469,126,317,689,981,303,976), dethdate=c( 8434,8304,NA,8414,8715,NA,8667,9142,8731,8750,8963,9120,9005,9028,9445, 9180,9189,9406,9711,9453,9465,9289,9640,9608,10010,9488,9523,9633,9667, 9547,9755,NA,9686,10084), censdate=c( NA,NA,8321,NA,NA,8519,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA, NA,NA,NA,NA,NA,NA,NA,NA,NA,10095,NA,NA), viralload=c( 13000,36000,70000,90000,21000,110000,75000,12000,125000,110000,13000,39000,79000,135000,14000, 42000,123000,20000,12000,18000,16000,140000,16000,58000,11000,120000,85000,31000,24000,115000, 17000,13100,72000,13500) ) cdcaids <- upData(cdcaids, labels=c(age ='Age, y', infedate='Date of blood transfusion', diagdate='Date of AIDS diagnosis', diffdate='Incubation period (days from HIV to AIDS)', dethdate='Fictitious date of death', censdate='Fictitious censoring date', viralload='Fictitious viral load')) # Note that the style options listed with these # examples are best suited for output to a postscript file (i.e., using # the postscript function with horizontal=TRUE) as opposed to a graphical # window (e.g., motif). 
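# The Details section notes that character dates (e.g., mm/dd/yy) must be
# converted to Date class or julian numeric form before calling
# event.chart.  A minimal sketch of such a conversion using only base R;
# chardates is a hypothetical vector, not part of cdcaids:
chardates <- c('03/21/82', '11/05/83')
julians <- as.numeric(as.Date(chardates, format='\%m/\%d/\%y') -
                      as.Date('1960-01-01'))
julians   # days beyond the January 1, 1960 origin used above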
# To produce simple calendar event chart (with internal legend): # postscript('example1.ps', horizontal=TRUE) event.chart(cdcaids, subset.c=c('infedate','diagdate','dethdate','censdate'), x.lab = 'observation dates', y.lab='patients (sorted by AIDS diagnosis date)', titl='AIDS data calendar event chart 1', point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), legend.plot=TRUE, legend.location='i', legend.cex=1.0, legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), legend.point.at = list(c(7210, 8100), c(35, 27)), legend.bty='o') # To produce simple interval event chart (with internal legend): # postscript('example2.ps', horizontal=TRUE) event.chart(cdcaids, subset.c=c('infedate','diagdate','dethdate','censdate'), x.lab = 'time since transfusion (in days)', y.lab='patients (sorted by AIDS diagnosis date)', titl='AIDS data interval event chart 1', point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), legend.plot=TRUE, legend.location='i', legend.cex=1.0, legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), x.reference='infedate', x.julian=TRUE, legend.bty='o', legend.point.at = list(c(1400, 1950), c(7, -1))) # To produce simple interval event chart (with internal legend), # but now with flexible diagdate symbol size based on viral load variable: # postscript('example2a.ps', horizontal=TRUE) event.chart(cdcaids, subset.c=c('infedate','diagdate','dethdate','censdate'), x.lab = 'time since transfusion (in days)', y.lab='patients (sorted by AIDS diagnosis date)', titl='AIDS data interval event chart 1a, with viral load at diagdate represented', point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), point.cex.mult = 0.00002, point.cex.mult.var = 'viralload', extra.points.no.mult = c(1,NA,1,1), legend.plot=TRUE, legend.location='i', legend.cex=1.0, legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), x.reference='infedate', x.julian=TRUE, legend.bty='o', legend.point.at = list(c(1400, 1950), c(7, -1))) # To produce a more complicated interval chart which is # referenced by diagnosis date, and sorted by age and incubation period: # postscript('example3.ps', horizontal=TRUE) event.chart(cdcaids, subset.c=c('infedate','diagdate','dethdate','censdate'), x.lab = 'time since diagnosis of AIDS (in days)', y.lab='patients (sorted by age and incubation length)', titl='AIDS data interval event chart 2 (sorted by age, incubation)', point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), legend.plot=TRUE, legend.location='i',legend.cex=1.0, legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), x.reference='diagdate', x.julian=TRUE, sort.by=c('age','diffdate'), line.by='age', line.lty=c(1,3,2,4), line.lwd=rep(1,4), line.col=rep(1,4), legend.bty='o', legend.point.at = list(c(-1350, -800), c(7, -1)), legend.line.at = list(c(-1350, -800), c(16, 8)), legend.line.text=c('age = 1', ' = 2', ' = 3', ' = 4')) # To produce the Goldman chart: # postscript('example4.ps', horizontal=TRUE) event.chart(cdcaids, subset.c=c('infedate','diagdate','dethdate','censdate'), x.lab = 'time since transfusion (in days)', y.lab='dates of observation', titl='AIDS data Goldman event chart 1', y.var = c('infedate'), y.var.type='d', now.line=TRUE, y.jitter=FALSE, point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), mgp = c(3.1,1.6,0), legend.plot=TRUE, legend.location='i',legend.cex=1.0, legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), x.reference='infedate', x.julian=TRUE, legend.bty='o', legend.point.at = list(c(1500, 2800), c(9300, 10000))) # To convert coded
time-to-event data, then, draw an event chart: surv.time <- c(5,6,3,1,2) cens.ind <- c(1,0,1,1,0) surv.data <- cbind(surv.time,cens.ind) event.data <- event.convert(surv.data) event.chart(cbind(rep(0,5),event.data),x.julian=TRUE,x.reference=1) } \keyword{hplot} \keyword{survival} % Converted by Sd2Rd version 1.21. Hmisc/man/bootkm.Rd0000644000176200001440000000441713714234051013660 0ustar liggesusers\name{bootkm} \alias{bootkm} \title{ Bootstrap Kaplan-Meier Estimates } \description{ Bootstraps Kaplan-Meier estimate of the probability of survival to at least a fixed time (\code{times} variable) or the estimate of the \code{q} quantile of the survival distribution (e.g., median survival time, the default). } \usage{ bootkm(S, q=0.5, B=500, times, pr=TRUE) } \arguments{ \item{S}{ a \code{Surv} object for possibly right-censored survival time } \item{q}{ quantile of survival time, default is 0.5 for median } \item{B}{ number of bootstrap repetitions (default=500) } \item{times}{ time vector (currently only a scalar is allowed) at which to compute survival estimates. You may specify only one of \code{q} and \code{times}, and if \code{times} is specified \code{q} is ignored. } \item{pr}{ set to \code{FALSE} to suppress printing the iteration number every 10 iterations } } \value{ a vector containing \code{B} bootstrap estimates } \section{Side Effects}{ updates \code{.Random.seed}, and, if \code{pr=TRUE}, prints progress of simulations } \details{ \code{bootkm} uses Therneau's \code{survfitKM} function to efficiently compute Kaplan-Meier estimates. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \references{ Akritas MG (1986): Bootstrapping the Kaplan-Meier estimator. JASA 81:1032--1038. } \seealso{ \code{\link[survival]{survfit}}, \code{\link[survival]{Surv}}, \code{\link[rms]{Survival.cph}}, \code{\link[rms]{Quantile.cph}} } \examples{ # Compute 0.95 nonparametric confidence interval for the difference in # median survival time between females and males (two-sample problem) set.seed(1) library(survival) S <- Surv(runif(200)) # no censoring sex <- c(rep('female',100),rep('male',100)) med.female <- bootkm(S[sex=='female',], B=100) # normally B=500 med.male <- bootkm(S[sex=='male',], B=100) describe(med.female-med.male) quantile(med.female-med.male, c(.025,.975), na.rm=TRUE) # na.rm needed because some bootstrap estimates of median survival # time may be missing when a bootstrap sample did not include the # longer survival times } \keyword{survival} \keyword{nonparametric} \concept{bootstrap} Hmisc/man/cpower.Rd0000644000176200001440000001111013714234051013650 0ustar liggesusers\name{cpower} \alias{cpower} \title{ Power of Cox/log-rank Two-Sample Test } \description{ Assumes exponential distributions for both treatment groups. Uses the George-Desu method along with formulas of Schoenfeld that allow estimation of the expected number of events in the two groups. To allow for drop-ins (noncompliance to control therapy, crossover to intervention) and noncompliance of the intervention, the method of Lachin and Foulkes is used. } \usage{ cpower(tref, n, mc, r, accrual, tmin, noncomp.c=0, noncomp.i=0, alpha=0.05, nc, ni, pr=TRUE) } \arguments{ \item{tref}{ time at which mortalities estimated } \item{n}{ total sample size (both groups combined). If allocation is unequal so that there are not \code{n/2} observations in each group, you may specify the sample sizes in \code{nc} and \code{ni}. 
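As a hypothetical illustration of unequal allocation, \code{cpower(5, mc=.2, r=25, accrual=1.5, tmin=5, nc=8000, ni=6000)} requests power with 8000 control and 6000 intervention subjects without specifying \code{n}.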
} \item{mc}{ \code{tref}-year mortality, control } \item{r}{ \% reduction in \code{mc} by intervention } \item{accrual}{ duration of accrual period } \item{tmin}{ minimum follow-up time } \item{noncomp.c}{ \% non-compliant in control group (drop-ins) } \item{noncomp.i}{ \% non-compliant in intervention group (non-adherers) } \item{alpha}{ type I error probability. A 2-tailed test is assumed. } \item{nc}{ number of subjects in control group } \item{ni}{ number of subjects in intervention group. \code{nc} and \code{ni} are specified in place of \code{n}. } \item{pr}{ set to \code{FALSE} to suppress printing of details }} \value{ power } \section{Side Effects}{ prints } \details{ For handling noncompliance, uses a modification of formula (5.4) of Lachin and Foulkes. Their method is based on a test for the difference in two hazard rates, whereas \code{cpower} is based on testing the difference in two log hazards. It is assumed here that the same correction factor can be approximately applied to the log hazard ratio as Lachin and Foulkes applied to the hazard difference. Note that Schoenfeld approximates the variance of the log hazard ratio by \code{4/m}, where \code{m} is the total number of events, whereas the George-Desu method uses the slightly better \code{1/m1 + 1/m2}. Power from this function will thus differ slightly from that obtained with the SAS \code{samsizc} program. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \references{ Peterson B, George SL: Controlled Clinical Trials 14:511--522; 1993. Lachin JM, Foulkes MA: Biometrics 42:507--519; 1986. Schoenfeld D: Biometrics 39:499--503; 1983. } \seealso{ \code{\link{spower}}, \code{\link{ciapower}}, \code{\link{bpower}} } \examples{ #In this example, 4 plots are drawn on one page, one plot for each #combination of noncompliance percentage. Within a plot, the #5-year mortality \% in the control group is on the x-axis, and #separate curves are drawn for several \% reductions in mortality #with the intervention. The accrual period is 1.5y, with all #patients followed at least 5y and some 6.5y. par(mfrow=c(2,2),oma=c(3,0,3,0)) morts <- seq(10,25,length=50) red <- c(10,15,20,25) for(noncomp in c(0,10,15,-1)) { if(noncomp>=0) nc.i <- nc.c <- noncomp else {nc.i <- 25; nc.c <- 15} z <- paste("Drop-in ",nc.c,"\%, Non-adherence ",nc.i,"\%",sep="") plot(0,0,xlim=range(morts),ylim=c(0,1), xlab="5-year Mortality in Control Patients (\%)", ylab="Power",type="n") title(z) cat(z,"\n") lty <- 0 for(r in red) { lty <- lty+1 power <- morts i <- 0 for(m in morts) { i <- i+1 power[i] <- cpower(5, 14000, m/100, r, 1.5, 5, nc.c, nc.i, pr=FALSE) } lines(morts, power, lty=lty) } if(noncomp==0)legend(18,.55,rev(paste(red,"\% reduction",sep="")), lty=4:1,bty="n") } mtitle("Power vs Non-Adherence for Main Comparison", ll="alpha=.05, 2-tailed, Total N=14000",cex.l=.8) # # Point sample size requirement vs.
mortality reduction # Root finder (uniroot()) assumes needed sample size is between # 1000 and 40000 # nc.i <- 25; nc.c <- 15; mort <- .18 red <- seq(10,25,by=.25) samsiz <- red i <- 0 for(r in red) { i <- i+1 samsiz[i] <- uniroot(function(x) cpower(5, x, mort, r, 1.5, 5, nc.c, nc.i, pr=FALSE) - .8, c(1000,40000))$root } samsiz <- samsiz/1000 par(mfrow=c(1,1)) plot(red, samsiz, xlab='\% Reduction in 5-Year Mortality', ylab='Total Sample Size (Thousands)', type='n') lines(red, samsiz, lwd=2) title('Sample Size for Power=0.80\nDrop-in 15\%, Non-adherence 25\%') title(sub='alpha=0.05, 2-tailed', adj=0) } \keyword{htest} \keyword{survival} \concept{power} \concept{study design} Hmisc/man/rcspline.plot.Rd0000644000176200001440000001103014275452674015166 0ustar liggesusers\name{rcspline.plot} \alias{rcspline.plot} \title{ Plot Restricted Cubic Spline Function } \description{ Provides plots of the estimated restricted cubic spline function relating a single predictor to the response for a logistic or Cox model. The \code{rcspline.plot} function does not allow for interactions as do \code{\link[rms]{lrm}} and \code{\link[rms]{cph}}, but it can provide detailed output for checking spline fits. This function uses the \code{\link{rcspline.eval}}, \code{\link[rms]{lrm.fit}}, and Therneau's \code{\link{coxph.fit}} functions and plots the estimated spline regression and confidence limits, placing summary statistics on the graph. If there are no adjustment variables, \code{rcspline.plot} can also plot two alternative estimates of the regression function when \code{model="logistic"}: proportions or logit proportions on grouped data, and a nonparametric estimate. The nonparametric regression estimate is based on smoothing the binary responses and taking the logit transformation of the smoothed estimates, if desired. The smoothing uses \code{\link{supsmu}}. } \usage{ rcspline.plot(x,y,model=c("logistic", "cox", "ols"), xrange, event, nk=5, knots=NULL, show=c("xbeta","prob"), adj=NULL, xlab, ylab, ylim, plim=c(0,1), plotcl=TRUE, showknots=TRUE, add=FALSE, subset, lty=1, noprint=FALSE, m, smooth=FALSE, bass=1, main="auto", statloc) } \arguments{ \item{x}{ a numeric predictor } \item{y}{ a numeric response. For binary logistic regression, \code{y} should be either 0 or 1. } \item{model}{ \code{"logistic"} or \code{"cox"}. For \code{"cox"}, uses the \code{coxph.fit} function with the \code{method="efron"} argument set. } \item{xrange}{ range for evaluating \code{x}, default is \eqn{f} and \eqn{1 - f} quantiles of \code{x}, where \eqn{f = \frac{10}{\max{(n, 200)}}}{f = 10/max(n, 200)} } \item{event}{ event/censoring indicator if \code{model="cox"}. If \code{event} is present, \code{model} is assumed to be \code{"cox"} } \item{nk}{ number of knots } \item{knots}{ knot locations, default based on quantiles of \code{x} (by \code{\link{rcspline.eval}}) } \item{show}{ \code{"xbeta"} or \code{"prob"} - what is plotted on \verb{y}-axis } \item{adj}{ optional matrix of adjustment variables } \item{xlab}{ \verb{x}-axis label, default is the \dQuote{label} attribute of \code{x} } \item{ylab}{ \verb{y}-axis label, default is the \dQuote{label} attribute of \code{y} } \item{ylim}{ \verb{y}-axis limits for logit or log hazard } \item{plim}{ \verb{y}-axis limits for probability scale } \item{plotcl}{ plot confidence limits } \item{showknots}{ show knot locations with arrows } \item{add}{ add this plot to an already existing plot } \item{subset}{ subset of observations to process, e.g.
\code{sex == "male"} } \item{lty}{ line type for plotting estimated spline function } \item{noprint}{ suppress printing regression coefficients and standard errors } \item{m}{ for \code{model="logistic"}, plot grouped estimates with triangles. Each group contains \code{m} ordered observations on \code{x}. } \item{smooth}{ plot nonparametric estimate if \code{model="logistic"} and \code{adj} is not specified } \item{bass}{ smoothing parameter (see \code{supsmu}) } \item{main}{ main title, default is \code{"Estimated Spline Transformation"} } \item{statloc}{ location of summary statistics. Default positioning by clicking left mouse button where upper left corner of statistics should appear. Alternative is \code{"ll"} to place below the graph on the lower left, or the actual \code{x} and \code{y} coordinates. Use \code{"none"} to suppress statistics. } } \value{ list with components (\samp{knots}, \samp{x}, \samp{xbeta}, \samp{lower}, \samp{upper}) which are respectively the knot locations, design matrix, linear predictor, and lower and upper confidence limits } \author{ Frank Harrell \cr Department of Biostatistics, Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link[rms]{lrm}}, \code{\link[rms]{cph}}, \code{\link{rcspline.eval}}, \code{\link[graphics]{plot}}, \code{\link{supsmu}}, \code{\link[survival:survival-internal]{coxph.fit}}, \code{\link[rms]{lrm.fit}} } \examples{ #rcspline.plot(cad.dur, tvdlm, m=150) #rcspline.plot(log10(cad.dur+1), tvdlm, m=150) } \keyword{regression} \keyword{models} Hmisc/man/varclus.Rd0000644000176200001440000003175414244437633014052 0ustar liggesusers\name{varclus} \alias{varclus} \alias{print.varclus} \alias{plot.varclus} \alias{naclus} \alias{naplot} \alias{combine.levels} \alias{plotMultSim} \alias{na.pattern} \title{ Variable Clustering } \description{ Does a hierarchical cluster analysis on variables, using the Hoeffding D statistic, squared Pearson or Spearman correlations, or proportion of observations for which two variables are both positive as similarity measures. Variable clustering is used for assessing collinearity, redundancy, and for separating variables into clusters that can be scored as a single variable, thus resulting in data reduction. For computing any of the three similarity measures, pairwise deletion of NAs is done. The clustering is done by \code{hclust()}. A small function \code{naclus} is also provided which depicts similarities in which observations are missing for variables in a data frame. The similarity measure is the fraction of \code{NAs} in common between any two variables. The diagonals of this \code{sim} matrix are the fraction of NAs in each variable by itself. \code{naclus} also computes \code{na.per.obs}, the number of missing variables in each observation, and \code{mean.na}, a vector whose ith element is the mean number of missing variables other than variable i, for observations in which variable i is missing. The \code{naplot} function makes several plots (see the \code{which} argument). So as to not generate too many dummy variables for multi-valued character or categorical predictors, \code{varclus} will automatically combine infrequent cells of such variables using an auxiliary function \code{combine.levels} that is defined here. If all values of \code{x} are \code{NA}, \code{combine.levels} returns a numeric vector that is all \code{NA}. \code{plotMultSim} plots multiple similarity matrices, with the similarity measure being on the x-axis of each subplot.
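For example, the last example below constructs a 3 x 3 x 4 array of similarity matrices across four months and displays it with \code{plotMultSim(s, c(0,4,8,12), vname=c('x1','x2','x3'))}.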
\code{na.pattern} prints a frequency table of all combinations of missingness for multiple variables. If there are 3 variables, a frequency table entry labeled \code{110} corresponds to the number of observations for which the first and second variables were missing but the third variable was not missing. } \usage{ varclus(x, similarity=c("spearman","pearson","hoeffding","bothpos","ccbothpos"), type=c("data.matrix","similarity.matrix"), method="complete", data=NULL, subset=NULL, na.action=na.retain, trans=c("square", "abs", "none"), ...) \method{print}{varclus}(x, abbrev=FALSE, ...) \method{plot}{varclus}(x, ylab, abbrev=FALSE, legend.=FALSE, loc, maxlen, labels, \dots) naclus(df, method) naplot(obj, which=c('all','na per var','na per obs','mean na', 'na per var vs mean na'), \dots) combine.levels(x, minlev=.05) plotMultSim(s, x=1:dim(s)[3], slim=range(pretty(c(0,max(s,na.rm=TRUE)))), slimds=FALSE, add=FALSE, lty=par('lty'), col=par('col'), lwd=par('lwd'), vname=NULL, h=.5, w=.75, u=.05, labelx=TRUE, xspace=.35) na.pattern(x) } \arguments{ \item{x}{ a formula, a numeric matrix of predictors, or a similarity matrix. If \code{x} is a formula, \code{model.matrix} is used to convert it to a design matrix. If the formula excludes an intercept (e.g., \code{~ a + b -1}), the first categorical (\code{factor}) variable in the formula will have dummy variables generated for all levels instead of omitting one for the first level. For \code{combine.levels}, \code{x} is a character, category, or factor vector (or other vector that is converted to factor). For \code{plot} and \code{print}, \code{x} is an object created by \code{varclus}. For \code{na.pattern}, \code{x} is a data table, data frame, or matrix. For \code{plotMultSim}, \code{x} is a numeric vector specifying the ordered unique values on the x-axis, corresponding to the third dimension of \code{s}. } \item{df}{a data frame} \item{s}{ an array of similarity matrices. The third dimension of this array corresponds to different computations of similarities. The first two dimensions come from a single similarity matrix. This is useful for displaying similarity matrices computed by \code{varclus}, for example. A use for this might be to show pairwise similarities of variables across time in a longitudinal study (see the example below). If \code{vname} is not given, \code{s} must have \code{dimnames}.
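In the last example below each slice of the array is filled with the \code{sim} component of a \code{varclus} result, as in \code{s[,,i] <- varclus(~ x1 + x2 + x3, sim='ccbothpos', data=d)$sim}.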
} \item{method}{ see \code{hclust}. The default, for both \code{varclus} and \code{naclus}, is \code{"complete"} (in S-Plus it was \code{"compact"}). } \item{data}{ } \item{subset}{ } \item{na.action}{ These may be specified if \code{x} is a formula. The default \code{na.action} is \code{na.retain}, defined by \code{varclus}. This causes all observations to be kept in the model frame, with later pairwise deletion of \code{NA}s.} \item{trans}{By default, when the similarity measure is based on Pearson's or Spearman's correlation coefficients, the coefficients are squared. Specify \code{trans="abs"} to take absolute values or \code{trans="none"} to use the coefficients as they stand.} \item{...}{for \code{varclus} these are optional arguments to pass to the \code{\link{dataframeReduce}} function. Otherwise, passed to \code{plclust} (or to \code{dotchart} or \code{dotchart2} for \code{naplot}). } \item{ylab}{ y-axis label. Default is constructed on the basis of \code{similarity}. } \item{legend.}{ set to \code{TRUE} to plot a legend defining the abbreviations } \item{loc}{ a list with elements \code{x} and \code{y} defining coordinates of the upper left corner of the legend. Default is \code{locator(1)}. } \item{maxlen}{ if a legend is plotted describing abbreviations, original labels longer than \code{maxlen} characters are truncated at \code{maxlen}. } \item{labels}{ a vector of character strings containing labels corresponding to columns in the similarity matrix, if the column names of that matrix are not to be used } \item{obj}{an object created by \code{naclus}} \item{which}{ defaults to \code{"all"} meaning to have \code{naplot} make 4 separate plots. To make only one of the plots, use \code{which="na per var"} (dot chart of fraction of NAs for each variable), \code{"na per obs"} (dot chart showing frequency distribution of number of variables having NAs in an observation), \code{"mean na"} (dot chart showing mean number of other variables missing when the indicated variable is missing), or \code{"na per var vs mean na"}, a scatterplot showing on the x-axis the fraction of NAs in the variable and on the y-axis the mean number of other variables that are NA when the indicated variable is NA. } \item{minlev}{ the minimum proportion of observations in a cell before that cell is combined with one or more cells. If more than one cell has fewer than \code{minlev*n} observations, all such cells are combined into a new cell labeled \code{"OTHER"}. Otherwise, the lowest frequency cell is combined with the next lowest frequency cell, and the level name is the combination of the two old level names. } \item{abbrev}{ set to \code{TRUE} to abbreviate variable names for plotting or printing. Is set to \code{TRUE} automatically if \code{legend=TRUE}. } \item{slim}{ 2-vector specifying the range of similarity values for scaling the y-axes. By default this is the observed range over all of \code{s}. } \item{slimds}{set \code{slimds} to \code{TRUE} to scale diagonals and off-diagonals separately} \item{add}{ set to \code{TRUE} to add similarities to an existing plot (usually specifying \code{lty} or \code{col}) } \item{lty}{ } \item{col}{ } \item{lwd}{ line type, color, or line thickness for \code{plotMultSim} } \item{vname}{ optional vector of variable names, in order, used in \code{s} } \item{h}{ relative height for subplot } \item{w}{ relative width for subplot } \item{u}{ relative extra height and width to leave unused inside the subplot.
Also used as the space between y-axis tick mark labels and graph border. } \item{labelx}{ set to \code{FALSE} to suppress drawing of labels in the x direction } \item{xspace}{ amount of space, on a scale of 1:\code{n} where \code{n} is the number of variables, to set aside for y-axis labels } } \value{ for \code{varclus} or \code{naclus}, a list of class \code{varclus} with elements \code{call} (containing the calling statement), \code{sim} (similarity matrix), \code{n} (sample size used if \code{x} was not a correlation matrix already - \code{n} is a matrix), \code{hclust}, the object created by \code{hclust}, \code{similarity}, and \code{method}. \code{naclus} also returns the two vectors listed under description, and \code{naplot} returns an invisible vector that is the frequency table of the number of missing variables per observation. \code{plotMultSim} invisibly returns the limits of similarities used in constructing the y-axes of each subplot. For \code{similarity="ccbothpos"} the \code{hclust} object is \code{NULL}. \code{na.pattern} creates an integer vector of frequencies. } \details{ \code{options(contrasts= c("contr.treatment", "contr.poly"))} is issued temporarily by \code{varclus} to make sure that ordinary dummy variables are generated for \code{factor} variables. Pass arguments to the \code{\link{dataframeReduce}} function to remove problematic variables (especially if analyzing all variables in a data frame). } \author{ Frank Harrell \cr Department of Biostatistics, Vanderbilt University \cr \email{fh@fharrell.com} } \section{Side Effects}{ plots } \references{ Sarle, WS: The VARCLUS Procedure. SAS/STAT User's Guide, 4th Edition, 1990. Cary NC: SAS Institute, Inc. Hoeffding W. (1948): A non-parametric test of independence. Ann Math Stat 19:546--57. } \seealso{ \code{\link{hclust}}, \code{\link{plclust}}, \code{\link{hoeffd}}, \code{\link{rcorr}}, \code{\link{cor}}, \code{\link{model.matrix}}, \code{\link{locator}}, \code{\link{na.pattern}} } \examples{ set.seed(1) x1 <- rnorm(200) x2 <- rnorm(200) x3 <- x1 + x2 + rnorm(200) x4 <- x2 + rnorm(200) x <- cbind(x1,x2,x3,x4) v <- varclus(x, similarity="spear") # spearman is the default anyway v # invokes print.varclus print(round(v$sim,2)) plot(v) # plot(varclus(~ age + sys.bp + dias.bp + country - 1), abbrev=TRUE) # the -1 causes k dummies to be generated for k countries # plot(varclus(~ age + factor(disease.code) - 1)) # # # use varclus(~., data= fracmiss= maxlevels= minprev=) to analyze all # "useful" variables - see dataframeReduce for details about arguments df <- data.frame(a=c(1,2,3),b=c(1,2,3),c=c(1,2,NA),d=c(1,NA,3), e=c(1,NA,3),f=c(NA,NA,NA),g=c(NA,2,3),h=c(NA,NA,3)) par(mfrow=c(2,2)) for(m in c("ward","complete","median")) { plot(naclus(df, method=m)) title(m) } naplot(naclus(df)) n <- naclus(df) plot(n); naplot(n) na.pattern(df) x <- c(1, rep(2,11), rep(3,9)) combine.levels(x) x <- c(1, 2, rep(3,20)) combine.levels(x) # plotMultSim example: Plot proportion of observations # for which two variables are both positive (diagonals # show the proportion of observations for which the # one variable is positive). Chance-correct the # off-diagonals by subtracting the product of the # marginal proportions. 
On each subplot the x-axis # shows month (0, 4, 8, 12) and there is a separate # curve for females and males d <- data.frame(sex=sample(c('female','male'),1000,TRUE), month=sample(c(0,4,8,12),1000,TRUE), x1=sample(0:1,1000,TRUE), x2=sample(0:1,1000,TRUE), x3=sample(0:1,1000,TRUE)) s <- array(NA, c(3,3,4)) opar <- par(mar=c(0,0,4.1,0)) # waste less space for(sx in c('female','male')) { for(i in 1:4) { mon <- (i-1)*4 s[,,i] <- varclus(~x1 + x2 + x3, sim='ccbothpos', data=d, subset=d$month==mon & d$sex==sx)$sim } plotMultSim(s, c(0,4,8,12), vname=c('x1','x2','x3'), add=sx=='male', slimds=TRUE, lty=1+(sx=='male')) # slimds=TRUE causes separate scaling for diagonals and # off-diagonals } par(opar) } \keyword{cluster} \keyword{multivariate} \keyword{category} \keyword{manip} Hmisc/man/knitrSet.Rd0000644000176200001440000001225614225632047014175 0ustar liggesusers\name{knitrSet} \alias{knitrSet} \alias{plotlySave} \title{knitr Setup and plotly Service Function} \description{ \code{knitrSet} sets up knitr to use better default parameters for base graphics, better code formatting, and to allow several arguments to be passed from code chunk headers, such as \code{bty}, \code{mfrow}, \code{ps}, \code{bot} (extra bottom margin for base graphics), \code{top} (extra top margin), \code{left} (extra left margin), \code{rt} (extra right margin), \code{lwd}, \code{mgp}, \code{las}, \code{tcl}, \code{axes}, \code{xpd}, \code{h} (usually \code{fig.height} in knitr), \code{w} (usually \code{fig.width} in knitr), \code{wo} (\code{out.width} in knitr), \code{ho} (\code{out.height} in knitr), \code{cap} (character string containing figure caption), \code{scap} (character string containing short figure caption for table of figures). The \code{capfile} argument facilitates auto-generating a table of figures for certain Rmarkdown report themes. This is done by the addition of a hook function that appends data to the \code{capfile} file each time a chunk runs that has a long or short caption in the chunk header. \code{plotlySave} saves a plotly graphic with name \code{foo.png} where \code{foo} is the name of the current chunk. You must have a free \code{plotly} account from \code{plot.ly} to use this function, and you must have run \code{Sys.setenv(plotly_username="your_plotly_username")} and \code{Sys.setenv(plotly_api_key="your_api_key")}. The API key can be found in one's profile settings. } \usage{ knitrSet(basename=NULL, w=if(! bd) 4, h=if(! bd) 3, wo=NULL, ho=NULL, fig.path=if(length(basename)) basename else '', fig.align=if(! bd) 'center', fig.show='hold', fig.pos=if(! bd) 'htbp', fig.lp = if(! bd) paste('fig', basename, sep=':'), dev=switch(lang, latex='pdf', markdown='png', blogdown=NULL, quarto=NULL), tidy=FALSE, error=FALSE, messages=c('messages.txt', 'console'), width=61, decinline=5, size=NULL, cache=FALSE, echo=TRUE, results='markup', capfile=NULL, lang=c('latex','markdown','blogdown','quarto')) plotlySave(x, \dots) } \arguments{ \item{basename}{base name to be added in front of graphics file names. \code{basename} is followed by a minus sign.} \item{w,h}{default figure width and height in inches} \item{wo,ho}{default figure rendering width and height, in integer pixels or percent as a character string, e.g. \code{'40\%'}} \item{fig.path}{path for figures. To put figures in a subdirectory specify e.g. \code{fig.path='folder/'}.
Ignored for blogdown.} \item{fig.align,fig.show,fig.pos,fig.lp,tidy,cache,echo,results,error,size}{see knitr documentation} \item{dev}{graphics device, with default figured from \code{lang}} \item{messages}{By default warning and other messages such as those from loading packages are sent to file \code{'messages.txt'} in the current working directory. You can specify \code{messages='console'} to send them directly to the console.} \item{width}{text output width for R code and output} \item{decinline}{number of digits to the right of the decimal point to round numeric values appearing inside Sexpr} \item{capfile}{the name of a file in the current working directory that is used to accumulate chunk labels, figure cross-reference tags, and figure short captions (long captions if no short caption is defined) for the purpose of using \code{markupSpecs$markdown$tof()} to insert a table of figures in a report. The file is appended to, which is useful if \code{cache=TRUE} is used since this will keep some chunks from running. The \code{tof} function will remove earlier duplicated figure tags if this is the case. If not \code{cache}ing, the user should initialize the file to empty at the top of the script.} \item{lang}{Default is \code{'latex'} to use LaTeX. Set to \code{'markdown'} when using R Markdown or \code{'blogdown'} or \code{'quarto'}. For \code{'blogdown'} and \code{'quarto'}, \code{par} and \code{knitr} graphics-related hooks are not called as this would prevent writing graphics files in the correct directory for the blog system.} \item{x}{a \code{plotly} graphics object or a named list of such objects. The resulting \code{png} file will go in the file path given by the \code{knitr} \code{fig.path} value, and have a base name equal to the current \code{knitr} chunk name. If \code{x} is a list, a minus sign followed by the chunk name is inserted before \code{.png}.} \item{\dots}{additional arguments passed to \code{plotly::plotly_IMAGE}} } \author{Frank Harrell} \seealso{\code{\link[knitr]{knit}}} \examples{ \dontrun{ # Typical call (without # comment symbols): # <<>>= # require(Hmisc) # knitrSet() # @ knitrSet() # use all defaults and don't use a graphics file prefix knitrSet('modeling') # use modeling- prefix for a major section or chapter knitrSet(cache=TRUE, echo=FALSE) # global default to cache and not print code knitrSet(w=5,h=3.75) # override default figure width, height # ```{r chunkname} # p <- plotly::plot_ly(...) # plotlySave(p) # creates fig.path/chunkname.png } } \keyword{interface} Hmisc/man/combplotp.Rd0000644000176200001440000000755614112727067014372 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/combplotp.r \name{combplotp} \alias{combplotp} \title{Combination Plot} \usage{ combplotp( formula, data = NULL, subset, na.action = na.retain, vnames = c("labels", "names"), includenone = FALSE, showno = FALSE, maxcomb = NULL, minfreq = NULL, N = NULL, pos = function(x) 1 * (tolower(x) \%in\% c("true", "yes", "y", "positive", "+", "present", "1")), obsname = "subjects", ptsize = 35, width = NULL, height = NULL, ... ) } \arguments{ \item{formula}{a formula containing all the variables to be cross-tabulated, on the formula's right hand side. There is no left hand side variable. If \code{formula} is omitted, then all variables from \code{data} are analyzed.} \item{data}{input data frame.
If none is specified the data are assumed to come from the parent frame.} \item{subset}{an optional subsetting expression applied to \code{data}} \item{na.action}{see \code{lm} etc.} \item{vnames}{set to \code{"names"} to use variable names to label axes instead of variable labels. When using the default \code{labels}, any variable not having a label will have its name used instead.} \item{includenone}{set to \code{TRUE} to include the combination where all conditions are absent} \item{showno}{set to \code{TRUE} to show a light dot for conditions that are not part of the currently tabulated combination} \item{maxcomb}{maximum number of combinations to display} \item{minfreq}{if specified, any combination having a frequency less than this will be omitted from the display} \item{N}{set to an integer to override the global denominator, instead of using the number of rows in the data} \item{pos}{a function of a vector returning a logical vector with \code{TRUE} values indicating positive} \item{obsname}{character string noun describing observations, default is \code{"subjects"}} \item{ptsize}{point size, defaults to 35} \item{width}{width of \code{plotly} plot} \item{height}{height of \code{plotly} plot} \item{\dots}{optional arguments to pass to \code{table}} } \value{ \code{plotly} object } \description{ Generates a plotly attribute plot given a series of possibly overlapping binary variables } \details{ Similar to the \code{UpSetR} package, draws a special dot chart sometimes called an attribute plot that depicts all possible combinations of the binary variables. By default a positive value, indicating that a certain condition pertains for a subject, is any of logical \code{TRUE}, numeric 1, \code{"yes"}, \code{"y"}, \code{"positive"}, \code{"+"} or \code{"present"} value, and all others are considered negative. The user can override this determination by specifying her own \code{pos} function. Case is ignored in the variable values. The plot uses solid dots arranged in a vertical line to indicate which combination of conditions is being considered. Frequencies of all possible combinations are shown above the dot chart. Marginal frequencies of positive values for the input variables are shown to the left of the dot chart. More information for all three of these component symbols is provided in hover text. Variables are sorted in descending order of marginal frequencies and likewise for combinations of variables. } \examples{ if (requireNamespace("plotly")) { g <- function() sample(0:1, n, prob=c(1 - p, p), replace=TRUE) set.seed(2); n <- 100; p <- 0.5 x1 <- g(); label(x1) <- 'A long label for x1 that describes it' x2 <- g() x3 <- g(); label(x3) <- 'This is
a label for x3' x4 <- g() combplotp(~ x1 + x2 + x3 + x4, showno=TRUE, includenone=TRUE) n <- 1500; p <- 0.05 pain <- g() anxiety <- g() depression <- g() soreness <- g() numbness <- g() tiredness <- g() sleepiness <- g() combplotp(~ pain + anxiety + depression + soreness + numbness + tiredness + sleepiness, showno=TRUE) } } \author{ Frank Harrell } Hmisc/man/GiniMd.Rd0000644000176200001440000000327413714234051013534 0ustar liggesusers\name{GiniMd} \alias{GiniMd} \title{Gini's Mean Difference} \description{ \code{GiniMd} computes Gini's mean difference on a numeric vector. This index is defined as the mean absolute difference between any two distinct elements of a vector. For a Bernoulli (binary) variable with proportion of ones equal to \eqn{p} and sample size \eqn{n}, Gini's mean difference is \eqn{2\frac{n}{n-1}p(1-p)}{2np(1-p)/(n-1)}. For a trinomial variable (e.g., predicted values for a 3-level categorical predictor using two dummy variables) having (predicted) values \eqn{A, B, C} with corresponding proportions \eqn{a, b, c}, Gini's mean difference is \eqn{2\frac{n}{n-1}[ab|A-B|+ac|A-C|+bc|B-C|]}{2n[ab|A-B|+ac|A-C|+bc|B-C|]/(n-1).} } \usage{ GiniMd(x, na.rm=FALSE) } \arguments{ \item{x}{a numeric vector (for \code{GiniMd})} \item{na.rm}{set to \code{TRUE} if you suspect there may be \code{NA}s in \code{x}; these will then be removed. Otherwise an error will result.} } \value{a scalar numeric} \references{ David HA (1968): Gini's mean difference rediscovered. Biometrika 55:573--575. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr \email{fh@fharrell.com} } \examples{ set.seed(1) x <- rnorm(40) # Test GiniMd against a brute-force solution gmd <- function(x) { n <- length(x) sum(outer(x, x, function(a, b) abs(a - b))) / n / (n - 1) } GiniMd(x) gmd(x) z <- c(rep(0,17), rep(1,6)) n <- length(z) GiniMd(z) 2*mean(z)*(1-mean(z))*n/(n-1) a <- 12; b <- 13; c <- 7; n <- a + b + c A <- -.123; B <- -.707; C <- 0.523 xx <- c(rep(A, a), rep(B, b), rep(C, c)) GiniMd(xx) 2*(a*b*abs(A-B) + a*c*abs(A-C) + b*c*abs(B-C))/n/(n-1) } \keyword{robust} \keyword{univar} Hmisc/man/pairUpDiff.Rd0000644000176200001440000000700613736123153014417 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pairUpDiff.r \name{pairUpDiff} \alias{pairUpDiff} \title{pairUpDiff} \usage{ pairUpDiff( x, major = NULL, minor = NULL, group, refgroup, lower = NULL, upper = NULL, minkeep = NULL, sortdiff = TRUE, conf.int = 0.95 ) } \arguments{ \item{x}{a numeric vector} \item{major}{an optional factor or character vector} \item{minor}{an optional factor or character vector} \item{group}{a required factor or character vector with two levels} \item{refgroup}{a character string specifying which level of \code{group} is to be subtracted} \item{lower}{an optional numeric vector giving the lower \code{conf.int} confidence limit for \code{x}} \item{upper}{similar to \code{lower} but for the upper limit} \item{minkeep}{the minimum value of \code{x} required to keep the observation. An observation is kept if either \code{group} has \code{x} exceeding or equalling \code{minkeep}. Default is to keep all observations.} \item{sortdiff}{set to \code{FALSE} to avoid sorting observations by descending between-\code{group} differences} \item{conf.int}{confidence level; must have been the value used to compute \code{lower} and \code{upper} if they are provided} } \value{ a list of two objects both sorted by descending values of differences in \code{x}.
The \code{X} object is a data frame that contains the original variables sorted by descending differences across \code{group} and in addition a variable \code{subscripts} denoting the subscripts of original observations with possible re-sorting and dropping depending on \code{sortdiff} and \code{minkeep}. The \code{D} data frame contains sorted differences (\code{diff}), \code{major}, \code{minor}, \code{sd} of difference, \code{lower} and \code{upper} confidence limits for the difference, \code{mid}, the midpoint of the two \code{x} values involved in the difference, \code{lowermid}, the midpoint minus 1/2 the width of the confidence interval, and \code{uppermid}, the midpoint plus 1/2 the width of the confidence interval. Another element returned is \code{dropped} which is a vector of \code{major} / \code{minor} combinations dropped due to \code{minkeep}. } \description{ Pair-up and Compute Differences } \details{ This function sets up for plotting half-width confidence intervals for differences, sorting by descending order of differences within major categories, especially for dot charts as produced by \code{\link[=dotchartpl]{dotchartpl()}}. Given a numeric vector \code{x} and a grouping (superpositioning) vector \code{group} with exactly two levels, computes differences in possibly transformed \code{x} between levels of \code{group} for the two observations that are equal on \code{major} and \code{minor}. If \code{lower} and \code{upper} are specified, the function uses \code{conf.int} and approximate normality on the transformed scale to backsolve for the standard errors of the estimates, and then uses approximate normality to get confidence intervals on differences by taking the square root of the sum of squares of the two standard errors. Coordinates for plotting half-width confidence intervals are also computed. These intervals may be plotted on the same scale as \code{x}, having the property that they overlap the two \code{x} values if and only if there is no "significant" difference at the \code{conf.int} level. } \examples{ x <- c(1, 4, 7, 2, 5, 3, 6) pairUpDiff(x, c(rep('A', 4), rep('B', 3)), c('u','u','v','v','z','z','q'), c('a','b','a','b','a','b','a'), 'a', x-.1, x+.1) } \author{ Frank Harrell } Hmisc/man/num.intercepts.Rd0000644000176200001440000000157512243661443015352 0ustar liggesusers\name{num.intercepts} \alias{num.intercepts} \title{Extract number of intercepts} \description{ Extract the number of intercepts from a model } \usage{ num.intercepts(fit, type=c('fit', 'var', 'coef')) } \arguments{ \item{fit}{a model fit object} \item{type}{ the default is to return the formal number of intercepts used when fitting the model. Set \code{type='var'} to return the actual number of intercepts stored in the \code{var} object, or \code{type='coef'} to return the actual number in the fitted coefficients. The former will be less than the number fitted for \code{\link[rms]{orm}} fits, and the latter for \code{orm} fits passed through \code{\link{fit.mult.impute}} } } \value{ \code{num.intercepts} returns an integer with the number of intercepts in the model.
} \seealso{ \code{\link[rms]{orm}}, \code{\link{fit.mult.impute}} } Hmisc/man/tex.Rd0000644000176200001440000000432313714234042013162 0ustar liggesusers\name{tex} \alias{tex} \title{ function for use in graphs that are used with the psfrag package in LaTeX } \description{ \code{tex} is a little function to save typing when including TeX commands in graphs that are used with the psfrag package in LaTeX to typeset any LaTeX text inside a postscript graphic. \code{tex} surrounds the input character string with \samp{\tex[options]\{\}}. This is especially useful for getting Greek letters and math symbols in postscript graphs. By default \code{tex} returns a string with \code{psfrag} commands specifying that the string be centered, not rotated, and not specially enlarged or shrunk. } \usage{ tex(string, lref='c', psref='c', scale=1, srt=0) } \arguments{ \item{string}{ a character string to be processed by \code{psfrag} in LaTeX. } \item{lref}{ LaTeX reference point for \code{string}. See the \code{psfrag} documentation referenced below. Default is \code{"c"} for centered (this is also the default for \code{psref}). } \item{psref}{ PostScript reference point. } \item{scale}{ scale factor, default is 1 } \item{srt}{ rotation for \code{string} in degrees (default is zero) } } \value{ \code{tex} returns a modified character string. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr \email{fh@fharrell.com} } \references{ Grant MC, Carlisle (1998): The PSfrag System, Version 3. Full documentation is obtained by searching www.ctan.org for \file{pfgguide.ps}. } \seealso{ \code{\link{postscript}}, \code{\link{par}}, \code{\link{ps.options}}, \code{\link{mgp.axis.labels}}, \code{\link{pdf}}, \code{\link[lattice]{trellis.device}}, \code{\link{setTrellis}} } \examples{ \dontrun{ postscript('test.ps') x <- seq(0,15,length=100) plot(x, dchisq(x, 5), xlab=tex('$x$'), ylab=tex('$f(x)$'), type='l') title(tex('Density Function of the $\\chi_{5}^{2}$ Distribution')) dev.off() # To process this file in LaTeX do something like #\documentclass{article} #\usepackage[scanall]{psfrag} #\begin{document} #\begin{figure} #\includegraphics{test.ps} #\caption{This is an example} #\end{figure} #\end{document} } } \keyword{hplot} \keyword{device} \concept{trellis} \concept{lattice} Hmisc/man/sys.Rd0000644000176200001440000000144412243661443013205 0ustar liggesusers\name{sys} \alias{sys} \title{ Run Unix or Dos Depending on System } \description{ Runs \code{unix} or \code{dos} depending on the current operating system. For \R, just runs \code{system} with optional concatenation of first two arguments which are assumed named \code{command} and \code{text}. } \usage{ sys(command, text=NULL, output=TRUE) # S-Plus: sys(\dots, minimized=FALSE) } \arguments{ \item{command}{ system command to execute } \item{text}{ text to concatenate to system command, if any (typically options or file names or both) } \item{output}{ set to \code{FALSE} to not return output of command as a character vector } } \value{ see \code{unix} or \code{dos} } \section{Side Effects}{ executes system commands } \seealso{ \code{\link{unix}}, \code{\link{system}} } \keyword{interface} Hmisc/man/units.Rd0000644000176200001440000000255512257361641013527 0ustar liggesusers\name{units} \alias{units} \alias{units.default} \alias{units.Surv} \alias{units<-.default} \title{ Units Attribute of a Vector } \description{ Sets or retrieves the \code{"units"} attribute of an object.
\code{units.default} replaces the builtin version, which only works for time series objects. If the variable is also given a \code{label}, subsetting (using \code{[.labelled}) will retain the \code{"units"} attribute. For a \code{Surv} object, \code{units} first looks for an overall \code{"units"} attribute, then it looks for \code{units} for the \code{time2} variable, then for \code{time1}. } \usage{ units(x, \dots) \method{units}{default}(x, none='', \dots) \method{units}{Surv}(x, none='', \dots) \method{units}{default}(x) <- value } \arguments{ \item{x}{any object} \item{\dots}{ignored} \item{value}{the units of the object, or ""} \item{none}{value to which to set result if no appropriate attribute is found} } \value{ the units attribute of x, if any; otherwise, the \code{units} attribute of the \code{tspar} attribute of \code{x} if any; otherwise the value \code{none}. Handling for \code{Surv} objects is different (see above). } \seealso{\code{\link{label}}} \examples{ fail.time <- c(10,20) units(fail.time) <- "Day" describe(fail.time) S <- Surv(fail.time) units(S) label(fail.time) <- 'Failure Time' fail.time } \keyword{utilities} \keyword{interface} Hmisc/man/Overview.Rd0000644000176200001440000004640413714232715014200 0ustar liggesusers\name{HmiscOverview} \alias{HmiscOverview} \alias{Hmisc.Overview} \title{ Overview of Hmisc Library } \description{ The Hmisc library contains many functions useful for data analysis, high-level graphics, utility operations, functions for computing sample size and power, translating SAS datasets into \R, imputing missing values, advanced table making, variable clustering, character string manipulation, conversion of \R objects to LaTeX code, recoding variables, and bootstrap repeated measures analysis. Most of these functions were written by F Harrell, but a few were collected from statlib and from s-news; other authors are indicated below. This collection of functions includes all of Harrell's submissions to statlib other than the functions in the \pkg{rms} and display libraries. A few of the functions do not have \dQuote{Help} documentation. To make \pkg{Hmisc} load silently, issue \code{options(Hverbose=FALSE)} before \code{library(Hmisc)}. } \section{Functions}{ \tabular{ll}{ \bold{Function Name} \tab \bold{Purpose} \cr abs.error.pred \tab Computes various indexes of predictive accuracy based\cr \tab on absolute errors, for linear models\cr addMarginal \tab Add marginal observations over selected variables\cr all.is.numeric \tab Check if character strings are legal numerics\cr approxExtrap \tab Linear extrapolation\cr aregImpute \tab Multiple imputation based on additive regression,\cr \tab bootstrapping, and predictive mean matching\cr areg.boot \tab Nonparametrically estimate transformations for both\cr \tab sides of a multiple additive regression, and\cr \tab bootstrap these estimates and \eqn{R^2}\cr ballocation \tab Optimum sample allocations in 2-sample proportion test\cr binconf \tab Exact confidence limits for a proportion and more accurate\cr \tab (narrower!) score stat.-based Wilson interval\cr \tab (Rollin Brant, mod.
FEH)\cr bootkm \tab Bootstrap Kaplan-Meier survival or quantile estimates\cr bpower \tab Approximate power of 2-sided test for 2 proportions\cr \tab Includes bpower.sim for exact power by simulation\cr bpplot \tab Box-Percentile plot \cr \tab (Jeffrey Banfield, \email{umsfjban@bill.oscs.montana.edu})\cr bpplotM \tab Chart extended box plots for multiple variables\cr bsamsize \tab Sample size requirements for test of 2 proportions\cr bystats \tab Statistics on a single variable by levels of >=1 factors\cr bystats2 \tab 2-way statistics\cr character.table \tab Shows numeric equivalents of all latin characters\cr \tab Useful for putting many special chars. in graph titles\cr \tab (Pierre Joyet, \email{pierre.joyet@bluewin.ch})\cr ciapower \tab Power of Cox interaction test\cr cleanup.import \tab More compactly store variables in a data frame, and clean up\cr \tab problem data when e.g. Excel spreadsheet had a non-\cr \tab numeric value in a numeric column\cr combine.levels \tab Combine infrequent levels of a categorical variable\cr confbar \tab Draws confidence bars on an existing plot using multiple\cr \tab confidence levels distinguished using color or gray scale\cr contents \tab Print the contents (variables, labels, etc.) of a data frame\cr cpower \tab Power of Cox 2-sample test allowing for noncompliance\cr Cs \tab Vector of character strings from list of unquoted names\cr csv.get \tab Enhanced importing of comma separated files labels\cr cut2 \tab Like cut with better endpoint label construction and allows\cr \tab construction of quantile groups or groups with given n\cr datadensity \tab Snapshot graph of distributions of all variables in\cr \tab a data frame. For continuous variables uses scat1d.\cr dataRep \tab Quantify representation of new observations in a database\cr ddmmmyy \tab SAS \dQuote{date7} output format for a chron object\cr deff \tab Kish design effect and intra-cluster correlation\cr describe \tab Function to describe different classes of objects.\cr \tab Invoke by saying describe(object). It calls one of the\cr \tab following:\cr describe.data.frame \tab Describe all variables in a data frame (generalization\cr \tab of SAS UNIVARIATE)\cr describe.default \tab Describe a variable (generalization of SAS UNIVARIATE)\cr dotplot3 \tab A more flexible version of dotplot\cr Dotplot \tab Enhancement of Trellis dotplot allowing for matrix\cr \tab x-var., auto generation of Key function, superposition\cr drawPlot \tab Simple mouse-driven drawing program, including a function\cr \tab for fitting Bezier curves\cr Ecdf \tab Empirical cumulative distribution function plot\cr errbar \tab Plot with error bars (Charles Geyer, U. Chi., mod FEH)\cr event.chart \tab Plot general event charts (Jack Lee, \email{jjlee@mdanderson.org}, \cr \tab Ken Hess, Joel Dubin; Am Statistician 54:63-70,2000)\cr event.history \tab Event history chart with time-dependent cov. status\cr \tab (Joel Dubin, \email{jdubin@uwaterloo.ca})\cr find.matches \tab Find matches (with tolerances) between columns of 2 matrices\cr first.word \tab Find the first word in an \R expression (R Heiberger)\cr fit.mult.impute \tab Fit most regression models over multiple transcan imputations,\cr \tab compute imputation-adjusted variances and avg. 
betas\cr format.df \tab Format a matrix or data frame with much user control\cr \tab (R Heiberger and FE Harrell)\cr ftupwr \tab Power of 2-sample binomial test using Fleiss, Tytun, Ury\cr ftuss \tab Sample size for 2-sample binomial test using " " " "\cr \tab (Both by Dan Heitjan, \email{dheitjan@biostats.hmc.psu.edu})\cr gbayes \tab Bayesian posterior and predictive distributions when both\cr \tab the prior and the likelihood are Gaussian\cr getHdata \tab Fetch and list datasets on our web site\cr hdquantile \tab Harrell-Davis nonparametric quantile estimator with s.e.\cr histbackback \tab Back-to-back histograms (Pat Burns, Salomon Smith\cr \tab Barney, London, \email{pburns@dorado.sbi.com})\cr hist.data.frame \tab Matrix of histograms for all numeric vars. in data frame\cr \tab Use hist.data.frame(data.frame.name)\cr histSpike \tab Add high-resolution spike histograms or density estimates\cr \tab to an existing plot\cr hoeffd \tab Hoeffding's D test (omnibus test of independence of X and Y)\cr impute \tab Impute missing data (generic method)\cr interaction \tab More flexible version of builtin function\cr is.present \tab Tests for non-blank character values or non-NA numeric values\cr james.stein \tab James-Stein shrinkage estimates of cell means from raw data\cr labcurve \tab Optimally label a set of curves that have been drawn on\cr \tab an existing plot, on the basis of gaps between curves.\cr \tab Also position legends automatically at emptiest rectangle.\cr label \tab Set or fetch a label for an \R-object\cr Lag \tab Lag a vector, padding on the left with NA or ''\cr latex \tab Convert an \R object to LaTeX (R Heiberger & FE Harrell)\cr list.tree \tab Pretty-print the structure of any data object\cr \tab (Alan Zaslavsky, \email{zaslavsk@hcp.med.harvard.edu})\cr Load \tab Enhancement of \code{load}\cr mask \tab 8-bit logical representation of a short integer value\cr \tab (Rick Becker)\cr matchCases \tab Match each case on one continuous variable\cr matxv \tab Fast matrix * vector, handling intercept(s) and NAs\cr mgp.axis \tab Version of axis() that uses appropriate mgp from \cr \tab mgp.axis.labels and gets around bug in axis(2, ...)\cr \tab that causes it to assume las=1\cr mgp.axis.labels \tab Used by survplot and plot in \pkg{rms} library (and other\cr \tab functions in the future) so that different spacing\cr \tab between tick marks and axis tick mark labels may be\cr \tab specified for x- and y-axes. \cr \tab Use mgp.axis.labels('default') to set defaults.\cr \tab Users can set values manually using \cr \tab mgp.axis.labels(x,y) where x and y are 2nd value of\cr \tab par('mgp') to use. Use mgp.axis.labels(type=w) to\cr \tab retrieve values, where w='x', 'y', 'x and y', 'xy',\cr \tab to get 3 mgp values (first 3 types) or 2 mgp.axis.labels.\cr minor.tick \tab Add minor tick marks to an existing plot\cr mtitle \tab Add outer titles and subtitles to a multiple plot layout\cr multLines \tab Draw multiple vertical lines at each x\cr \tab in a line plot\cr \%nin\% \tab Opposite of \%in\%\cr nobsY \tab Compute no. non-NA observations for left hand formula side\cr nomiss \tab Return a matrix after excluding any row with an NA\cr panel.bpplot \tab Panel function for trellis bwplot - box-percentile plots\cr panel.plsmo \tab Panel function for trellis xyplot - uses plsmo\cr pBlock \tab Block variables for certain lattice charts\cr pc1 \tab Compute first prin. 
component and get coefficients on\cr\tab original scale of variables\cr plotCorrPrecision \tab Plot precision of estimate of correlation coefficient\cr plsmo \tab Plot smoothed x vs. y with labeling and exclusion of NAs\cr \tab Also allows a grouping variable and plots unsmoothed data\cr popower \tab Power and sample size calculations for ordinal responses\cr \tab (two treatments, proportional odds model)\cr prn \tab prn(expression) does print(expression) but titles the\cr \tab output with 'expression'. Do prn(expression,txt) to add\cr \tab a heading (\sQuote{txt}) before the \sQuote{expression} title\cr pstamp \tab Stamp a plot with date in lower right corner (pstamp())\cr \tab Add ,pwd=T and/or ,time=T to add current directory \cr \tab name or time\cr \tab Put additional text for label as first argument, e.g.\cr \tab pstamp('Figure 1') will draw 'Figure 1 date'\cr putKey \tab Different way to use key()\cr putKeyEmpty \tab Put key at most empty part of existing plot\cr rcorr \tab Pearson or Spearman correlation matrix with pairwise deletion\cr \tab of missing data\cr rcorr.cens \tab Somers' Dxy rank correlation with censored data\cr rcorrp.cens \tab Assess difference in concordance for paired predictors\cr rcspline.eval \tab Evaluate restricted cubic spline design matrix\cr rcspline.plot \tab Plot spline fit with nonparametric smooth and grouped estimates\cr rcspline.restate \tab Restate restricted cubic spline in unrestricted form, and\cr \tab create TeX expression to print the fitted function\cr reShape \tab Reshape a matrix into 3 vectors, reshape serial data\cr rm.boot \tab Bootstrap spline fit to repeated measurements model,\cr \tab with simultaneous confidence region - least\cr \tab squares using spline function in time\cr rMultinom \tab Generate multinomial random variables with varying prob.\cr samplesize.bin \tab Sample size for 2-sample binomial problem\cr \tab (Rick Chappell, \email{chappell@stat.wisc.edu})\cr sas.get \tab Convert SAS dataset to S data frame\cr sasxport.get \tab Enhanced importing of SAS transport dataset in R\cr Save \tab Enhancement of \code{save}\cr scat1d \tab Add 1-dimensional scatterplot to an axis of an existing plot\cr \tab (like bar-codes, FEH/Martin Maechler, \cr \tab \email{maechler@stat.math.ethz.ch}/Jens Oehlschlaegel-Akiyoshi,\cr \tab \email{oehl@psyres-stuttgart.de})\cr score.binary \tab Construct a score from a series of binary variables or\cr \tab expressions\cr sedit \tab A set of character handling functions written entirely\cr \tab in \R. sedit() does much of what the UNIX sed\cr \tab program does. Other functions included are\cr \tab substring.location, substring<-, replace.string.wild,\cr \tab and functions to check if a string is numeric or\cr \tab contains only the digits 0-9\cr setTrellis \tab Set Trellis graphics to use blank conditioning panel strips,\cr \tab line thickness 1 for dot plot reference lines: \cr \tab setTrellis(); 3 optional arguments\cr show.col \tab Show colors corresponding to col=0,1,...,99\cr show.pch \tab Show all plotting characters specified by pch=.\cr \tab Just type show.pch() to draw the table on the \cr \tab current device. \cr showPsfrag \tab Use LaTeX to compile, and dvips and ghostview to\cr \tab display a postscript graphic containing psfrag strings\cr solvet \tab Version of solve with argument tol passed to qr\cr somers2 \tab Somers' rank correlation and c-index for binary y\cr spearman \tab Spearman rank correlation coefficient spearman(x,y)\cr spearman.test \tab Spearman 1 d.f. and 2 d.f. 
rank correlation test\cr spearman2 \tab Spearman multiple d.f. \eqn{\rho^2}{rho^2}, adjusted \eqn{\rho^2}{rho^2}, Wilcoxon-Kruskal-\cr \tab Wallis test, for multiple predictors\cr spower \tab Simulate power of 2-sample test for survival under\cr \tab complex conditions\cr \tab Also contains the Gompertz2,Weibull2,Lognorm2 functions.\cr spss.get \tab Enhanced importing of SPSS files using read.spss function \cr src \tab src(name) = source("name.s") with memory\cr store \tab store an object permanently (easy interface to assign function)\cr strmatch \tab Shortest unique identifier match \cr \tab (Terry Therneau, \email{therneau@mayo.edu})\cr subset \tab More easily subset a data frame\cr substi \tab Substitute one var for another when observations NA\cr summarize \tab Generate a data frame containing stratified summary\cr \tab statistics. Useful for passing to trellis.\cr summary.formula \tab General table making and plotting functions for summarizing\cr \tab data\cr summaryD \tab Summarizing using user-provided formula and dotchart3\cr summaryM \tab Replacement for summary.formula(..., method='reverse')\cr summaryP \tab Multi-panel dot chart for summarizing proportions\cr summaryS \tab Summarize multiple response variables for multi-panel\cr \tab dot chart or scatterplot\cr summaryRc \tab Summary for continuous variables using lowess\cr symbol.freq \tab X-Y Frequency plot with circles' area prop. to frequency\cr sys \tab Execute unix() or dos() depending on what's running\cr tabulr \tab Front-end to tabular function in the tables package\cr tex \tab Enclose a string with the correct syntax for using\cr \tab with the LaTeX psfrag package, for postscript graphics\cr transace \tab ace() packaged for easily automatically transforming all\cr \tab variables in a matrix\cr transcan \tab automatic transformation and imputation of NAs for a\cr \tab series of predictor variables\cr trap.rule \tab Area under curve defined by arbitrary x and y vectors,\cr \tab using trapezoidal rule\cr trellis.strip.blank \tab To make the strip titles in trellis more visible, you can \cr \tab make the backgrounds blank by saying trellis.strip.blank().\cr \tab Use before opening the graphics device.\cr t.test.cluster \tab 2-sample t-test for cluster-randomized observations\cr uncbind \tab Form individual variables from a matrix\cr upData \tab Update a data frame (change names, labels, remove vars, etc.)\cr units \tab Set or fetch "units" attribute - units of measurement for var.\cr varclus \tab Graph hierarchical clustering of variables using squared\cr \tab Pearson or Spearman correlations or Hoeffding D as similarities\cr \tab Also includes the naclus function for examining similarities in\cr \tab patterns of missing values across variables.\cr wtd.mean \tab \cr wtd.var \tab \cr wtd.quantile \tab \cr wtd.Ecdf \tab \cr wtd.table \tab \cr wtd.rank \tab \cr wtd.loess.noiter \tab \cr num.denom.setup \tab Set of functions for obtaining weighted estimates\cr xy.group \tab Compute mean x vs. function of y by groups of x\cr xYplot \tab Like trellis xyplot but supports error bars and multiple\cr \tab response variables that are connected as separate lines\cr ynbind \tab Combine a series of yes/no true/false present/absent variables into a matrix\cr zoom \tab Zoom in on any graphical display\cr \tab (Bill Dunlap, \email{bill@statsci.com}) }} \references{ See Alzola CF, Harrell FE (2004): An Introduction to S and the Hmisc and Design Libraries at \url{https://hbiostat.org/R/doc/sintro.pdf} for extensive documentation and examples for the Hmisc package. } \section{Copyright Notice}{ \bold{GENERAL DISCLAIMER}\cr This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version.\cr This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.\cr In short: You may use it any way you like, as long as you don't charge money for it, remove this notice, or hold anyone liable for its results. Also, please acknowledge the source and communicate changes to the author.\cr If this software is used in work presented for publication, kindly reference it using for example:\cr Harrell FE (2014): Hmisc: A package of miscellaneous R functions. Programs available from \url{https://hbiostat.org/R/Hmisc/}.\cr Be sure to reference \R itself and other libraries used. } \author{ Frank E Harrell Jr\cr Professor of Biostatistics\cr Vanderbilt University School of Medicine\cr Nashville, Tennessee\cr \email{fh@fharrell.com} } \keyword{misc} \concept{overview} Hmisc/man/escapeRegex.Rd0000644000176200001440000000260712243661443014624 0ustar liggesusers\name{escapeRegex} \alias{escapeRegex} \alias{escapeBS} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Escapes any characters that would have special meaning in a regular expression. } \description{ Escapes any characters that would have special meaning in a regular expression. } \usage{ escapeRegex(string) escapeBS(string) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{string}{ string being operated on. } } \details{ \code{escapeRegex} will escape any characters that would have special meaning in a regular expression. For any string \code{grep(escapeRegex(string), string)} will always be true. \code{escapeBS} will escape any backslash \samp{\\} in a string. } \value{ The value of the string with any characters that would have special meaning in a regular expression escaped. } \author{ Charles Dupont\cr Department of Biostatistics\cr Vanderbilt University } \seealso{\code{\link[base]{grep}} } \examples{ string <- "this\\\\(system) {is} [full]."
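# The escaped pattern should match the literal string, so
# grepl(escapeRegex(string), string) is expected to be TRUE even though
# the string contains (), {}, [] and backslashes: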
escapeRegex(string) escapeBS(string) \dontshow{ if(!any(grep(escapeRegex(string), string))) { stop("function escapeRegex failed test") } if(escapeBS(string) != "this\\\\\\\\(system) {is} [full].") { stop("function escapeBS failed test") } } } \keyword{ manip }% at least one, from doc/KEYWORDS \keyword{ character }% __ONLY ONE__ keyword per line \keyword{ programming } Hmisc/man/histboxp.Rd0000644000176200001440000001030513672163323014224 0ustar liggesusers\name{histboxp} \alias{histboxp} \alias{histboxpM} \alias{dhistboxp} \title{Use plotly to Draw Stratified Spike Histogram and Box Plot Statistics} \description{ Uses \code{plotly} to draw horizontal spike histograms stratified by \code{group}, plus the mean (solid dot) and vertical bars for these quantiles: 0.05 (red, short), 0.25 (blue, medium), 0.50 (black, long), 0.75 (blue, medium), and 0.95 (red, short). The robust dispersion measure Gini's mean difference and the SD may optionally be added. These are shown as horizontal lines starting at the minimum value of \code{x} having a length equal to the mean difference or SD. Even when Gini's and SD are computed, they are not drawn unless the user clicks on their legend entry. Spike histograms have the advantage of effectively showing the raw data for both small and huge datasets, and unlike box plots allow multi-modality to be easily seen. \code{histboxpM} plots multiple histograms stacked vertically, for variables in a data frame having a common \code{group} variable (if any) and combined using \code{plotly::subplot}. \code{dhistboxp} is like \code{histboxp} but no \code{plotly} graphics are actually drawn. Instead, a data frame suitable for use with \code{plotlyM} is returned. For \code{dhistboxp} an additional level of stratification \code{strata} is implemented. \code{group} causes a different result here to produce back-to-back histograms (in the case of two groups) for each level of \code{strata}. } \usage{ histboxp(p = plotly::plot_ly(height=height), x, group = NULL, xlab=NULL, gmd=TRUE, sd=FALSE, bins = 100, wmax=190, mult=7, connect=TRUE, showlegend=TRUE) dhistboxp(x, group = NULL, strata=NULL, xlab=NULL, gmd=FALSE, sd=FALSE, bins = 100, nmin=5, ff1=1, ff2=1) histboxpM(p=plotly::plot_ly(height=height, width=width), x, group=NULL, gmd=TRUE, sd=FALSE, width=NULL, nrows=NULL, ncols=NULL, ...) } \arguments{ \item{p}{\code{plotly} graphics object if already begun} \item{x}{a numeric vector, or for \code{histboxpM} a numeric vector or a data frame of numeric vectors, hopefully with \code{label} and \code{units} attributes} \item{group}{a discrete grouping variable. If omitted, defaults to a vector of ones} \item{strata}{a discrete numeric stratification variable. Values are also used to space out different spike histograms. Defaults to a vector of ones.} \item{xlab}{x-axis label, defaults to labelled version include units of measurement if any} \item{gmd}{set to \code{FALSE} to not compute Gini's mean difference} \item{sd}{set to \code{TRUE} to compute the SD} \item{width}{width in pixels} \item{nrows}{number of rows for layout of multiple plots} \item{ncols}{number of columns for layout of multiple plots. At most one of \code{nrows,ncols} should be specified.} \item{bins}{number of equal-width bins to use for spike histogram. 
If the number of distinct values of \code{x} is less than \code{bins}, the actual values of \code{x} are used.} \item{nmin}{minimum number of non-missing observations for a group-stratum combination before the spike histogram and quantiles are drawn} \item{ff1,ff2}{fudge factors for position and bar length for spike histograms} \item{wmax,mult}{tweaks for margin to allocate} \item{connect}{set to \code{FALSE} to suppress lines connecting quantiles} \item{showlegend}{used if producing multiple plots to be combined with \code{subplot}; set to \code{FALSE} for all but one plot} \item{\dots}{other arguments for \code{histboxpM} that are passed to \code{histboxp}} } \value{a \code{plotly} object. For \code{dhistboxp} a data frame as expected by \code{plotlyM}} \author{Frank Harrell} \seealso{\code{\link{histSpike}}, \code{\link{plot.describe}}, \code{\link{scat1d}}} \examples{ \dontrun{ dist <- c(rep(1, 500), rep(2, 250), rep(3, 600)) Distribution <- factor(dist, 1 : 3, c('Unimodal', 'Bimodal', 'Trimodal')) x <- c(rnorm(500, 6, 1), rnorm(200, 3, .7), rnorm(50, 7, .4), rnorm(200, 2, .7), rnorm(300, 5.5, .4), rnorm(100, 8, .4)) histboxp(x=x, group=Distribution, sd=TRUE) X <- data.frame(x, x2=runif(length(x))) histboxpM(x=X, group=Distribution, ncols=2) # separate plots } } \keyword{hplot} Hmisc/man/latexTabular.Rd0000644000176200001440000000435313714234051015014 0ustar liggesusers\name{latexTabular} \alias{latexTabular} \title{Convert a Data Frame or Matrix to a LaTeX Tabular} \description{ \code{latexTabular} creates a character string representing a matrix or data frame in a simple \samp{tabular} environment. } \usage{ latexTabular(x, headings=colnames(x), align =paste(rep('c',ncol(x)),collapse=''), halign=paste(rep('c',ncol(x)),collapse=''), helvetica=TRUE, translate=TRUE, hline=0, center=FALSE, \dots) } \arguments{ \item{x}{a matrix or data frame, or a vector that is automatically converted to a matrix} \item{headings}{a vector of character strings specifying column headings for \samp{latexTabular}, defaulting to \code{x}'s \code{colnames}. To make multi-line headers use the newline character inside elements of \code{headings}.} \item{align}{a character string specifying column alignments for \samp{latexTabular}, defaulting to \code{paste(rep('c',ncol(x)),collapse='')} to center. You may specify \code{align='c|c'} and other LaTeX tabular formatting.} \item{halign}{a character string specifying alignment for column headings, defaulting to centered.} \item{helvetica}{set to \code{FALSE} to use the default LaTeX font in \samp{latexTabular} instead of helvetica.} \item{translate}{set to \code{FALSE} if column headings and table entries are already in LaTeX format, otherwise \code{latexTabular} will run them through \code{latexTranslate}} \item{hline}{set to 1 to put \code{hline} after heading, 2 to also put \code{hline}s before and after heading and at table end} \item{center}{set to \code{TRUE} to enclose the tabular in a LaTeX \code{center} environment} \item{\dots}{if present, \code{x} is run through \code{format.df} with those extra arguments} } \value{a character string containing LaTeX markup} \author{ Frank E.
Harrell, Jr.,\cr Department of Biostatistics,\cr Vanderbilt University,\cr \email{fh@fharrell.com} } \seealso{ \code{\link{latex.default}}, \code{\link{format.df}} } \examples{ x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'),c('c','d','this that'))) latexTabular(x) # a character string with LaTeX markup } \keyword{utilities} \keyword{interface} \keyword{methods} \keyword{file} \keyword{character} \keyword{manip} Hmisc/man/dataRep.Rd0000644000176200001440000001351313714234051013742 0ustar liggesusers\name{dataRep} \alias{dataRep} \alias{print.dataRep} \alias{predict.dataRep} \alias{print.predict.dataRep} \alias{roundN} \alias{[.roundN} \title{ Representativeness of Observations in a Data Set } \description{ These functions are intended to be used to describe how well a given set of new observations (e.g., new subjects) were represented in a dataset used to develop a predictive model. The \code{dataRep} function forms a data frame that contains all the unique combinations of variable values that existed in a given dataset. Cross--classifications of values are created using exact values of variables, so for continuous numeric variables it is often necessary to round them to the nearest \code{v} and to possibly curtail the values to some lower and upper limit before rounding. Here \code{v} denotes a numeric constant specifying the matching tolerance that will be used. \code{dataRep} also stores marginal distribution summaries for all the variables. For numeric variables, all 101 percentiles are stored, and for all variables, the frequency distributions are also stored (frequencies are computed after any rounding and curtailment of numeric variables). For the purposes of rounding and curtailing, the \code{roundN} function is provided. A \code{print} method will summarize the calculations made by \code{dataRep}, and if \code{long=TRUE} all unique combinations of values and their frequencies in the original dataset are printed. The \code{predict} method for \code{dataRep} takes a new data frame having variables named the same as the original ones (but whose factor levels are not necessarily in the same order) and examines the collapsed cross-classifications created by \code{dataRep} to find how many observations were similar to each of the new observations after any rounding or curtailment of limits is done. \code{predict} also does some calculations to describe how the variable values of the new observations "stack up" against the marginal distributions of the original data. For categorical variables, the percent of observations having a given variable with the value of the new observation (after rounding for variables that were passed through \code{roundN} in the formula given to \code{dataRep}) is computed. For numeric variables, the percentile of the original distribution in which the current value falls will be computed. For this purpose, the data are not rounded because the 101 original percentiles were retained; linear interpolation is used to estimate percentiles for values between two tabulated percentiles. The lowest marginal frequency of matching values across all variables is also computed. For example, if an age, sex combination matches 10 subjects in the original dataset but the age value matches 100 ages (after rounding) and the sex value matches the sex code of 300 observations, the lowest marginal frequency is 100, which is a "best case" upper limit for multivariable matching. I.e., matching on all variables has to result in a frequency no larger than this amount.
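To see the matching tolerance in isolation, here is a small illustration (the ages and limits are hypothetical):
\preformatted{roundN(c(23, 27), tol=5)                # both round to 25, so the two ages match
roundN(c(23, 62), tol=5, clip=c(20, 60)) # 62 is curtailed to 60 before rounding
}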
A \code{print} method for the output of \code{predict.dataRep} prints all calculations done by \code{predict} by default. Calculations can be selectively suppressed. } \usage{ dataRep(formula, data, subset, na.action) roundN(x, tol=1, clip=NULL) \method{print}{dataRep}(x, long=FALSE, \dots) \method{predict}{dataRep}(object, newdata, \dots) \method{print}{predict.dataRep}(x, prdata=TRUE, prpct=TRUE, \dots) } \arguments{ \item{formula}{ a formula with no left-hand-side. Continuous numeric variables in need of rounding should appear in the formula as e.g. \code{roundN(x,5)} to have a tolerance of e.g. +/- 2.5 in matching. Factor or character variables as well as numeric ones not passed through \code{roundN} are matched on exactly. } \item{x}{ a numeric vector or an object created by \code{dataRep} } \item{object}{ the object created by \code{dataRep} or \code{predict.dataRep} } \item{data, subset, na.action}{ standard modeling arguments. Default \code{na.action} is \code{na.delete}, i.e., observations in the original dataset having any variables missing are deleted up front. } \item{tol}{ rounding constant (tolerance is actually \code{tol/2} as values are rounded to the nearest \code{tol}) } \item{clip}{ a 2-vector specifying a lower and upper limit to curtail values of \code{x} before rounding } \item{long}{ set to \code{TRUE} to see all unique combinations and frequency count } \item{newdata}{ a data frame containing all the variables given to \code{dataRep} but not necessarily in the same order or having factor levels in the same order } \item{prdata}{ set to \code{FALSE} to suppress printing \code{newdata} and the count of matching observations (plus the worst-case marginal frequency). } \item{prpct}{set to \code{FALSE} to not print percentiles and percents} \item{\dots}{unused} } \value{ \code{dataRep} returns a list of class \code{"dataRep"} containing the collapsed data frame and frequency counts along with marginal distribution information. \code{predict} returns an object of class \code{"predict.dataRep"} containing information determined by matching observations in \code{newdata} with the original (collapsed) data. } \section{Side Effects}{ \code{print.dataRep} prints. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \seealso{ \code{\link{round}}, \code{\link{table}} } \examples{ set.seed(13) num.symptoms <- sample(1:4, 1000,TRUE) sex <- factor(sample(c('female','male'), 1000,TRUE)) x <- runif(1000) x[1] <- NA table(num.symptoms, sex, .25*round(x/.25)) d <- dataRep(~ num.symptoms + sex + roundN(x,.25)) print(d, long=TRUE) predict(d, data.frame(num.symptoms=1:3, sex=c('male','male','female'), x=c(.03,.5,1.5))) } \keyword{datasets} \keyword{category} \keyword{cluster} \keyword{manip} \keyword{models} % Converted by Sd2Rd version 1.21. Hmisc/man/trunc.POSIXt.Rd0000644000176200001440000000337013215524663014611 0ustar liggesusers\name{trunc.POSIXt} \alias{truncPOSIXt} \alias{ceil.POSIXt} \alias{ceil} \alias{ceil.default} \alias{roundPOSIXt} \title{Return the floor, ceiling, or rounded value of date or time to specified unit.} \description{ \code{truncPOSIXt} returns the date truncated to the specified unit. \code{ceil.POSIXt} returns next ceiling of the date at the unit selected in \code{units}. \code{roundPOSIXt} returns the date or time value rounded to nearest specified unit selected in \code{digits}. 
\code{truncPOSIXt} and \code{roundPOSIXt} have been extended from the \code{base} package functions \code{trunc.POSIXt} and \code{round.POSIXt}, which in the future will add the other time units we need. } \usage{ ceil(x, units,\dots) \method{ceil}{default}(x, units, \dots) truncPOSIXt(x, units = c("secs", "mins", "hours", "days", "months", "years"), \dots) \method{ceil}{POSIXt}(x, units = c("secs", "mins", "hours", "days", "months", "years"), \dots) roundPOSIXt(x, digits = c("secs", "mins", "hours", "days", "months", "years")) } \arguments{ \item{x}{ date to be ceilinged, truncated, or rounded } \item{units}{ unit that the date or time is rounded up or down to } \item{digits}{ same as \code{units} but different name to be compatible with \code{\link{round}} generic. } \item{\dots}{further arguments to be passed to or from other methods.} } \value{ An object of class \code{POSIXlt}. } \author{ Charles Dupont } \seealso{ \code{\link{Date}} \code{\link{POSIXt}} \code{\link{POSIXlt}} \code{\link{DateTimeClasses}}} \examples{ date <- ISOdate(1832, 7, 12) ceil(date, units='months') # '1832-8-1' truncPOSIXt(date, units='years') # '1832-1-1' roundPOSIXt(date, digits='months') # '1832-7-1' } \keyword{ manip } \keyword{ utilities } \keyword{ chron } Hmisc/man/xYplot.Rd0000644000176200001440000005577013714234037013674 0ustar liggesusers\name{xYplot} \alias{xYplot} \alias{panel.xYplot} \alias{prepanel.xYplot} \alias{Dotplot} \alias{panel.Dotplot} \alias{prepanel.Dotplot} \alias{Cbind} \alias{[.Cbind} \alias{setTrellis} \alias{numericScale} \title{xyplot and dotplot with Matrix Variables to Plot Error Bars and Bands} \description{ A utility function \code{Cbind} returns the first argument as a vector and combines all other arguments into a matrix stored as an attribute called \code{"other"}. The arguments can be named (e.g., \code{Cbind(pressure=y,ylow,yhigh)}) or a \code{label} attribute may be pre-attached to the first argument. In either case, the name or label of the first argument is stored as an attribute \code{"label"} of the object returned by \code{Cbind}. Storing other vectors as a matrix attribute facilitates plotting error bars, etc., as \code{trellis} really wants the x- and y-variables to be vectors, not matrices. If a single argument is given to \code{Cbind} and that argument is a matrix with column dimnames, the first column is taken as the main vector and remaining columns are taken as \code{"other"}. A subscript method for \code{Cbind} objects subscripts the \code{other} matrix along with the main \code{y} vector. The \code{xYplot} function is a substitute for \code{xyplot} that allows for simulated multi-column \code{y}. It uses by default the \code{panel.xYplot} and \code{prepanel.xYplot} functions to do the actual work. The \code{method} argument passed to \code{panel.xYplot} from \code{xYplot} allows you to make error bars, the upper-only or lower-only portions of error bars, alternating lower-only and upper-only bars, bands, or filled bands. \code{panel.xYplot} decides how to alternate upper and lower bars according to whether the median \code{y} value of the current main data line is above the median \code{y} for all \code{groups} of lines or not. If the median is above the overall median, only the upper bar is drawn. For \code{bands} (but not 'filled bands'), any number of other columns of \code{y} will be drawn as lines having the same thickness, color, and type as the main data line.
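A minimal sketch of the basic pattern (the vectors are invented for illustration, and Hmisc is assumed to be attached): lower and upper limits supplied through \code{Cbind} become error bars.
\preformatted{y <- rnorm(20); x <- 1:20
xYplot(Cbind(y, y - .3, y + .3) ~ x, method='bars')
}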
If plotting bars, bands, or filled bands and only one additional column is specified for the response variable, that column is taken as the half width of a precision interval for \code{y}, and the lower and upper values are computed automatically as \code{y} plus or minus the value of the additional column variable. When a \code{groups} variable is present, \code{panel.xYplot} will create a function in frame 0 (\code{.GlobalEnv} in \R) called \code{Key} that when invoked will draw a key describing the \code{groups} labels, point symbols, and colors. By default, the key is outside the graph. For S-Plus, if \code{Key(locator(1))} is specified, the key will appear so that its upper left corner is at the coordinates of the mouse click. For R/Lattice the first two arguments of \code{Key} (\code{x} and \code{y}) are fractions of the page, measured from the lower left corner, and the default placement is at \code{x=0.05, y=0.95}. For \R, an optional argument to \code{sKey}, \code{other}, may contain a list of arguments to pass to \code{draw.key} (see \code{\link[lattice]{xyplot}} for a list of possible arguments, under the \code{key} option). When \code{method="quantile"} is specified, \code{xYplot} automatically groups the \code{x} variable into intervals containing a target of \code{nx} observations each, and within each \code{x} group computes three quantiles of \code{y} and plots these as three lines. The mean \code{x} within each \code{x} group is taken as the \code{x}-coordinate. This will make a useful empirical display for large datasets in which scatterdiagrams are too busy to see patterns of central tendency and variability. You can also specify a general function of a data vector that returns a matrix of statistics for the \code{method} argument. Arguments can be passed to that function via a list \code{methodArgs}. The statistic in the first column should be the measure of central tendency. Examples of useful \code{method} functions are those listed under the help file for \code{summary.formula} such as \code{smean.cl.normal}. \code{xYplot} can also produce bubble plots. This is done when \code{size} is specified to \code{xYplot}. When \code{size} is used, a function \code{sKey} is generated for drawing a key to the character sizes. See the bubble plot example. \code{size} can also specify a vector where the first character of each observation is used as the plotting symbol, if \code{rangeCex} is set to a single \code{cex} value. An optional argument to \code{sKey}, \code{other}, may contain a list of arguments to pass to \code{draw.key} (see \code{\link[lattice]{xyplot}} for a list of possible arguments, under the \code{key} option). See the bubble plot example. \code{Dotplot} is a substitute for \code{dotplot} allowing for a matrix x-variable, automatic superpositioning when \code{groups} is present, and creation of a \code{Key} function. When the x-variable (created by \code{Cbind} to simulate a matrix) contains a total of 3 columns, the first column specifies where the dot is positioned, and the last 2 columns specify starting and ending points for intervals. The intervals are shown using line type, width, and color from the trellis \code{plot.line} list. By default, you will usually see a darker line segment for the low and high values, with the dotted reference line elsewhere. A good choice of the \code{pch} argument for such plots is \code{3} (plus sign) if you want to emphasize the interval more than the point estimate. 
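For instance, a point estimate plus an interval can be drawn in the 3-column case with something like the following sketch (the values are made up):
\preformatted{est <- c(1.2, 2.0, 2.8)
Dotplot(factor(c('a','b','c')) ~ Cbind(est, est - .5, est + .5), pch=3)
}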
When the x-variable contains a total of 5 columns, the 2nd and 5th columns are treated as the 2nd and 3rd are treated above, and the 3rd and 4th columns define an inner line segment that will have twice the thickness of the outer segments. In addition, tick marks separate the outer and inner segments. This type of display (an example of which appeared in \emph{The Elements of Graphing Data} by Cleveland) is very suitable for displaying two confidence levels (e.g., 0.9 and 0.99) or the 0.05, 0.25, 0.75, 0.95 sample quantiles, for example. For this display, the central point displays well with a default circle symbol. \code{setTrellis} sets nice defaults for Trellis graphics, assuming that the graphics device has already been opened if using postscript, etc. By default, it sets panel strips to blank and reference dot lines to thickness 1 instead of the Trellis default of 2. \code{numericScale} is a utility function that facilitates using \code{xYplot} to plot variables that are not considered to be numeric but which can readily be converted to numeric using \code{as.numeric()}. \code{numericScale} by default will keep the name of the input variable as a \code{label} attribute for the new numeric variable. } \usage{ Cbind(\dots) xYplot(formula, data = sys.frame(sys.parent()), groups, subset, xlab=NULL, ylab=NULL, ylim=NULL, panel=panel.xYplot, prepanel=prepanel.xYplot, scales=NULL, minor.ticks=NULL, sub=NULL, \dots) panel.xYplot(x, y, subscripts, groups=NULL, type=if(is.function(method) || method=='quantiles') 'b' else 'p', method=c("bars", "bands", "upper bars", "lower bars", "alt bars", "quantiles", "filled bands"), methodArgs=NULL, label.curves=TRUE, abline, probs=c(.5,.25,.75), nx=NULL, cap=0.015, lty.bar=1, lwd=plot.line$lwd, lty=plot.line$lty, pch=plot.symbol$pch, cex=plot.symbol$cex, font=plot.symbol$font, col=NULL, lwd.bands=NULL, lty.bands=NULL, col.bands=NULL, minor.ticks=NULL, col.fill=NULL, size=NULL, rangeCex=c(.5,3), \dots) prepanel.xYplot(x, y, \dots) Dotplot(formula, data = sys.frame(sys.parent()), groups, subset, xlab = NULL, ylab = NULL, ylim = NULL, panel=panel.Dotplot, prepanel=prepanel.Dotplot, scales=NULL, xscale=NULL, \dots) prepanel.Dotplot(x, y, \dots) panel.Dotplot(x, y, groups = NULL, pch = dot.symbol$pch, col = dot.symbol$col, cex = dot.symbol$cex, font = dot.symbol$font, abline, \dots) setTrellis(strip.blank=TRUE, lty.dot.line=2, lwd.dot.line=1) numericScale(x, label=NULL, \dots) } \arguments{ \item{\dots}{ for \code{Cbind} \code{\dots} is any number of additional numeric vectors. Unless you are using \code{Dotplot} (which allows for either 2 or 4 "other" variables) or \code{xYplot} with \code{method="bands"}, vectors after the first two are ignored. If drawing bars and only one extra variable is given in \code{\dots}, upper and lower values are computed as described above. If the second argument to \code{Cbind} is a matrix, that matrix is stored in the \code{"other"} attribute and arguments after the second are ignored. For bubble plots, name an argument \code{cex}. Also can be other arguments to pass to \code{labcurve}. } \item{formula}{ a \code{trellis} formula consistent with \code{xyplot} or \code{dotplot} } \item{x}{ \code{x}-axis variable. For \code{numericScale} \code{x} is any vector such as \code{as.numeric(x)} returns a numeric vector suitable for x- or y-coordinates. } \item{y}{ a vector, or an object created by \code{Cbind} for \code{xYplot}. \code{y} represents the main variable to plot, i.e., the variable used to draw the main lines. 
For \code{Dotplot} the first argument to \code{Cbind} will be the main \code{x}-axis variable. } \item{data,subset,ylim,subscripts,groups,type,scales,panel,prepanel,xlab,ylab}{ see \code{trellis.args}. \code{xlab} and \code{ylab} get default values from \code{"label"} attributes. } \item{xscale}{allows one to use the default \code{scales} but specify only the \code{x} component of it for \code{Dotplot}} \item{method}{ defaults to \code{"bars"} to draw error-bar type plots. See meaning of other values above. \code{method} can be a function. Specifying \code{method=quantile}, \code{methodArgs=list(probs=c(.5,.25,.75))} is the same as specifying \code{method="quantile"} without specifying \code{probs}. } \item{methodArgs}{ a list containing optional arguments to be passed to the function specified in \code{method} } \item{label.curves}{ set to \code{FALSE} to suppress invocation of \code{labcurve} to label primary curves where they are most separated or to draw a legend in an empty spot on the panel. You can also set \code{label.curves} to a list of options to pass to \code{labcurve}. These options can also be passed as \code{\dots} to \code{xYplot}. See the examples below. } \item{abline}{ a list of arguments to pass to \code{panel.abline} for each panel, e.g. \code{list(a=0, b=1, col=3)} to draw the line of identity using color 3. To make multiple calls to \code{panel.abline}, pass a list of unnamed lists as \code{abline}, e.g., \code{abline=list(list(h=0),list(v=1))}. } \item{probs}{ a vector of three quantiles with the quantile corresponding to the central line listed first. By default \code{probs=c(.5, .25, .75)}. You can also specify \code{probs} through \code{methodArgs=list(probs=\dots)}. } \item{nx}{ number of target observations for each \code{x} group (see \code{cut2} \code{m} argument). \code{nx} defaults to the minimum of 40 and the number of points in the current stratum divided by 4. Set \code{nx=FALSE} or \code{nx=0} if \code{x} is already discrete and requires no grouping. } \item{cap}{ the half-width of horizontal end pieces for error bars, as a fraction of the length of the \code{x}-axis } \item{lty.bar}{ line type for bars } \item{lwd, lty, pch, cex, font, col}{ see \code{trellis.args}. These are vectors when \code{groups} is present, and the order of their elements corresponds to the different \code{groups}, regardless of how many bands or bars are drawn. If you don't specify \code{lty.bands}, for example, all band lines within each group will have the same \code{lty}. } \item{lty.bands, lwd.bands, col.bands}{ used to allow \code{lty}, \code{lwd}, \code{col} to vary across the different band lines for different \code{groups}. These parameters are vectors or lists whose elements correspond to the added band lines (i.e., they ignore the central line, whose line characteristics are defined by \code{lty}, \code{lwd}, \code{col}). For example, suppose that 4 lines are drawn in addition to the central line. Specifying \code{lwd.bands=1:4} will cause line widths of 1:4 to be used for every group, regardless of the value of \code{lwd}. To vary characteristics over the \code{groups} use e.g. \code{lwd.bands=list(rep(1,4), rep(2,4))} or \code{list(c(1,2,1,2), c(3,4,3,4))}. } \item{minor.ticks}{ a list with elements \code{at} and \code{labels} specifying positions and labels for minor tick marks to be used on the x-axis of each panel, if any. } \item{sub}{an optional subtitle} \item{col.fill}{ used to override default colors used for the bands in method='filled bands'. 
This is a vector when \code{groups} is present, and the order of the elements corresponds to the different \code{groups}, regardless of how many bands are drawn. The default colors for 'filled bands' are pastel colors matching the default colors \code{superpose.line$col} (\code{plot.line$col}) } \item{size}{ a vector the same length as \code{x} giving a variable whose values are a linear function of the size of the symbol drawn. This is used for example for bubble plots. } \item{rangeCex}{ a vector of two values specifying the range in character sizes to use for the \code{size} variable (lowest first, highest second). \code{size} values are linearly translated to this range, based on the observed range of \code{size} when \code{x} and \code{y} coordinates are not missing. Specify a single numeric \code{cex} value for \code{rangeCex} to use the first character of each observation's \code{size} as the plotting symbol. } \item{strip.blank}{ set to \code{FALSE} to not make the panel strip backgrounds blank } \item{lty.dot.line}{ line type for dot plot reference lines (default = 2 for dotted; use 1 for solid) } \item{lwd.dot.line}{ line thickness for reference lines for dot plots (default = 1) } \item{label}{ a scalar character string to be used as a variable label after \code{numericScale} converts the variable to numeric form } } \value{ \code{Cbind} returns a matrix with attributes. Other functions return standard \code{trellis} results. } \section{Side Effects}{ creates plots; \code{panel.xYplot} may also create temporary \code{Key} and \code{sKey} functions in the session frame. } \details{ Unlike \code{xyplot}, \code{xYplot} senses the presence of a \code{groups} variable and automatically invokes \code{panel.superpose} instead of \code{panel.xyplot}. The same is true for \code{Dotplot} vs. \code{dotplot}. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} \cr Madeline Bauer \cr Department of Infectious Diseases \cr University of Southern California School of Medicine \cr \email{mbauer@usc.edu} } \seealso{ \code{\link[lattice]{xyplot}}, \code{\link[lattice]{panel.xyplot}}, \code{\link{summarize}}, \code{\link{label}}, \code{\link{labcurve}}, \code{\link{errbar}}, \code{\link[lattice:xyplot]{dotplot}}, \code{\link{reShape}}, \code{\link{cut2}}, \code{\link[lattice:panel.functions]{panel.abline}} } \examples{ # Plot 6 smooth functions. Superpose 3, panel 2. # Label curves with p=1,2,3 where most separated d <- expand.grid(x=seq(0,2*pi,length=150), p=1:3, shift=c(0,pi)) xYplot(sin(x+shift)^p ~ x | shift, groups=p, data=d, type='l') # Use a key instead, use 3 line widths instead of 3 colors # Put key in most empty portion of each panel xYplot(sin(x+shift)^p ~ x | shift, groups=p, data=d, type='l', keys='lines', lwd=1:3, col=1) # Instead of implicitly using labcurve(), put a # single key outside of panels at lower left corner xYplot(sin(x+shift)^p ~ x | shift, groups=p, data=d, type='l', label.curves=FALSE, lwd=1:3, col=1, lty=1:3) Key() # Bubble plots x <- y <- 1:8 x[2] <- NA units(x) <- 'cm^2' z <- 101:108 p <- factor(rep(c('a','b'),4)) g <- c(rep(1,7),2) data.frame(p, x, y, z, g) xYplot(y ~ x | p, groups=g, size=z) Key(other=list(title='g', cex.title=1.2)) # draw key for colors sKey(.2,.85,other=list(title='Z Values', cex.title=1.2)) # draw key for character sizes # Show the median and quartiles of height given age, stratified # by sex and race. Draws 2 sets (male, female) of 3 lines per panel.
# xYplot(height ~ age | race, groups=sex, method='quantiles') # Examples of plotting raw data dfr <- expand.grid(month=1:12, continent=c('Europe','USA'), sex=c('female','male')) set.seed(1) dfr <- upData(dfr, y=month/10 + 1*(sex=='female') + 2*(continent=='Europe') + runif(48,-.15,.15), lower=y - runif(48,.05,.15), upper=y + runif(48,.05,.15)) xYplot(Cbind(y,lower,upper) ~ month,subset=sex=='male' & continent=='USA', data=dfr) xYplot(Cbind(y,lower,upper) ~ month|continent, subset=sex=='male',data=dfr) xYplot(Cbind(y,lower,upper) ~ month|continent, groups=sex, data=dfr); Key() # add ,label.curves=FALSE to suppress use of labcurve to label curves where # farthest apart xYplot(Cbind(y,lower,upper) ~ month,groups=sex, subset=continent=='Europe', data=dfr) xYplot(Cbind(y,lower,upper) ~ month,groups=sex, type='b', subset=continent=='Europe', keys='lines', data=dfr) # keys='lines' causes labcurve to draw a legend where the panel is most empty xYplot(Cbind(y,lower,upper) ~ month,groups=sex, type='b', data=dfr, subset=continent=='Europe',method='bands') xYplot(Cbind(y,lower,upper) ~ month,groups=sex, type='b', data=dfr, subset=continent=='Europe',method='upper') label(dfr$y) <- 'Quality of Life Score' # label is in Hmisc library = attr(y,'label') <- 'Quality\dots'; will be # y-axis label # can also specify Cbind('Quality of Life Score'=y,lower,upper) xYplot(Cbind(y,lower,upper) ~ month, groups=sex, subset=continent=='Europe', method='alt bars', offset=unit(.1,'inches'), type='b', data=dfr) # offset passed to labcurve to label .4 y units away from curve # for R (using grid/lattice), offset is specified using the grid # unit function, e.g., offset=unit(.4,'native') or # offset=unit(.1,'inches') or unit(.05,'npc') # The following example uses the summarize function in Hmisc to # compute the median and outer quartiles. The outer quartiles are # displayed using "error bars" set.seed(111) dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100) month <- dfr$month; year <- dfr$year y <- abs(month-6.5) + 2*runif(length(month)) + year-1997 s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5) xYplot(Cbind(y,Lower,Upper) ~ month, groups=year, data=s, keys='lines', method='alt', type='b') # Can also do: s <- summarize(y, llist(month,year), quantile, probs=c(.5,.25,.75), stat.name=c('y','Q1','Q3')) xYplot(Cbind(y, Q1, Q3) ~ month, groups=year, data=s, type='b', keys='lines') # Or: xYplot(y ~ month, groups=year, keys='lines', nx=FALSE, method='quantile', type='b') # nx=FALSE means to treat month as a discrete variable # To display means and bootstrapped nonparametric confidence intervals # use: s <- summarize(y, llist(month,year), smean.cl.boot) s xYplot(Cbind(y, Lower, Upper) ~ month | year, data=s, type='b') # Can also use Y <- cbind(y, Lower, Upper); xYplot(Cbind(Y) ~ ...) # Or: xYplot(y ~ month | year, nx=FALSE, method=smean.cl.boot, type='b') # This example uses the summarize function in Hmisc to # compute the median and outer quartiles. 
The outer quartiles are # displayed using "filled bands" s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5) # filled bands: default fill = pastel colors matching solid colors # in superpose.line (this works differently in R) xYplot(Cbind(y, Lower, Upper) ~ month, groups=year, method="filled bands", data=s, type="l") # note colors based on levels of selected subgroups, not first two colors xYplot(Cbind(y, Lower, Upper) ~ month, groups=year, method="filled bands", data=s, type="l", subset=(year == 1998 | year == 2000), label.curves=FALSE) # filled bands using black lines with selected solid colors for fill xYplot(Cbind(y, Lower, Upper) ~ month, groups=year, method="filled bands", data=s, label.curves=FALSE, type="l", col=1, col.fill=2:3) Key(.5, .8, col=2:3) # use fill colors in key # A good way to check for stable variance of residuals from ols # xYplot(resid(fit) ~ fitted(fit), method=smean.sdl) # smean.sdl is defined with summary.formula in Hmisc # Plot y vs. a special variable x # xYplot(y ~ numericScale(x, label='Label for X') | country) # For this example could omit label= and specify # y ~ numericScale(x) | country, xlab='Label for X' # Here is an example of using xYplot with several options # to change various Trellis parameters, # xYplot(y ~ x | z, groups=v, pch=c('1','2','3'), # layout=c(3,1), # 3 panels side by side # ylab='Y Label', xlab='X Label', # main=list('Main Title', cex=1.5), # par.strip.text=list(cex=1.2), # strip=function(\dots) strip.default(\dots, style=1), # scales=list(alternating=FALSE)) # # Dotplot examples # s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5) setTrellis() # blank conditioning panel backgrounds Dotplot(month ~ Cbind(y, Lower, Upper) | year, data=s) # or Cbind(\dots), groups=year, data=s # Display a 5-number (5-quantile) summary (2 intervals, dot=median) # Note that summarize produces a matrix for y, and Cbind(y) trusts the # first column to be the point estimate (here the median) s <- summarize(y, llist(month,year), quantile, probs=c(.5,.05,.25,.75,.95), type='matrix') Dotplot(month ~ Cbind(y) | year, data=s) # Use factor(year) to make actual years appear in conditioning title strips # Plot proportions and their Wilson confidence limits set.seed(3) d <- expand.grid(continent=c('USA','Europe'), year=1999:2001, reps=1:100) # Generate binary events from a population probability of 0.2 # of the event, same for all years and continents d$y <- ifelse(runif(6*100) <= .2, 1, 0) s <- with(d, summarize(y, llist(continent,year), function(y) { n <- sum(!is.na(y)) s <- sum(y, na.rm=TRUE) binconf(s, n) }, type='matrix') ) Dotplot(year ~ Cbind(y) | continent, data=s, ylab='Year', xlab='Probability') # Dotplot(z ~ x | g1*g2) # 2-way conditioning # Dotplot(z ~ x | g1, groups=g2); Key() # Key defines symbols for g2 # If the data are organized so that the mean, lower, and upper # confidence limits are in separate records, the Hmisc reShape # function is useful for assembling these 3 values as 3 variables in # a single observation, e.g., assuming type has values such as # c('Mean','Lower','Upper'): # a <- reShape(y, id=month, colvar=type) # This will make a matrix with 3 columns named Mean Lower Upper # and with 1/3 as many rows as the original data } \keyword{hplot} \concept{trellis} \concept{lattice} Hmisc/man/Misc.Rd0000644000176200001440000003621014370550706013263 0ustar liggesusers\name{Misc} \alias{clowess} \alias{confbar} \alias{getLatestSource} \alias{grType} \alias{prType} \alias{htmlSpecialType}
\alias{inverseFunction} \alias{james.stein} \alias{keepHattrib} \alias{km.quick} \alias{latexBuild} \alias{lm.fit.qr.bare} \alias{matxv} \alias{makeSteps} \alias{nomiss} \alias{outerText} \alias{plotlyParm} \alias{plotp} \alias{rendHTML} \alias{restoreHattrib} \alias{sepUnitsTrans} \alias{strgraphwrap} \alias{tobase64image} \alias{trap.rule} \alias{trellis.strip.blank} \alias{unPaste} \alias{whichClosest} \alias{whichClosePW} \alias{whichClosek} \alias{xless} \title{Miscellaneous Functions} \description{ This documents miscellaneous small functions in Hmisc that may be of interest to users. \code{clowess} runs \code{lowess}, but if the \code{iter} argument exceeds zero and wild values result, \code{lowess} is re-run with \code{iter=0}. \code{confbar} draws multi-level confidence bars using small rectangles that may be of different colors. \code{getLatestSource} fetches and \code{source}s the most recent source code for functions from GitHub. \code{grType} retrieves the system option \code{grType}, which is forced to be \code{"base"} if the \code{plotly} package is not installed. \code{prType} retrieves the system option \code{prType}, which is set to \code{"plain"} if the option is not set. \code{print} methods that allow for markdown/html/latex can be automatically invoked by setting \code{options(prType="html")} or \code{options(prType='latex')}. \code{htmlSpecialType} retrieves the system option \code{htmlSpecialType}, which is set to \code{"unicode"} if the option is not set. \code{htmlSpecialType='unicode'} causes html-generating functions in \code{Hmisc} and \code{rms} to use unicode for special characters, and \code{htmlSpecialType='&'} uses the older ampersand 3-digit format. \code{inverseFunction} generates a function to find all inverses of a monotonic or nonmonotonic function that is tabulated at vectors (x,y), typically 1000 points. If the original function is monotonic, simple linear interpolation is used and the result is a vector, otherwise linear interpolation is used within each interval in which the function is monotonic and the result is a matrix with number of columns equal to the number of monotonic intervals. If a requested y is not within any interval, the extreme x that pertains to the nearest extreme y is returned. Specifying what='sample' to the returned function will cause a vector to be returned instead of a matrix, with elements taken as a random choice of the possible inverses. \code{james.stein} computes James-Stein shrunken estimates of cell means given a response variable (which may be binary) and a grouping indicator. \code{keepHattrib} for an input variable or a data frame, creates a list object saving special Hmisc attributes such as \code{label} and \code{units} that might be lost during certain operations such as running \code{data.table}. \code{restoreHattrib} restores these attributes. \code{km.quick} provides a fast way to invoke \code{survfitKM} in the \code{survival} package to get Kaplan-Meier estimates for a single stratum for a vector of time points (if \code{times} is given) or to get a vector of survival time quantiles (if \code{q} is given). \code{latexBuild} takes pairs of character strings and produces a single character string containing the concatenation of all of them, plus an attribute \code{"close"} which is a character string containing the LaTeX closure that will balance LaTeX code with respect to parentheses, braces, brackets, or \code{begin} vs. \code{end}. When an even-numbered element of the vector is not a left parenthesis, brace, or bracket, the element is taken as a word that was surrounded by \code{begin} and braces, for which the corresponding \code{end} is constructed in the returned attribute.
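For instance, based on the pairing just described, a call along the following lines should return the concatenated string together with its balancing closure (a minimal hypothetical sketch, not a verified example):
\preformatted{
w <- latexBuild('f(', '(', 'x')  # pairs: code 'f(' then its opener '('
w                   # "f(x"
attr(w, 'close')    # ")"  - the text that balances the "("
}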
\code{lm.fit.qr.bare} is a fast stripped-down function for computing regression coefficients, residuals, \eqn{R^2}, and fitted values. It uses \code{lm.fit}. \code{matxv} multiplies a matrix by a vector, handling automatic addition of intercepts if the matrix does not have a column of ones. If the first argument is not a matrix, it will be converted to one. An optional argument allows the second argument to be treated as a matrix, useful when its rows represent bootstrap reps of coefficients. Then \eqn{ab'} is computed. \code{matxv} respects the \code{"intercepts"} attribute if it is stored on \code{b} by the \code{rms} package. This is used by \code{\link[rms]{orm}} fits that are bootstrap-repeated by \code{\link[rms]{bootcov}} where only the intercept corresponding to the median is retained. If \code{kint} has nonzero length, it is checked for consistency with the attribute. \code{makeSteps} is a copy of the dostep function inside the \code{survival} package's \code{plot.survfit} function. It expands a series of points to include all the segments needed to plot step functions. This is useful for drawing polygons to shade confidence bands for step functions. \code{nomiss} returns a data frame (if its argument is one) with rows corresponding to \code{NA}s removed, or it returns a matrix with rows with any element missing removed. \code{outerText} uses \code{axis()} to put right-justified text strings in the right margin. Placement depends on \code{par('mar')[4]}. \code{plotlyParm} is a list of functions useful for specifying parameters to \code{plotly} graphics. \code{plotp} is a generic to handle \code{plotp} methods to make \code{plotly} graphics. \code{rendHTML} renders HTML in a character vector, first converting to one character string with newline delimiters. If \code{knitr} is currently running, runs this string through \code{knitr::asis_output} so that the user need not include \code{results='asis'} in the chunk header for R Markdown or Quarto. If \code{knitr} is not running, uses \code{htmltools::browsable} and \code{htmltools::HTML} and prints the result so that an RStudio viewer (if running inside RStudio) or separate browser window displays the rendered HTML. The HTML code is surrounded by yaml markup to make Pandoc not fiddle with the HTML. Set the argument \code{html=FALSE} to not add this, in case you are really rendering markdown. \code{sepUnitsTrans} converts character vectors containing values such as \code{c("3 days","3day","4month","2 years","2weeks","7")} to numeric vectors (here \code{c(3,3,122,730,14,7)}) in a flexible fashion. The user can specify a vector of units of measurements and conversion factors. The units with a conversion factor of \code{1} are taken as the target units, and if those units are present in the character strings they are ignored. The target units are added to the resulting vector as the \code{"units"} attribute. \code{strgraphwrap} is like \code{strwrap} but is for the current graphics environment. \code{tobase64image} is a function written by Dirk Eddelbuettel that uses the \code{base64enc} package to convert a png graphic file to base64 encoding to include as an inline image in an html file. \code{trap.rule} computes the area under a curve using the trapezoidal rule, assuming \code{x} is sorted.
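To make the rule concrete: with \eqn{n} points, it computes \deqn{\sum_{i=1}^{n-1} (x_{i+1}-x_i)(y_i+y_{i+1})/2}{sum((x[-1] - x[-n]) * (y[-1] + y[-n]) / 2)}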
\code{trellis.strip.blank} sets up Trellis or Lattice graphs to have a clear background on the strips for panel labels. \code{unPaste} provides a version of the S-Plus \code{unpaste} that works for \R and S-Plus. \code{whichClosePW} is a very fast function using weighted multinomial sampling to determine which element of a vector is "closest" to each element of another vector. \code{whichClosest} quickly finds the closest element without any randomness. \code{whichClosek} is a slow function that finds, after jittering the lookup table, the \code{k} closest matches to each element of the other vector, and chooses one of these at random. \code{xless} is a function for Linux/Unix users to invoke the system \code{xless} command to pop up a window to display the result of \code{print}ing an object. } \usage{ confbar(at, est, se, width, q = c(0.7, 0.8, 0.9, 0.95, 0.99), col = gray(c(0, 0.25, 0.5, 0.75, 1)), type = c("v", "h"), labels = TRUE, ticks = FALSE, cex = 0.5, side = "l", lwd = 5, clip = c(-1e+30, 1e+30), fun = function(x) x, qfun = function(x) ifelse(x == 0.5, qnorm(x), ifelse(x < 0.5, qnorm(x/2), qnorm((1 + x)/2)))) getLatestSource(x=NULL, package='Hmisc', recent=NULL, avail=FALSE) grType() prType() htmlSpecialType() inverseFunction(x, y) james.stein(y, group) keepHattrib(obj) km.quick(S, times, q) latexBuild(\dots, insert, sep='') lm.fit.qr.bare(x, y, tolerance, intercept=TRUE, xpxi=FALSE, singzero=FALSE) matxv(a, b, kint=1, bmat=FALSE) nomiss(x) outerText(string, y, cex=par('cex'), \dots) plotlyParm plotp(data, ...) rendHTML(x, html=TRUE) restoreHattrib(obj, attribs) sepUnitsTrans(x, conversion=c(day=1, month=365.25/12, year=365.25, week=7), round=FALSE, digits=0) strgraphwrap(x, width = 0.9 * getOption("width"), indent = 0, exdent = 0, prefix = "", simplify = TRUE, units='user', cex=NULL) tobase64image(file, Rd = FALSE, alt = "image") trap.rule(x, y) trellis.strip.blank() unPaste(str, sep="/") whichClosest(x, w) whichClosePW(x, w, f=0.2) whichClosek(x, w, k) xless(x, \dots, title) } \arguments{ \item{a}{a numeric matrix or vector} \item{alt,Rd}{see \code{base64::img}} \item{at}{x-coordinate for vertical confidence intervals, y-coordinate for horizontal} \item{attribs}{an object returned by \code{keepHattrib}} \item{avail}{set to \code{TRUE} to have \code{getLatestSource} return a data frame of available files and latest versions instead of fetching any} \item{b}{a numeric vector} \item{cex}{character expansion factor} \item{clip}{interval to truncate limits} \item{col}{vector of colors} \item{conversion}{a named numeric vector} \item{data}{an object having a \code{plotp} method} \item{digits}{number of digits used for \code{round}} \item{est}{vector of point estimates for confidence limits} \item{f}{a scaling constant} \item{file}{a file name} \item{fun}{function to transform scale} \item{group}{a categorical grouping variable} \item{html}{set to \code{FALSE} to tell \code{rendHTML} to not surround HTML code with yaml} \item{insert}{a list of 3-element lists for \code{latexBuild}. The first of each 3-element list is a character string with an environment name.
The second element is \code{"before"} or \code{"after"}, specifying whether the third element of the list is to be inserted before or after that environment when it is found.} \item{intercept}{set to \code{FALSE} to not automatically add a column of ones to the \code{x} matrix} \item{k}{get the \code{k} closest matches} \item{kint}{which element of \code{b} to add to the result if \code{a} does not contain a column for intercepts} \item{bmat}{set to \code{TRUE} to consider \code{b} a matrix of repeated coefficients, usually resampled estimates with rows corresponding to resamples} \item{labels}{set to \code{FALSE} to omit drawing confidence coefficients} \item{lwd}{line widths} \item{package}{name of package for \code{getLatestSource}, default is \code{'Hmisc'}} \item{obj}{a variable, data frame, or data table} \item{q}{vector of confidence coefficients or quantiles} \item{qfun}{quantiles on transformed scale} \item{recent}{an integer telling \code{getLatestSource} to get the \code{recent} most recently modified files from the package} \item{round}{set to \code{TRUE} to round converted values} \item{S}{a \code{\link[survival]{Surv}} object} \item{se}{vector of standard errors} \item{sep}{a single character string specifying the delimiter. For \code{latexBuild} the default is \code{""}.} \item{side}{for \code{confbar} is \code{"b","l","t","r"} for bottom, left, top, right.} \item{str}{a character string vector} \item{string}{a character string vector} \item{ticks}{set to \code{TRUE} to draw lines between rectangles} \item{times}{a numeric vector of times} \item{title}{a character string to title a window or plot} \item{tolerance}{tolerance for judging singularity in matrix} \item{type}{\code{"v"} for vertical, \code{"h"} for horizontal.} \item{w}{a numeric vector} \item{width}{width of confidence rectangles in user units, or see \code{\link{strwrap}}} \item{x}{a numeric vector (matrix for \code{lm.fit.qr.bare}) or data frame. For \code{xless} may be any object that is sensible to \code{print}. For \code{sepUnitsTrans} is a character or factor variable. For \code{getLatestSource} is a character string or vector of character strings containing base file names to retrieve from GitHub. Set \code{x='all'} to retrieve all source files. For \code{clowess}, \code{x} may also be a list with x and y components. For \code{inverseFunction}, \code{x} and \code{y} contain evaluations of the function whose inverse is needed. \code{x} is typically an equally-spaced grid of 1000 points. For \code{strgraphwrap} is a character vector. For \code{rendHTML} \code{x} is a character vector.} \item{xpxi}{set to \code{TRUE} to add an element to the result containing the inverse of \eqn{X'X}} \item{singzero}{set to \code{TRUE} to set coefficients corresponding to singular variables to zero instead of \code{NA}.} \item{y}{a numeric vector. For \code{inverseFunction} \code{y} is the evaluated function values at \code{x}.} \item{indent, exdent, prefix}{see \code{\link{strwrap}}} \item{simplify}{see \code{\link{sapply}}} \item{units}{see \code{\link{par}}} \item{\dots}{arguments passed through to another function.
For \code{latexBuild} represents pairs, with odd-numbered elements being character strings containing LaTeX code or a zero-length object to ignore, and even-numbered elements representing a LaTeX left parenthesis, left brace, or left bracket, or an environment name.} } \author{Frank Harrell and Charles Dupont} \examples{ trap.rule(1:100,1:100) unPaste(c('a;b or c','ab;d','qr;s'), ';') sepUnitsTrans(c('3 days','4 months','2 years','7')) set.seed(1) whichClosest(1:100, 3:5) whichClosest(1:100, rep(3,20)) whichClosePW(1:100, rep(3,20)) whichClosePW(1:100, rep(3,20), f=.05) whichClosePW(1:100, rep(3,20), f=1e-10) x <- seq(-1, 1, by=.01) y <- x^2 h <- inverseFunction(x,y) formals(h)$turns # vertex a <- seq(0, 1, by=.01) plot(0, 0, type='n', xlim=c(-.5,1.5)) lines(a, h(a)[,1]) ## first inverse lines(a, h(a)[,2], col='red') ## second inverse a <- c(-.1, 1.01, 1.1, 1.2) points(a, h(a)[,1]) d <- data.frame(x=1:2, y=3:4, z=5:6) d <- upData(d, labels=c(x='X', z='Z lab'), units=c(z='mm')) a <- keepHattrib(d) d <- data.frame(x=1:2, y=3:4, z=5:6) d2 <- restoreHattrib(d, a) sapply(d2, attributes) \dontrun{ getLatestSource(recent=5) # source() most recent 5 revised files in Hmisc getLatestSource('cut2') # fetch and source latest cut2.s getLatestSource('all') # get everything getLatestSource(avail=TRUE) # list available files and latest versions } } \keyword{programming} \keyword{utilities} \keyword{iplot} Hmisc/man/epi.Rd0000644000176200001440000000773013714234051013143 0ustar liggesusers\name{mhgr} \alias{mhgr} \alias{print.mhgr} \alias{lrcum} \alias{print.lrcum} \title{Miscellaneous Functions for Epidemiology} \description{ The \code{mhgr} function computes the Cochran-Mantel-Haenszel stratified risk ratio and its confidence limits using the Greenland-Robins variance estimator. The \code{lrcum} function takes the results of a series of 2x2 tables representing the relationship between test positivity and diagnosis and computes positive and negative likelihood ratios (with all their deficiencies) and the variance of their logarithms. Cumulative likelihood ratios and their confidence intervals (assuming independence of tests) are computed, assuming a string of all positive tests or a string of all negative tests. The method of Simel et al as described in Altman et al is used. } \usage{ mhgr(y, group, strata, conf.int = 0.95) \method{print}{mhgr}(x, \dots) lrcum(a, b, c, d, conf.int = 0.95) \method{print}{lrcum}(x, dec=3, \dots) } \arguments{ \item{y}{a binary response variable} \item{group}{a variable with two unique values specifying comparison groups} \item{strata}{the stratification variable} \item{conf.int}{confidence level} \item{x}{an object created by \code{mhgr} or \code{lrcum}} \item{a}{frequency of true positive tests} \item{b}{frequency of false positive tests} \item{c}{frequency of false negative tests} \item{d}{frequency of true negative tests} \item{dec}{number of places to the right of the decimal to print for \code{lrcum}} \item{\dots}{additional arguments to be passed to other print functions} } \details{ Uses equations 4 and 13 from Greenland and Robins. } \value{ a list of class \code{"mhgr"} or of class \code{"lrcum"}. } \references{ Greenland S, Robins JM (1985): Estimation of a common effect parameter from sparse follow-up data. Biometrics 41:55-68. Altman DG, Machin D, Bryant TN, Gardner MJ, Eds. (2000): Statistics with Confidence, 2nd Ed. Bristol: BMJ Books, 105-110.
Simel DL, Samsa GP, Matchar DB (1991): Likelihood ratios with confidence: sample size estimation for diagnostic test studies. J Clin Epi 44:763-770. } \author{Frank E Harrell Jr \email{fh@fharrell.com}} \seealso{\code{\link{logrank}}} \examples{ # Create Migraine dataset used in Example 28.6 in the SAS PROC FREQ guide d <- expand.grid(response=c('Better','Same'), treatment=c('Active','Placebo'), sex=c('female','male')) d$count <- c(16, 11, 5, 20, 12, 16, 7, 19) d # Expand data frame to represent raw data r <- rep(1:8, d$count) d <- d[r,] with(d, mhgr(response=='Better', treatment, sex)) # Discrete survival time example, to get Cox-Mantel relative risk and CL # From Stokes ME, Davis CS, Koch GG, Categorical Data Analysis Using the # SAS System, 2nd Edition, Section 17.3, p. 596-599 # # Input data in Table 17.5 d <- expand.grid(treatment=c('A','P'), center=1:3) d$healed2w <- c(15,15,17,12, 7, 3) d$healed4w <- c(17,17,17,13,17,17) d$notHealed4w <- c( 2, 7,10,15,16,18) d # Reformat to the way most people would collect raw data d1 <- d[rep(1:6, d$healed2w),] d1$time <- '2' d1$y <- 1 d2 <- d[rep(1:6, d$healed4w),] d2$time <- '4' d2$y <- 1 d3 <- d[rep(1:6, d$notHealed4w),] d3$time <- '4' d3$y <- 0 d <- rbind(d1, d2, d3) d$healed2w <- d$healed4w <- d$notHealed4w <- NULL d # Finally, duplicate appropriate observations to create 2 and 4-week # risk sets. Healed and not healed at 4w need to be in the 2-week # risk set as not healed d2w <- subset(d, time=='4') d2w$time <- '2' d2w$y <- 0 d24 <- rbind(d, d2w) with(d24, table(y, treatment, time, center)) # Matches Table 17.6 with(d24, mhgr(y, treatment, interaction(center, time, sep=';'))) # Get cumulative likelihood ratios and their 0.95 confidence intervals # based on the following two tables # # Disease Disease # + - + - # Test + 39 3 20 5 # Test - 21 17 22 15 lrcum(c(39,20), c(3,5), c(21,22), c(17,15)) } \keyword{category} \keyword{htest} \concept{epidemiology} Hmisc/man/intMarkovOrd.Rd0000644000176200001440000001072014252463134015002 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/simMarkovOrd.r \name{intMarkovOrd} \alias{intMarkovOrd} \title{intMarkovOrd} \usage{ intMarkovOrd( y, times, initial, absorb = NULL, intercepts, extra = NULL, g, target, t, ftarget = NULL, onlycrit = FALSE, constraints = NULL, printsop = FALSE, ... ) } \arguments{ \item{y}{vector of possible y values in order (numeric, character, factor)} \item{times}{vector of measurement times} \item{initial}{initial value of \code{y} (baseline state; numeric, character, or factor matching \code{y}). If length 1 this value is used for all subjects, otherwise it is a vector of length \code{n}.} \item{absorb}{vector of absorbing states, a subset of \code{y} (numeric, character, or factor matching \code{y}). The default is no absorbing states. Observations are truncated when an absorbing state is simulated.} \item{intercepts}{vector of initial guesses for the intercepts} \item{extra}{an optional vector of initial guesses for other parameters passed to \code{g} such as regression coefficients for previous states and for general time trends.
Name the elements of \code{extra} for more informative output.} \item{g}{a user-specified function of three or more arguments which in order are \code{yprev} - the value of \code{y} at the previous time, the current time \code{t}, the \code{gap} between the previous time and the current time, an optional (usually named) covariate vector \code{X}, and optional arguments such as a regression coefficient value to simulate from. The function needs to allow \code{yprev} to be a vector and \code{yprev} must not include any absorbing states. The \code{g} function returns the linear predictor for the proportional odds model aside from \code{intercepts}. The returned value must be a matrix with row names taken from \code{yprev}. If the model is a proportional odds model, the returned value must be one column. If it is a partial proportional odds model, the value must have one column for each distinct value of the response variable Y after the first one, with the levels of Y used as optional column names. So columns correspond to \code{intercepts}. The different columns are used for \code{y}-specific contributions to the linear predictor (aside from \code{intercepts}) for a partial or constrained partial proportional odds model. Parameters for partial proportional odds effects may be included in the ... arguments.} \item{target}{vector of target state occupancy probabilities at time \code{t}. If \code{extra} is specified, \code{target} must be a matrix where row names are character versions of \code{t} and columns represent occupancy probabilities corresponding to values of \code{y} at the time given in the row.} \item{t}{target times. Can have more than one element only if \code{extra} is given.} \item{ftarget}{an optional function defining constraints that relate to transition probabilities. The function returns a penalty which is a sum of absolute differences in probabilities from target probabilities over possibly multiple targets. The \code{ftarget} function must have two arguments: \code{intercepts} and \code{extra}.} \item{onlycrit}{set to \code{TRUE} to only return the achieved objective criterion and not print anything} \item{constraints}{a function of two arguments: the vector of current intercept values and the vector of \code{extra} parameters, returning \code{TRUE} if that vector meets the constraints and \code{FALSE} otherwise} \item{printsop}{set to \code{TRUE} to print solved-for state occupancy probabilities for groups 1 and 2 and log odds ratios corresponding to them} \item{...}{optional arguments to pass to \code{\link[stats:nlm]{stats::nlm()}}. If this is specified, the arguments that \code{intMarkovOrd} normally sends to \code{nlm} are not used.} } \value{ list containing two vectors named \code{intercepts} and \code{extra} unless \code{onlycrit=TRUE}, in which case the best achieved sum of absolute errors is returned } \description{ Compute Parameters for Proportional Odds Markov Model } \details{ Given a vector \code{intercepts} of initial guesses at the intercepts in a Markov proportional odds model, and a vector \code{extra} if there are other parameters, solves for the \code{intercepts} and \code{extra} vectors that yield a set of occupancy probabilities at time \code{t} that equal, as closely as possible, a vector of target values.
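As a rough illustration of a call (a hypothetical 3-state proportional odds model with made-up intercept guesses and target occupancy probabilities; the transition-model function \code{g} below is for illustration only and is not a tested example):
\preformatted{
g <- function(yprev, t, gap, X=NULL, ...) {
  # linear predictor apart from intercepts; previous state 2
  # worsens the outlook.  Proportional odds -> one column, with
  # row names taken from yprev as required
  matrix(0.6 * (yprev == 2), ncol=1,
         dimnames=list(as.character(yprev), NULL))
}
z <- intMarkovOrd(y=1:3, times=1:3, initial=1, intercepts=c(1, -1),
                  g=g, target=c(.5, .3, .2), t=3)
}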
} \seealso{ \url{https://hbiostat.org/R/Hmisc/markov/} } \author{ Frank Harrell } Hmisc/man/latexDotchart.Rd0000644000176200001440000001027713714234051015166 0ustar liggesusers\name{latexDotchart} \alias{latexDotchart} \title{Enhanced Dot Chart for LaTeX Picture Environment with epic} \description{ \code{latexDotchart} is a translation of the \code{dotchart3} function for producing a vector of character strings containing LaTeX picture environment markup that mimics \code{dotchart3} output. The LaTeX \code{epic} and \code{color} packages are required. The \code{add} and \code{horizontal=FALSE} options are not available for \code{latexDotchart}, however. } \usage{ latexDotchart(data, labels, groups=NULL, gdata=NA, xlab='', auxdata, auxgdata=NULL, auxtitle, w=4, h=4, margin, lines=TRUE, dotsize = .075, size='small', size.labels='small', size.group.labels='normalsize', ttlabels=FALSE, sort.=TRUE, xaxis=TRUE, lcolor='gray', ...) } \arguments{ \item{data}{a numeric vector whose values are shown on the x-axis} \item{labels}{a vector of labels for each point, corresponding to \code{x}. If omitted, \code{names(data)} are used, and if there are no \code{names}, integers prefixed by \code{"#"} are used.} \item{groups}{an optional categorical variable indicating how \code{data} values are grouped} \item{gdata}{data values for groups, typically summaries such as group medians} \item{xlab}{x-axis title} \item{auxdata}{ a vector of auxiliary data, of the same length as the first (\code{data}) argument. If present, this vector of values will be printed outside the right margin of the dot chart. Usually \code{auxdata} represents cell sizes. } \item{auxgdata}{ similar to \code{auxdata} but corresponding to the \code{gdata} argument. These usually represent overall sample sizes for each group of lines.} \item{auxtitle}{ if \code{auxdata} is given, \code{auxtitle} specifies a column heading for the extra printed data in the chart, e.g., \code{"N"}} \item{w}{width of picture in inches} \item{h}{height of picture in inches} \item{margin}{a 4-vector representing, in inches, the margin to the left of the x-axis, below the y-axis, to the right of the x-axis, and above the y-axis. By default these are computed making educated guesses about how to accommodate \code{auxdata} etc.} \item{lines}{set to \code{FALSE} to suppress drawing of reference lines} \item{dotsize}{diameter of filled circles, in inches, for drawing dots} \item{size}{size of text in picture. This and the next two arguments are LaTeX font commands without the opening backslash, e.g., \code{'normalsize'}, \code{'small'}, \code{'large'}, \code{smaller[2]}.} \item{size.labels}{size of labels} \item{size.group.labels}{size of labels corresponding to \code{groups}} \item{ttlabels}{set to \code{TRUE} to use typewriter monospaced font for labels} \item{sort.}{ set to \code{FALSE} to keep \code{latexDotchart} from sorting the input data, i.e., it will assume that the data are already properly arranged. This is especially useful when you are using \code{gdata} and \code{groups} and you want to control the order that groups appear on the chart (from top to bottom).} \item{xaxis}{set to \code{FALSE} to suppress drawing x-axis} \item{lcolor}{ color for horizontal reference lines.
Default is \code{"gray"}} \item{...}{ignored} } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link[Hmisc]{dotchart3}} } \examples{ \dontrun{ z <- latexDotchart(c(.1,.2), c('a','bbAAb'), xlab='This Label', auxdata=c(.1,.2), auxtitle='Zcriteria') f <- '/tmp/t.tex' cat('\\documentclass{article}\n\\usepackage{epic,color}\n\\begin{document}\n', file=f) cat(z, sep='\n', file=f, append=TRUE) cat('\\end{document}\n', file=f, append=TRUE) set.seed(135) maj <- factor(c(rep('North',13),rep('South',13))) g <- paste('Category',rep(letters[1:13],2)) n <- sample(1:15000, 26, replace=TRUE) y1 <- runif(26) y2 <- pmax(0, y1 - runif(26, 0, .1)) z <- latexDotchart(y1, g, groups=maj, auxdata=n, auxtitle='n', xlab='Y', size.group.labels='large', ttlabels=TRUE) f <- '/tmp/t2.tex' cat('\\documentclass{article}\n\\usepackage{epic,color}\n\\begin{document}\n\\framebox{', file=f) cat(z, sep='\n', file=f, append=TRUE) cat('}\\end{document}\n', file=f, append=TRUE) } } \keyword{hplot} Hmisc/man/prselect.Rd0000644000176200001440000000457512243661443014220 0ustar liggesusers\name{prselect} \Rdversion{1.1} \alias{prselect} \title{Selectively Print Lines of a Text Vector} \description{ Given one or two regular expressions or exact text matches, removes elements of the input vector that match these specifications. Omitted lines are replaced by \ldots. This is useful for selectively suppressing some of the printed output of R functions such as regression fitting functions, especially in the context of making statistical reports using Sweave or Odfweave. } \usage{ prselect(x, start = NULL, stop = NULL, i = 0, j = 0, pr = TRUE) } \arguments{ \item{x}{ input character vector } \item{start}{ text or regular expression to look for starting line to omit. If omitted, deletions start at the first line. } \item{stop}{ text or regular expression to look for ending line to omit. If omitted, deletions proceed until the last line. } \item{i}{ increment in number of first line to delete after match is found } \item{j}{ increment in number of last line to delete after match is found } \item{pr}{ set to \code{FALSE} to suppress printing } } \value{an invisible vector of retained lines of text} \author{Frank Harrell} \seealso{ \code{\link[utils]{Sweave}} } \examples{ x <- c('the','cat','ran','past','the','dog') prselect(x, 'big','bad') # omit nothing- no match prselect(x, 'the','past') # omit first 4 lines prselect(x,'the','junk') # omit nothing- no match for stop prselect(x,'ran','dog') # omit last 4 lines prselect(x,'cat') # omit lines 2- prselect(x,'cat',i=1) # omit lines 3- prselect(x,'cat','past') # omit lines 2-4 prselect(x,'cat','past',j=1) # omit lines 2-5 prselect(x,'cat','past',j=-1)# omit lines 2-3 prselect(x,'t$','dog') # omit lines 2-6; t must be at end # Example for Sweave: run a regression analysis with the rms package # then selectively output only a portion of what print.ols prints. 
# (Thanks to \email{romain.francois@dbmail.com}) # <<z>>= # library(rms) # y <- rnorm(20); x1 <- rnorm(20); x2 <- rnorm(20) # ols(y ~ x1 + x2) # <<>>= # z <- capture.output( { # <<z>> # } ) # prselect(z, 'Residuals:') # keep only summary stats; or: # prselect(z, stop='Coefficients', j=-1) # keep coefficients, rmse, R^2; or: # prselect(z, 'Coefficients', 'Residual standard error', j=-1) # omit coef # @ } \keyword{manip} \keyword{utilities} Hmisc/man/bpower.Rd0000644000176200001440000001157613714234051013667 0ustar liggesusers\name{bpower} \alias{bpower} \alias{bsamsize} \alias{ballocation} \alias{bpower.sim} \title{ Power and Sample Size for Two-Sample Binomial Test } \description{ Uses method of Fleiss, Tytun, and Ury (but without the continuity correction) to estimate the power (or the sample size to achieve a given power) of a two-sided test for the difference in two proportions. The two sample sizes are allowed to be unequal, but for \code{bsamsize} you must specify the fraction of observations in group 1. For power calculations, one probability (\code{p1}) must be given, and either the other probability (\code{p2}), an \code{odds.ratio}, or a \code{percent.reduction} must be given. For \code{bpower} or \code{bsamsize}, any or all of the arguments may be vectors, in which case they return a vector of powers or sample sizes. All vector arguments must have the same length. Given \code{p1, p2}, \code{ballocation} uses the method of Brittain and Schlesselman to compute the optimal fraction of observations to be placed in group 1 that either (1) minimize the variance of the difference in two proportions, (2) minimize the variance of the ratio of the two proportions, (3) minimize the variance of the log odds ratio, or (4) maximize the power of the 2-tailed test for differences. For (4) the total sample size must be given, or the fraction optimizing the power is not returned. The fraction for (3) is one minus the fraction for (1). \code{bpower.sim} estimates power by simulations, in minimal time. By using \code{bpower.sim} you can see that the formulas without any continuity correction are quite accurate, and that the power of a continuity-corrected test is significantly lower. That's why no continuity corrections are implemented here. } \usage{ bpower(p1, p2, odds.ratio, percent.reduction, n, n1, n2, alpha=0.05) bsamsize(p1, p2, fraction=.5, alpha=.05, power=.8) ballocation(p1, p2, n, alpha=.05) bpower.sim(p1, p2, odds.ratio, percent.reduction, n, n1, n2, alpha=0.05, nsim=10000) } \arguments{ \item{p1}{ population probability in group 1 } \item{p2}{ probability for group 2 } \item{odds.ratio}{ odds ratio for group 2 relative to group 1; may be specified in place of \code{p2} } \item{percent.reduction}{ percent reduction in the group 1 probability \code{p1}; may be specified in place of \code{p2} } \item{n}{ total sample size over the two groups. If you omit this for \code{ballocation}, the \code{fraction} which optimizes power will not be returned. } \item{n1, n2}{ the individual group sample sizes. For \code{bpower}, if \code{n} is given, \code{n1} and \code{n2} are set to \code{n/2}. } \item{alpha}{ type I error } \item{fraction}{ fraction of observations in group 1 } \item{power}{ the desired probability of detecting a difference } \item{nsim}{ number of simulations of binomial responses }} \value{ for \code{bpower}, the power estimate; for \code{bsamsize}, a vector containing the sample sizes in the two groups; for \code{ballocation}, a vector with 4 fractions of observations allocated to group 1, optimizing the four criteria mentioned above.
For \code{bpower.sim}, a vector with three elements is returned, corresponding to the simulated power and its lower and upper 0.95 confidence limits. } \details{ For \code{bpower.sim}, all arguments must be of length one. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \references{ Fleiss JL, Tytun A, Ury HK (1980): A simple approximation for calculating sample sizes for comparing independent proportions. Biometrics 36:343--6. Brittain E, Schlesselman JJ (1982): Optimal allocation for the comparison of proportions. Biometrics 38:1003--9. Gordon I, Watson R (1996): The myth of continuity-corrected sample size formulae. Biometrics 52:71--6. } \seealso{ \code{\link{samplesize.bin}}, \code{\link{chisq.test}}, \code{\link{binconf}} } \examples{ bpower(.1, odds.ratio=.9, n=1000, alpha=c(.01,.05)) bpower.sim(.1, odds.ratio=.9, n=1000) bsamsize(.1, .05, power=.95) ballocation(.1, .5, n=100) # Plot power vs. n for various odds ratios (base prob.=.1) n <- seq(10, 1000, by=10) OR <- seq(.2,.9,by=.1) plot(0, 0, xlim=range(n), ylim=c(0,1), xlab="n", ylab="Power", type="n") for(or in OR) { lines(n, bpower(.1, odds.ratio=or, n=n)) text(350, bpower(.1, odds.ratio=or, n=350)-.02, format(or)) } # Another way to plot the same curves, but letting labcurve do the # work, including labeling each curve at points of maximum separation pow <- lapply(OR, function(or,n)list(x=n,y=bpower(p1=.1,odds.ratio=or,n=n)), n=n) names(pow) <- format(OR) labcurve(pow, pl=TRUE, xlab='n', ylab='Power') # Contour graph for various probabilities of outcome in the control # group, fixing the odds ratio at .8 ([p2/(1-p2) / p1/(1-p1)] = .8) # n is varied also p1 <- seq(.01,.99,by=.01) n <- seq(100,5000,by=250) pow <- outer(p1, n, function(p1,n) bpower(p1, n=n, odds.ratio=.8)) # This forms a length(p1)*length(n) matrix of power estimates contour(p1, n, pow) } \keyword{htest} \keyword{category} \concept{power} \concept{study design} Hmisc/man/rcspline.restate.Rd0000644000176200001440000001175414275452753015663 0ustar liggesusers\name{rcspline.restate} \alias{rcspline.restate} \alias{rcsplineFunction} \title{ Re-state Restricted Cubic Spline Function } \description{ This function re-states a restricted cubic spline function in the un-linearly-restricted form. Coefficients for that form are returned, along with an \R functional representation of this function and a LaTeX character representation of the function. \code{rcsplineFunction} is a fast function that creates a function to compute a restricted cubic spline function with given coefficients and knots, without reformatting the function to be pretty (i.e., into unrestricted form). } \usage{ rcspline.restate(knots, coef, type=c("ordinary","integral"), x="X", lx=nchar(x), norm=2, columns=65, before="& &", after="\\\\", begin="", nbegin=0, digits=max(8, .Options$digits)) rcsplineFunction(knots, coef, norm=2, type=c('ordinary', 'integral')) } \arguments{ \item{knots}{ vector of knots used in the regression fit } \item{coef}{ vector of coefficients from the fit. If the length of \code{coef} is \eqn{k-1}, where \eqn{k} is \code{length(knots)}, the first coefficient must be for the linear term and the remaining \eqn{k-2} coefficients must be for the constructed terms (e.g., from \code{rcspline.eval}). If the length of \code{coef} is \eqn{k}, an intercept is assumed to be in the first element (or a zero is prepended to \code{coef} for \code{rcsplineFunction}).
} \item{type}{ The default is to represent the cubic spline function corresponding to the coefficients and knots. Set \code{type = "integral"} to instead represent its anti-derivative. } \item{x}{ a character string to use as the variable name in the LaTeX expression for the formula. } \item{lx}{ length of \code{x} to count with respect to \code{columns}. Default is length of character string contained by \code{x}. You may want to set \code{lx} smaller than this if it includes non-printable LaTeX commands. } \item{norm}{ normalization that was used in deriving the original nonlinear terms used in the fit. See \code{rcspline.eval} for definitions. } \item{columns}{ maximum number of symbols in the LaTeX expression to allow before inserting a newline (\samp{\\\\}) command. Set to a very large number to keep text all on one line. } \item{before}{ text to place before each line of LaTeX output. Use \samp{"& &"} for an equation array environment in LaTeX where you want to have a left-hand prefix e.g. \samp{"f(X) & = &"} or using \samp{"\\lefteqn"}. } \item{after}{ text to place at the end of each line of output. } \item{begin}{ text with which to start the first line of output. Useful when adding LaTeX output to part of an existing formula } \item{nbegin}{ number of columns of printable text in \code{begin} } \item{digits}{ number of significant digits to write for coefficients and knots } } \value{ \code{rcspline.restate} returns a vector of coefficients. The coefficients are un-normalized and two coefficients are added that are linearly dependent on the other coefficients and knots. The vector of coefficients has four attributes. \code{knots} is a vector of knots, \code{latex} is a vector of text strings with the LaTeX representation of the formula. \code{columns.used} is the number of columns used in the output string since the last newline command. \code{function} is an \R function, which is also returned in character string format as the \code{text} attribute. \code{rcsplineFunction} returns an \R function with arguments \code{x} (a user-supplied numeric vector at which to evaluate the function), and some automatically-supplied other arguments. } \author{ Frank Harrell \cr Department of Biostatistics, Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{rcspline.eval}}, \code{\link{ns}}, \code{\link[rms]{rcs}}, \code{\link{latex}}, \code{\link{Function.transcan}} } \examples{ set.seed(1) x <- 1:100 y <- (x - 50)^2 + rnorm(100, 0, 50) plot(x, y) xx <- rcspline.eval(x, inclx=TRUE, nk=4) knots <- attr(xx, "knots") coef <- lsfit(xx, y)$coef options(digits=4) # rcspline.restate must ignore intercept w <- rcspline.restate(knots, coef[-1], x="{\\\\rm BP}") # could also have used coef instead of coef[-1], to include intercept cat(attr(w,"latex"), sep="\n") xtrans <- eval(attr(w, "function")) # This is an S function of a single argument lines(x, coef[1] + xtrans(x), type="l") # Plots fitted transformation xtrans <- rcsplineFunction(knots, coef) xtrans lines(x, xtrans(x), col='blue') #x <- blood.pressure xx.simple <- cbind(x, pmax(x-knots[1],0)^3, pmax(x-knots[2],0)^3, pmax(x-knots[3],0)^3, pmax(x-knots[4],0)^3) pred.value <- coef[1] + xx.simple \%*\% w plot(x, pred.value, type='l') # same as above } \keyword{regression} \keyword{interface} \keyword{character} % Converted by Sd2Rd version 1.21.
Hmisc/man/stringDims.Rd0000644000176200001440000000167412243661443014513 0ustar liggesusers\name{stringDims} \alias{stringDims} %- Also NEED an '\alias' for EACH other topic documented here. \title{String Dimensions} \description{ Finds the height and width of all the strings in a character vector. } \usage{ stringDims(string) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{string}{vector of strings} } \details{ \code{stringDims} finds the number of characters in width and number of lines in height for each string in the \code{string} argument. } \value{ \item{height}{a vector of the number of lines in each string.} \item{width}{a vector with the number of character columns in the longest line.} } \author{Charles Dupont} \seealso{\code{\link{string.bounding.box}}, \code{\link{nchar}}} \examples{ a <- c("this is a single line string", "This is a\nmulti line string") stringDims(a) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{print} Hmisc/man/dotchart2.Rd0000644000176200001440000001301613714234051014252 0ustar liggesusers\name{dotchart2} \alias{dotchart2} \title{ Enhanced Dot Chart } \description{ \code{dotchart2} is an enhanced version of the \code{dotchart} function with several new options. } \usage{ dotchart2(data, labels, groups=NULL, gdata=NA, horizontal=TRUE, pch=16, xlab='', ylab='', xlim=NULL, auxdata, auxgdata=NULL, auxtitle, lty=1, lines=TRUE, dotsize = .8, cex = par("cex"), cex.labels = cex, cex.group.labels = cex.labels*1.25, sort.=TRUE, add=FALSE, dotfont=par('font'), groupfont=2, reset.par=add, xaxis=TRUE, width.factor=1.1, lcolor='gray', leavepar=FALSE, axisat=NULL, axislabels=NULL, ...) } \arguments{ \item{data}{a numeric vector whose values are shown on the x-axis} \item{labels}{a vector of labels for each point, corresponding to \code{x}. If omitted, \code{names(data)} are used, and if there are no \code{names}, integers prefixed by \code{"#"} are used.} \item{groups}{an optional categorical variable indicating how \code{data} values are grouped} \item{gdata}{data values for groups, typically summaries such as group medians} \item{horizontal}{set to \code{FALSE} to make the chart vertical instead of the default} \item{pch}{ default character number or value for plotting dots in dot charts. The default is 16.} \item{xlab}{x-axis title} \item{ylab}{y-axis title} \item{xlim}{x-axis limits. Applies only to \code{horizontal=TRUE}.} \item{auxdata}{ a vector of auxiliary data given to \code{dotchart2}, of the same length as the first (\code{data}) argument. If present, this vector of values will be printed outside the right margin of the dot chart. Usually \code{auxdata} represents cell sizes. } \item{auxgdata}{ similar to \code{auxdata} but corresponding to the \code{gdata} argument. These usually represent overall sample sizes for each group of lines.} \item{auxtitle}{ if \code{auxdata} is given, \code{auxtitle} specifies a column heading for the extra printed data in the chart, e.g., \code{"N"}} \item{lty}{line type for horizontal lines. Default is 1 for R, 2 for S-Plus} \item{lines}{set to \code{FALSE} to suppress drawing of reference lines} \item{dotsize}{ \code{cex} value for drawing dots. Default is 0.8. Note that the original \code{dotchart} function used a default of 1.2.} \item{cex}{see \code{\link{par}}} \item{cex.labels}{ \code{cex} parameter that applies only to the line labels for the dot chart.
Defaults to \code{cex}.} \item{cex.group.labels}{value of \code{cex} corresponding to \code{gdata}} \item{sort.}{ set to \code{FALSE} to keep \code{dotchart2} from sorting the input data, i.e., it will assume that the data are already properly arranged. This is especially useful when you are using \code{gdata} and \code{groups} and you want to control the order that groups appear on the chart (from top to bottom).} \item{add}{set to \code{TRUE} to add to an existing plot} \item{dotfont}{ font number of plotting dots. Default is one. Use \code{-1} to use "outline" fonts. For example, \code{pch=183, dotfont=-1} plots an open circle for UNIX on postscript. \code{pch=1} makes an open octagon under Windows.} \item{groupfont}{ font number to use in drawing \code{group} labels for \code{dotchart2}. Default is \code{2} for boldface. } \item{reset.par}{ set to \code{FALSE} to cause \code{dotchart2} to not reset the \code{par} parameters when finished. This is useful when \code{add=TRUE} is about to be used in another call. The default is to reset the \code{par} parameters if \code{add=TRUE} and not if \code{add=FALSE}, i.e., the program assumes that only one set of points will be added to an existing set. If you fail to use \code{reset.par=TRUE} for the first of a series of plots, the next call to \code{plot} with \code{add=TRUE} will result in distorted x-axis scaling.} \item{xaxis}{set to \code{FALSE} to suppress drawing x-axis} \item{width.factor}{ When the calculated left margin turns out to be faulty, specify a factor by which to multiply the left margin, as \code{width.factor}, to get the appropriate space for labels on horizontal charts.} \item{lcolor}{ color for horizontal reference lines. Default is \code{"gray"} for R, \code{par("col")} for S-Plus.} \item{leavepar}{set to \code{TRUE} to leave \code{par()} unchanged. This assumes the user has allocated sufficient left and right margins for a horizontal dot chart.} \item{axisat}{a vector of tick mark locations to pass to \code{axis}. Useful if transforming the data axis} \item{axislabels}{a vector of strings specifying axis tick mark labels. Useful if transforming the data axis} \item{...}{arguments passed to \code{plot.default}} } \section{Side Effects}{ \code{dotchart} will leave \code{par} altered if \code{reset.par=FALSE}. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{dotchart}} } \examples{ set.seed(135) maj <- factor(c(rep('North',13),rep('South',13))) g <- paste('Category',rep(letters[1:13],2)) n <- sample(1:15000, 26, replace=TRUE) y1 <- runif(26) y2 <- pmax(0, y1 - runif(26, 0, .1)) dotchart2(y1, g, groups=maj, auxdata=n, auxtitle='n', xlab='Y') dotchart2(y2, g, groups=maj, pch=17, add=TRUE) ## Compare with dotchart function (no superpositioning or auxdata allowed): ## dotchart(y1, g, groups=maj, xlab='Y') ## To plot using a transformed scale add for example ## axisat=sqrt(pretty(y)), axislabels=pretty(y) } \keyword{hplot} Hmisc/man/getHdata.Rd0000644000176200001440000000565614270267756014116 0ustar liggesusers\name{getHdata} \alias{getHdata} \title{ Download and Install Datasets for \pkg{Hmisc}, \pkg{rms}, and Statistical Modeling } \description{ This function downloads and makes ready to use datasets from the main web site for the \pkg{Hmisc} and \pkg{rms} libraries. For \R, the datasets were stored in compressed \code{\link{save}} format and \code{getHdata} makes them available by running \code{\link{load}} after download.
For S-Plus, the datasets were stored in \code{data.dump} format and are made available by running \code{data.restore} after import. The dataset is run through the \code{\link{cleanup.import}} function. Calling \code{getHdata} with no \code{file} argument provides a character vector of names of available datasets that are currently on the web site. For \R, \R's default browser can optionally be launched to view \verb{html} files that were already prepared using the \pkg{Hmisc} command \code{html(contents())} or to view \file{.txt} or \file{.html} data description files when available. If \code{options(localHfiles=TRUE)} the files are read from the local directory \code{~/web/data/repo} instead of from the web server. } \usage{ getHdata(file, what = c("data", "contents", "description", "all"), where="https://hbiostat.org/data/repo") } \arguments{ \item{file}{ an unquoted name of a dataset on the web site, e.g. \samp{prostate}. Omit \code{file} to obtain a list of available datasets. } \item{what}{ specify \code{what="contents"} to browse the contents (metadata) for the dataset rather than fetching the data themselves. Specify \code{what="description"} to browse a data description file if available. Specify \code{what="all"} to retrieve the data and see the metadata and description. } \item{where}{ \acronym{URL} containing the data and metadata files } } \value{ \code{getHdata()} without a \code{file} argument returns a character vector of dataset base names. When a dataset is downloaded, the data frame is placed in search position one and is not returned as value of \code{getHdata}. } \author{Frank Harrell} \seealso{ \code{\link{download.file}}, \code{\link{cleanup.import}}, \code{\link[foreign:read.S]{data.restore}}, \code{\link{load}} } \examples{ \dontrun{ getHdata() # download list of available datasets getHdata(prostate) # downloads, load( ) or data.restore( ) # runs cleanup.import for S-Plus 6 getHdata(valung, "contents") # open browser (options(browser="whatever")) # after downloading valung.html # (result of html(contents())) getHdata(support, "all") # download and open one browser window datadensity(support) attach(support) # make individual variables available getHdata(plasma, "all") # download and open two browser windows # (description file is available for plasma) } } \keyword{interface} \keyword{data} Hmisc/man/cut2.Rd0000644000176200001440000000463713412474411013245 0ustar liggesusers\name{cut2} \alias{cut2} \title{Cut a Numeric Variable into Intervals} \description{ Function like \code{cut} but left endpoints are inclusive and labels are of the form \code{[lower, upper)}, except that the last interval is \code{[lower,upper]}. If cuts are given, will by default make sure that cuts include the entire range of \code{x}. Also, if cuts are not given, will cut \code{x} into quantile groups (\code{g} given) or groups with a given minimum number of observations (\code{m}). Whereas \code{cut} creates a category object, \code{cut2} creates a factor object. } \usage{ cut2(x, cuts, m=150, g, levels.mean=FALSE, digits, minmax=TRUE, oneval=TRUE, onlycuts=FALSE, formatfun=format, \dots) } \arguments{ \item{x}{ numeric vector to classify into intervals } \item{cuts}{ cut points } \item{m}{ desired minimum number of observations in a group. The algorithm does not guarantee that all groups will have at least \code{m} observations.
}
\item{g}{ number of quantile groups }
\item{levels.mean}{ set to \code{TRUE} to make the new categorical vector have levels attribute that is the group means of \code{x} instead of interval endpoint labels }
\item{digits}{ number of significant digits to use in constructing levels. Default is 3 (5 if \code{levels.mean=TRUE}) }
\item{minmax}{ if cuts is specified but \code{min(x) < min(cuts)} or \code{max(x) > max(cuts)}, augments cuts to include min and max \code{x} }
\item{oneval}{ if an interval contains only one unique value, the interval will be labeled with the formatted version of that value instead of the interval endpoints, unless \code{oneval=FALSE} }
\item{onlycuts}{ set to \code{TRUE} to only return the vector of computed cuts. This consists of the interior values plus outer ranges. }
\item{formatfun}{ formatting function, supports formula notation (if \code{rlang} is installed) }
\item{\dots}{ additional arguments passed to \code{formatfun} }
}
\value{ a factor variable with levels of the form \code{[a,b)} or formatted means (character strings) unless \code{onlycuts} is \code{TRUE} in which case a numeric vector is returned }
\seealso{ \code{\link{cut}}, \code{\link{quantile}} }
\examples{
set.seed(1)
x <- runif(1000, 0, 100)
z <- cut2(x, c(10,20,30))
table(z)
table(cut2(x, g=10))   # quantile groups
table(cut2(x, m=50))   # group x into intervals with at least 50 obs.
}
\keyword{category}
\keyword{nonparametric}
\concept{grouping}
\concept{categorization}
\concept{discretization}
Hmisc/man/show.pch.Rd0000644000176200001440000000162512243661443014121 0ustar liggesusers\name{show.pch}
\alias{show.pch}
\alias{show.col}
\alias{character.table}
\title{Display Colors, Plotting Symbols, and Symbol Numeric Equivalents}
\description{ \code{show.pch} plots the definitions of the \code{pch} parameters. \code{show.col} plots definitions of integer-valued colors. \code{character.table} draws numeric equivalents of all Latin characters; the character on line \code{xy} and column \code{z} of the table has numeric code \code{"xyz"}, which you would surround in quotes and precede by a backslash. }
\usage{ show.pch(object = par("font")) show.col(object=NULL) character.table(font=1) }
\arguments{ \item{object}{font for \code{show.pch}, ignored for \code{show.col}.} \item{font}{font} }
\author{Pierre Joyet \email{pierre.joyet@bluewin.ch}, Frank Harrell}
\seealso{\code{\link{points}}, \code{\link{text}}}
\examples{
\dontrun{
show.pch()
show.col()
character.table()
}}
\keyword{aplot}
Hmisc/man/curveRep.Rd0000644000176200001440000003375514244143522014165 0ustar liggesusers\name{curveRep}
\alias{curveRep}
\alias{print.curveRep}
\alias{plot.curveRep}
\alias{curveSmooth}
\title{Representative Curves}
\description{\code{curveRep} finds representative curves from a relatively large collection of curves. The curves usually represent time-response profiles as in serial (longitudinal or repeated) data with possibly unequal time points and greatly varying sample sizes per subject. After excluding records containing missing \code{x} or \code{y}, records are first stratified into \code{kn} groups having similar sample sizes per curve (subject). Within these strata, curves are next stratified according to the distribution of \code{x} points per curve (typically measurement times per subject). The \code{\link[cluster]{clara}} clustering/partitioning function is used to do this, clustering on one, two, or three \code{x} characteristics depending on the minimum sample size in the current interval of sample size.
If the interval has a minimum number of unique \code{x} values of one, clustering is done on the single \code{x} values. If the minimum number of unique \code{x} values is two, clustering is done to create groups that are similar on both \code{min(x)} and \code{max(x)}. For groups containing no fewer than three unique \code{x} values, clustering is done on the trio of values \code{min(x)}, \code{max(x)}, and the longest gap between any successive \code{x}. Then within sample size and \code{x} distribution strata, clustering of time-response profiles is based on \code{p} values of \code{y} all evaluated at the same \code{p} equally-spaced \code{x}'s within the stratum. An option allows per-curve data to be smoothed with \code{\link{lowess}} before proceeding. Outer \code{x} values are taken as extremes of \code{x} across all curves within the stratum. Linear interpolation within curves is used to estimate \code{y} at the grid of \code{x}'s. For curves within the stratum that do not extend to the most extreme \code{x} values in that stratum, extrapolation uses flat lines from the observed extremes in the curve unless \code{extrap=TRUE}. The \code{p} \code{y} values are clustered using \code{\link[cluster]{clara}}. \code{print} and \code{plot} methods show results. By specifying an auxiliary \code{idcol} variable to \code{plot}, other variables such as treatment may be depicted to allow the analyst to determine for example whether subjects on different treatments are assigned to different time-response profiles. To write the frequencies of a variable such as treatment in the upper left corner of each panel (instead of the grand total number of clusters in that panel), specify \code{freq}. \code{curveSmooth} takes a set of curves and smooths them using \code{\link{lowess}}. If the number of unique \code{x} points in a curve is less than \code{p}, the smooth is evaluated at the unique \code{x} values. Otherwise it is evaluated at an equally spaced set of \code{x} points over the observed range. If fewer than 3 unique \code{x} values are in a curve, those points are used and smoothing is not done. }
\usage{
curveRep(x, y, id, kn = 5, kxdist = 5, k = 5, p = 5, force1 = TRUE,
         metric = c("euclidean", "manhattan"),
         smooth=FALSE, extrap=FALSE, pr=FALSE)
\method{print}{curveRep}(x, \dots)
\method{plot}{curveRep}(x, which=1:length(res),
     method=c('all','lattice','data'),
     m=NULL, probs=c(.5, .25, .75), nx=NULL, fill=TRUE,
     idcol=NULL, freq=NULL, plotfreq=FALSE,
     xlim=range(x), ylim=range(y),
     xlab='x', ylab='y', colorfreq=FALSE, \dots)
curveSmooth(x, y, id, p=NULL, pr=TRUE)
}
\arguments{
\item{x}{a numeric vector, typically measurement times. For \code{plot.curveRep}, it is an object created by \code{curveRep}.}
\item{y}{a numeric vector of response values}
\item{id}{a vector of curve (subject) identifiers, the same length as \code{x} and \code{y}}
\item{kn}{number of curve sample size groups to construct. \code{curveRep} tries to divide the data into equal numbers of curves across sample size intervals.}
\item{kxdist}{maximum number of x-distribution clusters to derive using \code{clara}}
\item{k}{maximum number of x-y profile clusters to derive using \code{clara}}
\item{p}{number of \code{x} points at which to interpolate \code{y} for profile clustering.
For \code{curveSmooth}, it is the number of equally spaced points at which to evaluate the lowess smooth, and if \code{p} is omitted the smooth is evaluated at the original \code{x} values (which will allow \code{curveRep} to still know the \code{x} distribution).}
\item{force1}{By default if any curves have only one point, all curves consisting of one point will be placed in a separate stratum. To prevent this separation, set \code{force1 = FALSE}.}
\item{metric}{see \code{\link[cluster]{clara}}}
\item{smooth}{By default, linear interpolation is used on raw data to obtain \code{y} values to cluster to determine x-y profiles. Specify \code{smooth = TRUE} to replace observed points with \code{\link{lowess}} before computing \code{y} points on the grid. Also, when \code{smooth} is used, it may be desirable to use \code{extrap=TRUE}.}
\item{extrap}{set to \code{TRUE} to use linear extrapolation to evaluate \code{y} points for x-y clustering. Not recommended unless smoothing has been or is being done.}
\item{pr}{set to \code{TRUE} to print progress notes}
\item{which}{an integer vector specifying which sample size intervals to plot. Must be specified if \code{method='lattice'} and must be a single number in that case.}
\item{method}{The default makes individual plots of possibly all x-distribution by sample size by cluster combinations. Fewer may be plotted by specifying \code{which}. Specify \code{method='lattice'} to show a lattice \code{xyplot} of a single sample size interval, with x distributions going across and clusters going down. To not plot but instead return a data frame for a single sample size interval, specify \code{method='data'}}
\item{m}{the number of curves in a cluster to randomly sample if there are more than \code{m} in a cluster. Default is to draw all curves in a cluster. For \code{method = "lattice"} you can specify \code{m = "quantiles"} to use the \code{xYplot} function to show quantiles of \code{y} as a function of \code{x}, with the quantiles specified by the \code{probs} argument. This cannot be used to draw a group containing \code{n = 1}.}
\item{nx}{applies if \code{m = "quantiles"}. See \code{\link{xYplot}}.}
\item{probs}{3-vector of probabilities with the central quantile first. Default uses quartiles.}
\item{fill}{for \code{method = "all"}, by default if a sample size x-distribution stratum did not have enough curves to stratify into \code{k} x-y profiles, empty graphs are drawn so that a matrix of graphs will have the next row starting with a different sample size range or x-distribution. See the example below.}
\item{idcol}{a named vector to be used as a table lookup for color assignments (does not apply when \code{m = "quantiles"}). The names of this vector are curve \code{id}s and the values are color names or numbers.}
\item{freq}{a named vector to be used as a table lookup for a grouping variable such as treatment. The names are curve \code{id}s and values are any values useful for grouping in a frequency tabulation.}
\item{plotfreq}{set to \code{TRUE} to plot the frequencies from the \code{freq} variable as horizontal bars instead of printing them. Applies only to \code{method = "lattice"}. By default the largest bar is 0.1 times the length of a panel's x-axis. Specify \code{plotfreq = 0.5} for example to make the longest bar half this long.}
\item{colorfreq}{set to \code{TRUE} to color the frequencies printed by \code{plotfreq} using the colors provided by \code{idcol}.}
\item{xlim, ylim, xlab, ylab}{plotting parameters.
Default ranges are the ranges in the entire set of raw data given to \code{curveRep}.}
\item{\dots}{arguments passed to other functions.}
}
\value{a list of class \code{"curveRep"} with the following elements
\item{res}{a hierarchical list first split by sample size intervals, then by x distribution clusters, then containing a vector of cluster numbers with \code{id} values as a names attribute}
\item{ns}{a table of frequencies of sample sizes per curve after removing \code{NA}s}
\item{nomit}{total number of records excluded due to \code{NA}s}
\item{missfreq}{a table of frequencies of number of \code{NA}s excluded per curve}
\item{ncuts}{cut points for sample size intervals}
\item{kn}{number of sample size intervals}
\item{kxdist}{number of clusters on x distribution}
\item{k}{number of clusters of curves within sample size and distribution groups}
\item{p}{number of points at which to evaluate each curve for clustering}
\item{x}{}
\item{y}{}
\item{id}{input data after removing \code{NA}s}
\code{curveSmooth} returns a list with elements \code{x,y,id}.
}
\details{ In the graph titles for the default graphic output, \code{n} refers to the minimum sample size, \code{x} refers to the sequential x-distribution cluster, and \code{c} refers to the sequential x-y profile cluster. Graphs from \code{method = "lattice"} are produced by \code{\link[lattice]{xyplot}} and in the panel titles \code{distribution} refers to the x-distribution stratum and \code{cluster} refers to the x-y profile cluster. }
\references{
Segal M. (1994): Representative curves for longitudinal data via regression trees. J Comp Graph Stat 3:214-233.

Jones MC, Rice JA (1992): Displaying the important features of large collections of similar curves. Am Statistician 46:140-145.

Zheng X, Simpson JA, et al (2005): Data from a study of effectiveness suggested potential prognostic factors related to the patterns of shoulder pain. J Clin Epi 58:823-830.
}
\author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr \email{fh@fharrell.com} }
\note{The references describe other methods for deriving representative curves, but those methods were not used here. The last reference which used a cluster analysis on principal components motivated \code{curveRep} however. The \code{kml} package does k-means clustering of longitudinal data with imputation.}
\seealso{\code{\link[cluster]{clara}},\code{\link[Hmisc]{dataRep}}}
\examples{
\dontrun{
# Simulate 200 curves with per-curve sample sizes ranging from 1 to 10
# Make curves with odd-numbered IDs have an x-distribution that is random
# uniform [0,1] and those with even-numbered IDs have an x-dist. that is
# half as wide but still centered at 0.5.  Shift y values higher with
# increasing IDs
set.seed(1)
N <- 200
nc <- sample(1:10, N, TRUE)
id <- rep(1:N, nc)
x <- y <- id
for(i in 1:N) {
  x[id==i] <- if(i \%\% 2) runif(nc[i]) else runif(nc[i], .25, .75)
  y[id==i] <- i + 10*(x[id==i] - .5) + runif(nc[i], -10, 10)
}
w <- curveRep(x, y, id, kxdist=2, p=10)
w
par(ask=TRUE, mfrow=c(4,5))
plot(w)                # show everything, profiles going across
par(mfrow=c(2,5))
plot(w,1)              # show n=1 results
# Use a color assignment table, assigning low curves to green and
# high to red. Unique curve (subject) IDs are the names of the vector.
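# (Here curve IDs 1-100, the lower curves since y increases with ID,
#  are mapped to green and IDs 101-200 to red.)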
cols <- c(rep('green', N/2), rep('red', N/2))
names(cols) <- as.character(1:N)
plot(w, 3, idcol=cols)
par(ask=FALSE, mfrow=c(1,1))

plot(w, 1, 'lattice')  # show n=1 results
plot(w, 3, 'lattice')  # show n=4-5 results
plot(w, 3, 'lattice', idcol=cols)  # same but different color mapping
plot(w, 3, 'lattice', m=1)  # show a single "representative" curve
# Show median, 10th, and 90th percentiles of supposedly representative curves
plot(w, 3, 'lattice', m='quantiles', probs=c(.5,.1,.9))
# Same plot but with much less grouping of x variable
plot(w, 3, 'lattice', m='quantiles', probs=c(.5,.1,.9), nx=2)

# Use ggplot2 for one sample size interval
library(ggplot2)
z <- plot(w, 2, 'data')
ggplot(z, aes(x, y, color=curve)) + geom_line() +
  facet_grid(distribution ~ cluster) +
  theme(legend.position='none') +
  labs(caption=z$ninterval[1])

# Smooth data before profiling.  This allows later plotting to plot
# smoothed representative curves rather than raw curves (which
# specifying smooth=TRUE to curveRep would do, if curveSmooth was not used)
d <- curveSmooth(x, y, id)
w <- with(d, curveRep(x, y, id))

# Example to show that curveRep can cluster profiles correctly when
# there is no noise. In the data there are four profiles - flat, flat
# at a higher mean y, linearly increasing then flat, and flat at the
# first height except for a sharp triangular peak
set.seed(1)
x <- 0:100
m <- length(x)
profile <- matrix(NA, nrow=m, ncol=4)
profile[,1] <- rep(0, m)
profile[,2] <- rep(3, m)
profile[,3] <- c(0:3, rep(3, m-4))
profile[,4] <- c(0,1,3,1,rep(0,m-4))
col <- c('black','blue','green','red')
matplot(x, profile, type='l', col=col)
xeval <- seq(0, 100, length.out=5)
s <- x \%in\% xeval
matplot(x[s], profile[s,], type='l', col=col)
id <- rep(1:100, each=m)
X <- Y <- id
cols <- character(100)
names(cols) <- as.character(1:100)
for(i in 1:100) {
  s <- id==i
  X[s] <- x
  j <- sample(1:4,1)
  Y[s] <- profile[,j]
  cols[i] <- col[j]
}
table(cols)
yl <- c(-1,4)
w <- curveRep(X, Y, id, kn=1, kxdist=1, k=4)
plot(w, 1, 'lattice', idcol=cols, ylim=yl)
# Found 4 clusters but two have same profile
w <- curveRep(X, Y, id, kn=1, kxdist=1, k=3)
plot(w, 1, 'lattice', idcol=cols, freq=cols, plotfreq=TRUE, ylim=yl)
# Incorrectly combined black and red because default value p=5 did
# not result in different profiles at x=xeval
w <- curveRep(X, Y, id, kn=1, kxdist=1, k=4, p=40)
plot(w, 1, 'lattice', idcol=cols, ylim=yl)
# Found correct clusters because evaluated curves at 40 equally
# spaced points and could find the sharp triangular peak in profile 4
}
}
\keyword{multivariate}
\keyword{hplot}
\concept{repeated measures}
\concept{longitudinal data}
\concept{serial data}
\concept{representative curves}
\concept{descriptive statistics}
\concept{exploratory data analysis}
Hmisc/man/summarize.Rd0000644000176200001440000002267713714234043014402 0ustar liggesusers\name{summarize}
\alias{summarize}
\alias{asNumericMatrix}
\alias{matrix2dataFrame}
\title{Summarize Scalars or Matrices by Cross-Classification}
\description{ \code{summarize} is a fast version of \code{summary.formula(formula, method="cross",overall=FALSE)} for producing stratified summary statistics and storing them in a data frame for plotting (especially with trellis \code{xyplot} and \code{dotplot} and Hmisc \code{xYplot}). Unlike \code{aggregate}, \code{summarize} accepts a matrix as its first argument and a multi-valued \code{FUN} argument and \code{summarize} also labels the variables in the new data frame using their original names.
Unlike methods based on \code{tapply}, \code{summarize} stores the values of the stratification variables using their original types, e.g., a numeric \code{by} variable will remain a numeric variable in the collapsed data frame. \code{summarize} also retains \code{"label"} attributes for variables. \code{summarize} works especially well with the Hmisc \code{xYplot} function for displaying multiple summaries of a single variable on each panel, such as means and upper and lower confidence limits.

\code{asNumericMatrix} converts a data frame into a numeric matrix, saving attributes to reverse the process by \code{matrix2dataFrame}. It saves attributes that are commonly preserved across row subsetting (i.e., it does not save \code{dim}, \code{dimnames}, or \code{names} attributes).

\code{matrix2dataFrame} converts a numeric matrix back into a data frame if it was created by \code{asNumericMatrix}.
}
\usage{
summarize(X, by, FUN, \dots, stat.name=deparse(substitute(X)),
          type=c('variables','matrix'), subset=TRUE,
          keepcolnames=FALSE)
asNumericMatrix(x)
matrix2dataFrame(x, at=attr(x, 'origAttributes'), restoreAll=TRUE)
}
\arguments{
\item{X}{ a vector or matrix capable of being operated on by the function specified as the \code{FUN} argument }
\item{by}{ one or more stratification variables. If a single variable, \code{by} may be a vector, otherwise it should be a list. Using the Hmisc \code{llist} function instead of \code{list} will result in individual variable names being accessible to \code{summarize}. For example, you can specify \code{llist(age.group,sex)} or \code{llist(Age=age.group,sex)}. The latter gives \code{age.group} a new temporary name, \code{Age}. }
\item{FUN}{ a function of a single vector argument, used to create the statistical summaries for \code{summarize}. \code{FUN} may compute any number of statistics. }
\item{...}{extra arguments are passed to \code{FUN}}
\item{stat.name}{ the name to use when creating the main summary variable. By default, the name of the \code{X} argument is used. Set \code{stat.name} to \code{NULL} to suppress this name replacement. }
\item{type}{ Specify \code{type="matrix"} to store the summary variables (if there are more than one) in a matrix. }
\item{subset}{ a logical vector or integer vector of subscripts used to specify the subset of data to use in the analysis. The default is to use all observations in the data frame. }
\item{keepcolnames}{by default when \code{type="matrix"}, the first column of the computed matrix is named for the first argument to \code{summarize}. Set \code{keepcolnames=TRUE} to retain the name of the first column created by \code{FUN}}
\item{x}{ a data frame (for \code{asNumericMatrix}) or a numeric matrix (for \code{matrix2dataFrame}). }
\item{at}{List containing attributes of original data frame that survive subsetting. Defaults to attribute \code{"origAttributes"} of the object \code{x}, created by the call to \code{asNumericMatrix}}
\item{restoreAll}{ set to \code{FALSE} to only restore attributes \code{label}, \code{units}, and \code{levels} instead of all attributes }
}
\value{ For \code{summarize}, a data frame containing the \code{by} variables and the statistical summaries (the first of which is named the same as the \code{X} variable unless \code{stat.name} is given). If \code{type="matrix"}, the summaries are stored in a single variable in the data frame, and this variable is a matrix.
\code{asNumericMatrix} returns a numeric matrix and stores an object \code{origAttributes} as an attribute of the returned object, containing the original attributes of the component variables and their \code{storage.mode}.

\code{matrix2dataFrame} returns a data frame.
}
\author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} }
\seealso{ \code{\link{label}}, \code{\link{cut2}}, \code{\link{llist}}, \code{\link{by}} }
\examples{
\dontrun{
s <- summarize(ap>1, llist(size=cut2(sz, g=4), bone), mean,
               stat.name='Proportion')
dotplot(Proportion ~ size | bone, data=s)
}
set.seed(1)
temperature <- rnorm(300, 70, 10)
month <- sample(1:12, 300, TRUE)
year  <- sample(2000:2001, 300, TRUE)
g <- function(x)c(Mean=mean(x,na.rm=TRUE),Median=median(x,na.rm=TRUE))
summarize(temperature, month, g)
mApply(temperature, month, g)

mApply(temperature, month, mean, na.rm=TRUE)
w <- summarize(temperature, month, mean, na.rm=TRUE)
library(lattice)
xyplot(temperature ~ month, data=w) # plot mean temperature by month

w <- summarize(temperature, llist(year,month),
               quantile, probs=c(.5,.25,.75), na.rm=TRUE, type='matrix')
xYplot(Cbind(temperature[,1],temperature[,-1]) ~ month | year, data=w)
mApply(temperature, llist(year,month),
       quantile, probs=c(.5,.25,.75), na.rm=TRUE)

# Compute the median and outer quartiles.  The outer quartiles are
# displayed using "error bars"
set.seed(111)
dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100)
attach(dfr)
y <- abs(month-6.5) + 2*runif(length(month)) + year-1997
s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5)
s
mApply(y, llist(month,year), smedian.hilow, conf.int=.5)
xYplot(Cbind(y,Lower,Upper) ~ month, groups=year, data=s,
       keys='lines', method='alt')
# Can also do:
s <- summarize(y, llist(month,year), quantile, probs=c(.5,.25,.75),
               stat.name=c('y','Q1','Q3'))
xYplot(Cbind(y, Q1, Q3) ~ month, groups=year, data=s, keys='lines')
# To display means and bootstrapped nonparametric confidence intervals
# use for example:
s <- summarize(y, llist(month,year), smean.cl.boot)
xYplot(Cbind(y, Lower, Upper) ~ month | year, data=s)

# For each subject use the trapezoidal rule to compute the area under
# the (time,response) curve using the Hmisc trap.rule function
x <- cbind(time=c(1,2,4,7, 1,3,5,10),response=c(1,3,2,4, 1,3,2,4))
subject <- c(rep(1,4),rep(2,4))
trap.rule(x[1:4,1],x[1:4,2])
summarize(x, subject, function(y) trap.rule(y[,1],y[,2]))

\dontrun{
# Another approach would be to properly re-shape the mm array below
# This assumes no missing cells.  There are many other approaches.
# mApply will do this well while allowing for missing cells.
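# (tapply with a 2-element INDEX list returns a year x month matrix
#  whose cells each hold the vector of 3 quantiles; unlist collapses
#  it into the labeled 3-dimensional array built below.)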
m <- tapply(y, list(year,month), quantile, probs=c(.25,.5,.75)) mm <- array(unlist(m), dim=c(3,2,12), dimnames=list(c('lower','median','upper'),c('1997','1998'), as.character(1:12))) # aggregate will help but it only allows you to compute one quantile # at a time; see also the Hmisc mApply function dframe <- aggregate(y, list(Year=year,Month=month), quantile, probs=.5) # Compute expected life length by race assuming an exponential # distribution - can also use summarize g <- function(y) { # computations for one race group futime <- y[,1]; event <- y[,2] sum(futime)/sum(event) # assume event=1 for death, 0=alive } mApply(cbind(followup.time, death), race, g) # To run mApply on a data frame: xn <- asNumericMatrix(x) m <- mApply(xn, race, h) # Here assume h is a function that returns a matrix similar to x matrix2dataFrame(m) # Get stratified weighted means g <- function(y) wtd.mean(y[,1],y[,2]) summarize(cbind(y, wts), llist(sex,race), g, stat.name='y') mApply(cbind(y,wts), llist(sex,race), g) # Compare speed of mApply vs. by for computing d <- data.frame(sex=sample(c('female','male'),100000,TRUE), country=sample(letters,100000,TRUE), y1=runif(100000), y2=runif(100000)) g <- function(x) { y <- c(median(x[,'y1']-x[,'y2']), med.sum =median(x[,'y1']+x[,'y2'])) names(y) <- c('med.diff','med.sum') y } system.time(by(d, llist(sex=d$sex,country=d$country), g)) system.time({ x <- asNumericMatrix(d) a <- subsAttr(d) m <- mApply(x, llist(sex=d$sex,country=d$country), g) }) system.time({ x <- asNumericMatrix(d) summarize(x, llist(sex=d$sex, country=d$country), g) }) # An example where each subject has one record per diagnosis but sex of # subject is duplicated for all the rows a subject has. Get the cross- # classified frequencies of diagnosis (dx) by sex and plot the results # with a dot plot count <- rep(1,length(dx)) d <- summarize(count, llist(dx,sex), sum) Dotplot(dx ~ count | sex, data=d) } d <- list(x=1:10, a=factor(rep(c('a','b'), 5)), b=structure(letters[1:10], label='label for b'), d=c(rep(TRUE,9), FALSE), f=pi*(1 : 10)) x <- asNumericMatrix(d) attr(x, 'origAttributes') matrix2dataFrame(x) detach('dfr') # Run summarize on a matrix to get column means x <- c(1:19,NA) y <- 101:120 z <- cbind(x, y) g <- c(rep(1, 10), rep(2, 10)) summarize(z, g, colMeans, na.rm=TRUE, stat.name='x') # Also works on an all numeric data frame summarize(as.data.frame(z), g, colMeans, na.rm=TRUE, stat.name='x') } \keyword{category} \keyword{manip} \keyword{multivariate} \concept{grouping} \concept{stratification} \concept{aggregation} \concept{cross-classification} Hmisc/man/estSeqMarkovOrd.Rd0000644000176200001440000002442014054721073015455 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/simMarkovOrd.r \name{estSeqMarkovOrd} \alias{estSeqMarkovOrd} \title{estSeqMarkovOrd} \usage{ estSeqMarkovOrd( y, times, initial, absorb = NULL, intercepts, parameter, looks, g, formula, ppo = NULL, yprevfactor = TRUE, groupContrast = NULL, cscov = FALSE, timecriterion = NULL, coxzph = FALSE, sstat = NULL, rdsample = NULL, maxest = NULL, maxvest = NULL, nsim = 1, progress = FALSE, pfile = "" ) } \arguments{ \item{y}{vector of possible y values in order (numeric, character, factor)} \item{times}{vector of measurement times} \item{initial}{a vector of probabilities summing to 1.0 that specifies the frequency distribution of initial values to be sampled from. 
The vector must have names that correspond to values of \code{y} representing non-absorbing states.}

\item{absorb}{vector of absorbing states, a subset of \code{y}. The default is no absorbing states. Observations are truncated when an absorbing state is simulated. May be numeric, character, or factor.}

\item{intercepts}{vector of intercepts in the proportional odds model. There must be one fewer of these than the length of \code{y}.}

\item{parameter}{vector of true parameter (effects; group differences) values. These are group 2:1 log odds ratios in the transition model, conditioning on the previous \code{y}.}

\item{looks}{integer vector of ID numbers at which maximum likelihood estimates and their estimated variances are computed. For a single look specify a scalar value for \code{looks} equal to the number of subjects in the sample.}

\item{g}{a user-specified function of three or more arguments which in order are \code{yprev} - the value of \code{y} at the previous time, the current time \code{t}, the \code{gap} between the previous time and the current time, an optional (usually named) covariate vector \code{X}, and optional arguments such as a regression coefficient value to simulate from. The function needs to allow \code{yprev} to be a vector and \code{yprev} must not include any absorbing states. The \code{g} function returns the linear predictor for the proportional odds model aside from \code{intercepts}. The returned value must be a matrix with row names taken from \code{yprev}. If the model is a proportional odds model, the returned value must be one column. If it is a partial proportional odds model, the value must have one column for each distinct value of the response variable Y after the first one, with the levels of Y used as optional column names. So columns correspond to \code{intercepts}. The different columns are used for \code{y}-specific contributions to the linear predictor (aside from \code{intercepts}) for a partial or constrained partial proportional odds model. Parameters for partial proportional odds effects may be included in the ... arguments.}

\item{formula}{a formula object given to the \code{lrm()} function using variables with these names: \code{y}, \code{time}, \code{yprev}, and \code{group} (factor variable having values '1' and '2'). The \code{yprev} variable is converted to a factor before fitting the model unless \code{yprevfactor=FALSE}.}

\item{ppo}{a formula specifying the part of \code{formula} for which proportional odds is not to be assumed, i.e., that specifies a partial proportional odds model. Specifying \code{ppo} triggers the use of \code{VGAM::vglm()} instead of \code{rms::lrm} and will make the simulations run slower.}

\item{yprevfactor}{see \code{formula}}

\item{groupContrast}{omit this argument if \code{group} has only one regression coefficient in \code{formula}. Otherwise if \code{ppo} is omitted, provide \code{groupContrast} as a list of two lists that are passed to \code{rms::contrast.rms()} to compute the contrast of interest and its standard error. The first list corresponds to group 1, the second to group 2, to get a 2:1 contrast. If \code{ppo} is given and the group effect is not just a simple regression coefficient, specify as \code{groupContrast} a function of a \code{vglm} fit that computes the contrast of interest and its standard error and returns a list with elements named \code{Contrast} and \code{SE}.
For the latter type you can optionally have formal arguments \code{n1}, \code{n2}, and \code{parameter} that are passed to \code{groupContrast} to compute the standard error of the group contrast, where \code{n1} and \code{n2} respectively are the sample sizes for the two groups and \code{parameter} is the true group effect parameter value.}

\item{cscov}{applies if \code{ppo} is not used. Set to \code{TRUE} to use the cluster sandwich covariance estimator of the variance of the group comparison.}

\item{timecriterion}{a function of a time-ordered vector of simulated ordinal responses \code{y} that returns a vector of \code{FALSE} or \code{TRUE} values denoting whether the current \code{y} level met the condition of interest. For example \code{estSeqMarkovOrd} will compute the first time at which \code{y >= 5} if you specify \code{timecriterion=function(y) y >= 5}. This function is only called at the last data look for each simulated study. To have more control, instead of \code{timecriterion} returning a logical vector have it return a numeric 2-vector containing, in order, the event/censoring time and the 1/0 event/censoring indicator.}

\item{coxzph}{set to \code{TRUE} if \code{timecriterion} is specified and you want to compute a statistic for testing proportional hazards at the last look of each simulated dataset}

\item{sstat}{set to a function of the time vector and the corresponding vector of ordinal responses for a single group if you want to compute a Wilcoxon test on a derived quantity such as the number of days in a given state.}

\item{rdsample}{an optional function to do response-dependent sampling. It is a function of these arguments, which are vectors that stop at any absorbing state: \code{times} (ascending measurement times for one subject), \code{y} (vector of ordinal outcomes at these times for one subject). The function returns \code{NULL} if no observations are to be dropped, otherwise it returns the vector of new times to sample.}

\item{maxest}{maximum acceptable absolute value of the contrast estimate, ignored if \code{NULL}. Any values exceeding \code{maxest} will result in the estimate being set to \code{NA}.}

\item{maxvest}{like \code{maxest} but for the estimated variance of the contrast estimate}

\item{nsim}{number of simulations (default is 1)}

\item{progress}{set to \code{TRUE} to send current iteration number to \code{pfile} every 10 iterations. Each iteration will really involve multiple simulations, if \code{parameter} has length greater than 1.}

\item{pfile}{file to which to write progress information. Defaults to \code{''} which is the console. Ignored if \code{progress=FALSE}.}
}
\value{ a data frame with number of rows equal to the product of \code{nsim}, the length of \code{looks}, and the length of \code{parameter}, with variables \code{sim}, \code{parameter}, \code{look}, \code{est} (log odds ratio for group), and \code{vest} (the variance of the latter). If \code{timecriterion} is specified the data frame also contains \code{loghr} (Cox log hazard ratio for group), \code{lrchisq} (chi-square from Cox test for group), and if \code{coxzph=TRUE}, \code{phchisq}, the chi-square for testing proportional hazards. The attribute \code{etimefreq} is also present if \code{timecriterion} is present, and it provides the frequency distribution of derived event times by group and censoring/event indicator.
If \code{sstat} is given, the attribute \code{sstat} is also present, and it contains an array with dimensions corresponding to simulations, parameter values within simulations, \code{id}, and a two-column subarray with columns \code{group} and \code{y}, the latter being the summary measure computed by the \code{sstat} function. The returned data frame also has attribute \code{lrmcoef}, which contains the last-look logistic regression coefficient estimates over the \code{nsim} simulations and the parameter settings, and an attribute \code{failures} which is a data frame containing the variables \code{reason} and \code{frequency} cataloging the reasons for unsuccessful model fits.
}
\description{ Simulate Comparisons For Use in Sequential Markov Longitudinal Clinical Trial Simulations }
\details{ Simulates sequential clinical trials of longitudinal ordinal outcomes using a first-order Markov model. Looks are done sequentially after subject ID numbers given in the vector \code{looks} with the earliest possible look being after subject 2. At each look, a subject's repeated records are either all used or all ignored depending on the subject's ID number. For each true effect parameter value, simulation, and at each look, runs a function to compute the estimate of the parameter of interest along with its variance. For each simulation, data are first simulated for the last look, and these data are sequentially revealed for earlier looks. The user provides a function \code{g} that has extra arguments specifying the true effect (\code{parameter}) and the treatment \code{group}, with treatments expected to be coded 1 and 2. \code{parameter} is usually on the scale of a regression coefficient, e.g., a log odds ratio. Fitting is done using the \code{rms::lrm()} function, unless non-proportional odds is allowed in which case \code{VGAM::vglm()} is used. If \code{timecriterion} is specified, the function also, for the last data look only, computes the first time at which the criterion is satisfied for the subject, or uses the event time and event/censoring indicator computed by \code{timecriterion}. The Cox/logrank chi-square statistic for comparing groups on the derived time variable is saved. If \code{coxzph=TRUE}, the \code{survival} package correlation coefficient \code{rho} from the scaled partial residuals is also saved so that the user can later determine to what extent the Markov model resulted in the proportional hazards assumption being violated when analyzing on the time scale. \code{vglm} is accelerated by saving the first successful fit for the largest sample size and using its coefficients as starting values for further \code{vglm} fits for any sample size for the same setting of \code{parameter}. }
\seealso{ \code{gbayesSeqSim()}, \code{simMarkovOrd()}, \url{https://hbiostat.org/R/Hmisc/markov/} }
\author{ Frank Harrell }
Hmisc/man/mtitle.Rd0000644000176200001440000000321313714234051013656 0ustar liggesusers\name{mtitle}
\alias{mtitle}
\title{ Margin Titles }
\description{ Writes overall titles and subtitles after a multiple image plot is drawn. If \code{par()$oma==c(0,0,0,0)}, \code{title} is used instead of \code{mtext}, to draw titles or subtitles that are inside the plotting region for a single plot.
}
\usage{ mtitle(main, ll, lc, lr=format(Sys.time(),'\%d\%b\%y'), cex.m=1.75, cex.l=.5, \dots) }
\arguments{
\item{main}{ main title to be centered over entire figure, default is none }
\item{ll}{ subtitle for lower left of figure, default is none }
\item{lc}{ subtitle for lower center of figure, default is none }
\item{lr}{ subtitle for lower right of figure, default is today's date in format 23Jan91 for UNIX or R (Thu May 30 09:08:13 1996 format for Windows). Set to \code{""} to suppress lower right title. }
\item{cex.m}{ character size for main, default is 1.75 }
\item{cex.l}{ character size for subtitles }
\item{...}{ other arguments passed to \code{mtext} }}
\value{ nothing }
\author{ Frank Harrell \cr Department of Biostatistics, Vanderbilt University \cr \email{fh@fharrell.com} }
\section{Side Effects}{ plots }
\seealso{ \code{\link{par}}, \code{\link{mtext}}, \code{\link{title}}, \code{\link{unix}}, \code{\link{pstamp}} }
\examples{
#Set up for 1 plot on figure, give a main title,
#use date for lr
plot(runif(20),runif(20))
mtitle("Main Title")

#Set up for 2 x 2 matrix of plots with a lower left subtitle and overall title
par(mfrow=c(2,2), oma=c(3,0,3,0))
plot(runif(20),runif(20))
plot(rnorm(20),rnorm(20))
plot(exp(rnorm(20)),exp(rnorm(20)))
mtitle("Main Title",ll="n=20")
}
\keyword{hplot}
% Converted by Sd2Rd version 1.21.
Hmisc/man/upFirst.Rd0000644000176200001440000000143113714231730014013 0ustar liggesusers\name{upFirst}
\alias{upFirst}
\title{Change First Letters to Upper Case}
\usage{ upFirst(txt, lower = FALSE, alllower = FALSE) }
\arguments{
\item{txt}{a character vector}
\item{lower}{set to \code{TRUE} to make only the very first letter of the string upper case, and to keep words with at least 2 capital letters in their original form}
\item{alllower}{set to \code{TRUE} to make every word start with lower case unless it has at least 2 caps}
}
\description{ Changes the first letter of each word in a string to upper case, keeping selected words in lower case. Words containing at least 2 capital letters are kept as-is. }
\examples{
upFirst(c('this and that','that is Beyond question'))
}
\references{ \url{https://en.wikipedia.org/wiki/Letter_case#Headings_and_publication_titles} }
Hmisc/man/spower.Rd0000644000176200001440000004016314275453747013714 0ustar liggesusers\name{spower}
\alias{spower}
\alias{print.spower}
\alias{Quantile2}
\alias{print.Quantile2}
\alias{plot.Quantile2}
\alias{logrank}
\alias{Gompertz2}
\alias{Lognorm2}
\alias{Weibull2}
\title{ Simulate Power of 2-Sample Test for Survival under Complex Conditions }
\description{ Given functions to generate random variables for survival times and censoring times, \code{spower} simulates the power of a user-given 2-sample test for censored data. By default, the logrank (Cox 2-sample) test is used, and a \code{logrank} function for comparing 2 groups is provided. Optionally a Cox model is fitted for each simulated dataset and the log hazard ratios are saved (this requires the \code{survival} package). A \code{print} method prints various measures from these. For composing \R functions to generate random survival times under complex conditions, the \code{Quantile2} function allows the user to specify the intervention:control hazard ratio as a function of time, the probability of a control subject actually receiving the intervention (dropin) as a function of time, and the probability that an intervention subject receives only the control agent as a function of time (non-compliance, dropout).
\code{Quantile2} returns a function that generates either control or intervention uncensored survival times subject to non-constant treatment effect, dropin, and dropout. There is a \code{plot} method for plotting the results of \code{Quantile2}, which will aid in understanding the effects of the two types of non-compliance and non-constant treatment effects. \code{Quantile2} assumes that the hazard function for either treatment group is a mixture of the control and intervention hazard functions, with mixing proportions defined by the dropin and dropout probabilities. It computes hazards and survival distributions by numerical differentiation and integration using a grid of (by default) 7500 equally-spaced time points. The \code{logrank} function is intended to be used with \code{spower} but it can be used by itself. It returns the 1 degree of freedom chi-square statistic, with the hazard ratio estimate as an attribute. The \code{Weibull2} function accepts as input two vectors, one containing two times and one containing two survival probabilities, and it solves for the scale and shape parameters of the Weibull distribution (\eqn{S(t) = e^{-\alpha {t}^{\gamma}}}{S(t) = exp(-\alpha*t^\gamma)}) which will yield those estimates. It creates an \R function to evaluate survival probabilities from this Weibull distribution. \code{Weibull2} is useful in creating functions to pass as the first argument to \code{Quantile2}. The \code{Lognorm2} and \code{Gompertz2} functions are similar to \code{Weibull2} except that they produce survival functions for the log-normal and Gompertz distributions. When \code{cox=TRUE} is specified to \code{spower}, the analyst may wish to extract the two margins of error by using the \code{print} method for \code{spower} objects (see example below) and take the maximum of the two. } \usage{ spower(rcontrol, rinterv, rcens, nc, ni, test=logrank, cox=FALSE, nsim=500, alpha=0.05, pr=TRUE) \method{print}{spower}(x, conf.int=.95, \dots) Quantile2(scontrol, hratio, dropin=function(times)0, dropout=function(times)0, m=7500, tmax, qtmax=.001, mplot=200, pr=TRUE, \dots) \method{print}{Quantile2}(x, \dots) \method{plot}{Quantile2}(x, what=c("survival", "hazard", "both", "drop", "hratio", "all"), dropsep=FALSE, lty=1:4, col=1, xlim, ylim=NULL, label.curves=NULL, \dots) logrank(S, group) Gompertz2(times, surv) Lognorm2(times, surv) Weibull2(times, surv) } \arguments{ \item{rcontrol}{ a function of n which returns n random uncensored failure times for the control group. \code{spower} assumes that non-compliance (dropin) has been taken into account by this function. } \item{rinterv}{ similar to \code{rcontrol} but for the intervention group } \item{rcens}{ a function of n which returns n random censoring times. It is assumed that both treatment groups have the same censoring distribution. } \item{nc}{ number of subjects in the control group } \item{ni}{ number in the intervention group } \item{scontrol}{ a function of a time vector which returns the survival probabilities for the control group at those times assuming that all patients are compliant. 
}
\item{hratio}{ a function of time which specifies the intervention:control hazard ratio (treatment effect) }
\item{x}{ an object of class \dQuote{Quantile2} created by \code{Quantile2}, or of class \dQuote{spower} created by \code{spower} }
\item{conf.int}{ confidence level for determining fold-change margins of error in estimating the hazard ratio }
\item{S}{ a \code{Surv} object or other two-column matrix for right-censored survival times }
\item{group}{ group indicators; must have length equal to the number of rows in the \code{S} argument. }
\item{times}{ a vector of two times }
\item{surv}{ a vector of two survival probabilities }
\item{test}{ any function of a \code{Surv} object and a grouping variable which computes a chi-square for a two-sample censored data test. The default is \code{logrank}. }
\item{cox}{ If \code{TRUE} the two margins of error are available by using the \code{print} method for \code{spower} objects (see example below) and taking the maximum of the two. }
\item{nsim}{ number of simulations to perform (default=500) }
\item{alpha}{ type I error (default=.05) }
\item{pr}{ If \code{FALSE} prevents \code{spower} from printing progress notes for simulations. If \code{FALSE} prevents \code{Quantile2} from printing \code{tmax} when it calculates \code{tmax}. }
\item{dropin}{ a function of time specifying the probability that a control subject actually is treated with the new intervention at the corresponding time }
\item{dropout}{ a function of time specifying the probability of an intervention subject dropping out to control conditions. As a function of time, \code{dropout} specifies the probability that a patient is treated with the control therapy at time t. \code{dropin} and \code{dropout} form mixing proportions for control and intervention hazard functions. }
\item{m}{ number of time points used for approximating functions (default is 7500) }
\item{tmax}{ maximum time point to use in the grid of \code{m} times. Default is the time such that \code{scontrol(time)} is \code{qtmax}. }
\item{qtmax}{ survival probability corresponding to the last time point used for approximating survival and hazard functions. Default is 0.001. If a simulated time is needed which corresponds to a survival probability of less than \code{qtmax}, the simulated value will be \code{tmax}. }
\item{mplot}{ number of points used for approximating functions for use in plotting (default is 200 equally spaced points) }
\item{\dots}{ optional arguments passed to the \code{scontrol} function when it's evaluated by \code{Quantile2}. Unused for \code{print.spower}. }
\item{what}{ a single character constant (may be abbreviated) specifying which functions to plot. The default is \samp{"both"} meaning both survival and hazard functions. Specify \code{what="drop"} to just plot the dropin and dropout functions, \code{what="hratio"} to plot the hazard ratio functions, or \samp{"all"} to make 4 separate plots showing all functions (6 plots if \code{dropsep=TRUE}). }
\item{dropsep}{ If \code{TRUE} makes \code{plot.Quantile2} separate pure and contaminated functions onto separate plots }
\item{lty}{ vector of line types }
\item{col}{ vector of colors }
\item{xlim}{ optional x-axis limits }
\item{ylim}{ optional y-axis limits }
\item{label.curves}{ optional list which is passed as the \code{opts} argument to \code{\link{labcurve}}. }
}
\value{ \code{spower} returns the power estimate (fraction of simulated chi-squares greater than the alpha-critical value).
If \code{cox=TRUE}, \code{spower} returns an object of class \dQuote{spower} containing the power and various other quantities.

\code{Quantile2} returns an \R function of class \dQuote{Quantile2} with attributes that drive the \code{plot} method. The major attribute is a list containing several lists. Each of these sub-lists contains a \code{Time} vector along with one of the following: survival probabilities for either treatment group and with or without contamination caused by non-compliance, hazard rates in a similar way, intervention:control hazard ratio function with and without contamination, and dropin and dropout functions.

\code{logrank} returns a single chi-square statistic.

\code{Weibull2}, \code{Lognorm2} and \code{Gompertz2} return an \R function with three arguments, only the first of which (the vector of \code{times}) is intended to be specified by the user.
}
\section{Side Effects}{ \code{spower} prints the iteration number every 10 iterations if \code{pr=TRUE}. }
\author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} }
\references{
Lakatos E (1988): Sample sizes based on the log-rank statistic in complex clinical trials. Biometrics 44:229--241 (Correction 44:923).

Cuzick J, Edwards R, Segnan N (1997): Adjusting for non-compliance and contamination in randomized clinical trials. Stat in Med 16:1017--1029.

Cook, T (2003): Methods for mid-course corrections in clinical trials with survival outcomes. Stat in Med 22:3431--3447.

Barthel FMS, Babiker A et al (2006): Evaluation of sample size and power for multi-arm survival trials allowing for non-uniform accrual, non-proportional hazards, loss to follow-up and cross-over. Stat in Med 25:2521--2542.
}
\seealso{ \code{\link{cpower}}, \code{\link{ciapower}}, \code{\link{bpower}}, \code{\link[rms]{cph}}, \code{\link[survival]{coxph}}, \code{\link{labcurve}} }
\examples{
# Simulate a simple 2-arm clinical trial with exponential survival so
# we can compare power simulations of logrank-Cox test with cpower()
# Hazard ratio is constant and patients enter the study uniformly
# with follow-up ranging from 1 to 3 years
# Drop-in probability is constant at .1 and drop-out probability is
# constant at .175.  Two-year survival of control patients in absence
# of drop-in is .8 (mortality=.2).
# Note that hazard rate is -log(.8)/2
# Total sample size (both groups combined) is 1000
# \% mortality reduction by intervention (if no dropin or dropout) is 25
# This corresponds to a hazard ratio of 0.7283 (computed by cpower)

cpower(2, 1000, .2, 25, accrual=2, tmin=1,
       noncomp.c=10, noncomp.i=17.5)

ranfun <- Quantile2(function(x)exp(log(.8)/2*x),
                    hratio=function(x)0.7283156,
                    dropin=function(x).1,
                    dropout=function(x).175)

rcontrol <- function(n) ranfun(n, what='control')
rinterv  <- function(n) ranfun(n, what='int')
rcens    <- function(n) runif(n, 1, 3)

set.seed(11)   # So can reproduce results
spower(rcontrol, rinterv, rcens, nc=500, ni=500,
       test=logrank, nsim=50)  # normally use nsim=500 or 1000

\dontrun{
# Run the same simulation but fit the Cox model for each one to
# get log hazard ratios for the purpose of assessing the tightness of
# confidence intervals that are likely to result

set.seed(11)
u <- spower(rcontrol, rinterv, rcens, nc=500, ni=500,
       test=logrank, nsim=50, cox=TRUE)
u
v <- print(u)
v[c('MOElower','MOEupper','SE')]
}

# Simulate a 2-arm 5-year follow-up study for which the control group's
# survival distribution is Weibull with 1-year survival of .95 and
# 3-year survival of .7.  All subjects are followed at least one year,
# and patients enter the study with linearly increasing probability after that
# Assume there is no chance of dropin for the first 6 months, then the
# probability increases linearly up to .15 at 5 years
# Assume there is a linearly increasing chance of dropout up to .3 at 5 years
# Assume that the treatment has no effect for the first 9 months, then
# it has a constant effect (hazard ratio of .75)

# First find the right Weibull distribution for compliant control patients
sc <- Weibull2(c(1,3), c(.95,.7))
sc

# The inverse cumulative distribution for the case where all subjects are
# followed at least a years and then between a and b years the density
# rises as (time - a) ^ d is a + (b-a) * u ^ (1/(d+1))

rcens <- function(n) 1 + (5-1) * (runif(n) ^ .5)
# To check this, type hist(rcens(10000), nclass=50)

# Put it all together

f <- Quantile2(sc,
        hratio=function(x)ifelse(x<=.75, 1, .75),
        dropin=function(x)ifelse(x<=.5, 0, .15*(x-.5)/(5-.5)),
        dropout=function(x).3*x/5)

par(mfrow=c(2,2))   # par(mfrow=c(1,1)) to make legends fit
plot(f, 'all', label.curves=list(keys='lines'))

rcontrol <- function(n) f(n, 'control')
rinterv  <- function(n) f(n, 'intervention')

set.seed(211)
spower(rcontrol, rinterv, rcens, nc=350, ni=350,
       test=logrank, nsim=50)  # normally nsim=500 or more
par(mfrow=c(1,1))

# Compose a censoring time generator function such that at 1 year
# 5\% of subjects are accrued, at 3 years 70\% are accrued, and at 10
# years 100\% are accrued.  The trial proceeds two years past the last
# accrual for a total of 12 years of follow-up for the first subject.
# Use linear interpolation between these 3 points

rcens <- function(n)
{
  times <- c(0,1,3,10)
  accrued <- c(0,.05,.7,1)
  # Compute inverse of accrued function at U(0,1) random variables
  accrual.times <- approx(accrued, times, xout=runif(n))$y
  censor.times <- 12 - accrual.times
  censor.times
}

censor.times <- rcens(500)
# hist(censor.times, nclass=20)
accrual.times <- 12 - censor.times
# Ecdf(accrual.times)
# lines(c(0,1,3,10), c(0,.05,.7,1), col='red')
# spower(..., rcens=rcens, ...)
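# Lognorm2 and Gompertz2 are used just like Weibull2.  For example
# (illustrative values only), solve for the log-normal survival curve
# having 1-year survival .95 and 3-year survival .7, and check the fit:
sln <- Lognorm2(c(1,3), c(.95,.7))
sln(c(1,3))   # should reproduce .95 and .7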
\dontrun{ # To define a control survival curve from a fitted survival curve # with coordinates (tt, surv) with tt[1]=0, surv[1]=1: Scontrol <- function(times, tt, surv) approx(tt, surv, xout=times)$y tt <- 0:6 surv <- c(1, .9, .8, .75, .7, .65, .64) formals(Scontrol) <- list(times=NULL, tt=tt, surv=surv) # To use a mixture of two survival curves, with e.g. mixing proportions # of .2 and .8, use the following as a guide: # # Scontrol <- function(times, t1, s1, t2, s2) # .2*approx(t1, s1, xout=times)$y + .8*approx(t2, s2, xout=times)$y # t1 <- ...; s1 <- ...; t2 <- ...; s2 <- ...; # formals(Scontrol) <- list(times=NULL, t1=t1, s1=s1, t2=t2, s2=s2) # Check that spower can detect a situation where generated censoring times # are later than all failure times rcens <- function(n) runif(n, 0, 7) f <- Quantile2(scontrol=Scontrol, hratio=function(x).8, tmax=6) cont <- function(n) f(n, what='control') int <- function(n) f(n, what='intervention') spower(rcontrol=cont, rinterv=int, rcens=rcens, nc=300, ni=300, nsim=20) # Do an unstratified logrank test library(survival) # From SAS/STAT PROC LIFETEST manual, p. 1801 days <- c(179,256,262,256,255,224,225,287,319,264,237,156,270,257,242, 157,249,180,226,268,378,355,319,256,171,325,325,217,255,256, 291,323,253,206,206,237,211,229,234,209) status <- c(1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,0, 0,rep(1,19)) treatment <- c(rep(1,10), rep(2,10), rep(1,10), rep(2,10)) sex <- Cs(F,F,M,F,M,F,F,M,M,M,F,F,M,M,M,F,M,F,F,M, M,M,M,M,F,M,M,F,F,F,M,M,M,F,F,M,F,F,F,F) data.frame(days, status, treatment, sex) table(treatment, status) logrank(Surv(days, status), treatment) # agrees with p. 1807 # For stratified tests the picture is puzzling. # survdiff(Surv(days,status) ~ treatment + strata(sex))$chisq # is 7.246562, which does not agree with SAS (7.1609) # But summary(coxph(Surv(days,status) ~ treatment + strata(sex))) # yields 7.16 whereas summary(coxph(Surv(days,status) ~ treatment)) # yields 5.21 as the score test, not agreeing with SAS or logrank() (5.6485) } } \keyword{htest} \keyword{survival} \concept{power} \concept{study design} Hmisc/man/simplifyDims.Rd0000644000176200001440000000151712243661443015041 0ustar liggesusers\name{simplifyDims} \alias{simplifyDims} %- Also NEED an '\alias' for EACH other topic documented here. \title{List Simplification} \description{ Takes a list where each element is a group of rows that have been spanned by a multirow row and combines it into one large matrix. } \usage{ simplifyDims(x) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{list of spanned rows} } \details{ All rows must have the same number of columns. This is used to format the list for printing. } \value{ a matrix that contains all of the spanned rows. } \author{Charles Dupont} \seealso{\code{\link{rbind}}} \examples{ a <- list(a = matrix(1:25, ncol=5), b = matrix(1:10, ncol=5), c = 1:5) simplifyDims(a) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{print} Hmisc/man/na.detail.response.Rd0000644000176200001440000000466113714234047016067 0ustar liggesusers\name{na.detail.response} \alias{na.detail.response} \title{ Detailed Response Variable Information } \description{ This function is called by certain \code{na.action} functions if \code{options(na.detail.response=TRUE)} is set. By default, this function returns a matrix of counts of non-NAs and the mean of the response variable computed separately by whether or not each predictor is NA. 
The default action uses the last column of a \code{Surv} object, in effect computing the proportion of events. Other summary functions may be specified by using \code{options(na.fun.response="name of function")}. } \usage{ na.detail.response(mf) } \arguments{ \item{mf}{ a model frame }} \value{ a matrix, with rows representing the different statistics that are computed for the response, and columns representing the different subsets for each predictor (NA and non-NA value subsets). } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{na.omit}}, \code{\link{na.delete}}, \code{\link{model.frame.default}}, \code{\link{naresid}}, \code{\link{naprint}}, \code{\link{describe}} } \examples{ # sex # [1] m f f m f f m m m m m m m m f f f m f m # age # [1] NA 41 23 30 44 22 NA 32 37 34 38 36 36 50 40 43 34 22 42 30 # y # [1] 0 1 0 0 1 0 1 0 0 1 1 1 0 0 1 1 0 1 0 0 # options(na.detail.response=TRUE, na.action="na.delete", digits=3) # lrm(y ~ age*sex) # # Logistic Regression Model # # lrm(formula = y ~ age * sex) # # # Frequencies of Responses # 0 1 # 10 8 # # Frequencies of Missing Values Due to Each Variable # y age sex # 0 2 0 # # # Statistics on Response by Missing/Non-Missing Status of Predictors # # age=NA age!=NA sex!=NA Any NA No NA # N 2.0 18.000 20.00 2.0 18.000 # Mean 0.5 0.444 0.45 0.5 0.444 # # \dots\dots # options(na.action="na.keep") # describe(y ~ age*sex) # Statistics on Response by Missing/Non-Missing Status of Predictors # # age=NA age!=NA sex!=NA Any NA No NA # N 2.0 18.000 20.00 2.0 18.000 # Mean 0.5 0.444 0.45 0.5 0.444 # # \dots # options(na.fun.response="table") #built-in function table() # describe(y ~ age*sex) # # Statistics on Response by Missing/Non-Missing Status of Predictors # # age=NA age!=NA sex!=NA Any NA No NA # 0 1 10 11 1 10 # 1 1 8 9 1 8 # # \dots } \keyword{models} \keyword{regression} % Converted by Sd2Rd version 1.21. Hmisc/man/na.keep.Rd0000644000176200001440000000166513714234046013714 0ustar liggesusers\name{na.keep} \alias{na.keep} \title{ Do-nothing na.action } \description{ Does not delete rows containing NAs, but does add details concerning the distribution of the response variable if \code{options(na.detail.response=TRUE)}. This \code{na.action} is primarily for use with \code{describe.formula}. } \usage{ na.keep(mf) } \arguments{ \item{mf}{ a model frame }} \value{ the same model frame with the \code{"na.action"} attribute } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{na.omit}}, \code{\link{na.delete}}, \code{\link{model.frame.default}}, \code{\link{na.detail.response}}, \code{\link{naresid}}, \code{\link{naprint}}, \code{\link{describe}} } \examples{ options(na.action="na.keep", na.detail.response=TRUE) x1 <- runif(20) x2 <- runif(20) x2[1:4] <- NA y <- rnorm(20) describe(y ~ x1*x2) } \keyword{models} % Converted by Sd2Rd version 1.21. Hmisc/man/xy.group.Rd0000644000176200001440000000220712243661443014160 0ustar liggesusers\name{xy.group} \alias{xy.group} \title{ Mean x vs. function of y in groups of x } \description{ Compute mean x vs. a function of y (e.g. median) by quantile groups of x or by x grouped to have a given average number of observations. Deletes NAs in x and y before doing computations. 
} \usage{ xy.group(x, y, m=150, g, fun=mean, result="list") } \arguments{ \item{x}{ a vector, may contain NAs } \item{y}{ a vector of same length as x, may contain NAs } \item{m}{ number of observations per group } \item{g}{ number of quantile groups } \item{fun}{ function of y such as median or mean (the default) } \item{result}{ "list" (the default), or "matrix" }} \value{ if result="list", a list with components x and y suitable for plotting. if result="matrix", matrix with rows corresponding to x-groups and columns named n, x, and y. } \seealso{ \code{\link{cut2}}, \code{\link{tapply}} } \examples{
set.seed(1)
x <- runif(1000)
y <- x + runif(1000)
plot(xy.group(x, y, g=10))              # Plot mean y by deciles of x
xy.group(x, y, m=100, result="matrix")  # Print table, 100 obs/group
} \keyword{category} \keyword{nonparametric} \concept{grouping} \concept{stratification} \concept{aggregation} \concept{discretization} Hmisc/man/wtd.stats.Rd0000644000176200001440000002400013714234040014306 0ustar liggesusers\name{wtd.stats} \alias{wtd.mean} \alias{wtd.var} \alias{wtd.quantile} \alias{wtd.Ecdf} \alias{wtd.table} \alias{wtd.rank} \alias{wtd.loess.noiter} \alias{num.denom.setup} \title{ Weighted Statistical Estimates } \description{ These functions compute various weighted versions of standard estimators. In most cases the \code{weights} vector is a vector of the same length as \code{x}, containing frequency counts that in effect expand \code{x} by these counts. \code{weights} can also be sampling weights, in which case setting \code{normwt} to \code{TRUE} will often be appropriate. This results in making \code{weights} sum to the length of the non-missing elements in \code{x}. \code{normwt=TRUE} thus reflects the fact that the true sample size is the length of the \code{x} vector and not the sum of the original values of \code{weights} (which would be appropriate had \code{normwt=FALSE}). When \code{weights} is all ones, the estimates are all identical to unweighted estimates (unless one of the non-default quantile estimation options is specified to \code{wtd.quantile}). When missing data have already been deleted for \code{x}, \code{weights}, and (in the case of \code{wtd.loess.noiter}) \code{y}, specifying \code{na.rm=FALSE} will save computation time. Omitting the \code{weights} argument or specifying \code{NULL} or a zero-length vector will result in the usual unweighted estimates. \code{wtd.mean}, \code{wtd.var}, and \code{wtd.quantile} compute weighted means, variances, and quantiles, respectively. \code{wtd.Ecdf} computes a weighted empirical distribution function. \code{wtd.table} computes a weighted frequency table (although only one stratification variable is supported at present). \code{wtd.rank} computes weighted ranks, using mid-ranks for ties. This can be used to obtain Wilcoxon tests and rank correlation coefficients. \code{wtd.loess.noiter} is a weighted version of \code{loess.smooth} when no iterations for outlier rejection are desired. This results in especially good smoothing when \code{y} is binary. \code{wtd.quantile} removes any observations with zero weight at the beginning; previously, these were changing the quantile estimates. \code{num.denom.setup} is a utility function that allows one to deal with observations containing numbers of events and numbers of trials, by outputting two observations when the numbers of events and of non-events (trials - events) both exceed zero.
A vector of subscripts is generated that will do the proper duplications of observations, and a new binary variable \code{y} is created along with usual cell frequencies (\code{weights}) for each of the \code{y=0}, \code{y=1} cells per observation. } \usage{
wtd.mean(x, weights=NULL, normwt="ignored", na.rm=TRUE)
wtd.var(x, weights=NULL, normwt=FALSE, na.rm=TRUE,
        method=c('unbiased', 'ML'))
wtd.quantile(x, weights=NULL, probs=c(0, .25, .5, .75, 1),
             type=c('quantile','(i-1)/(n-1)','i/(n+1)','i/n'),
             normwt=FALSE, na.rm=TRUE)
wtd.Ecdf(x, weights=NULL, type=c('i/n','(i-1)/(n-1)','i/(n+1)'),
         normwt=FALSE, na.rm=TRUE)
wtd.table(x, weights=NULL, type=c('list','table'), normwt=FALSE,
          na.rm=TRUE)
wtd.rank(x, weights=NULL, normwt=FALSE, na.rm=TRUE)
wtd.loess.noiter(x, y, weights=rep(1,n), span=2/3, degree=1,
                 cell=.13333, type=c('all','ordered all','evaluate'),
                 evaluation=100, na.rm=TRUE)
num.denom.setup(num, denom)
} \arguments{ \item{x}{ a numeric vector (may be a character or \code{category} or \code{factor} vector for \code{wtd.table}) } \item{num}{ vector of numerator frequencies } \item{denom}{ vector of denominators (numbers of trials) } \item{weights}{ a numeric vector of weights } \item{normwt}{ specify \code{normwt=TRUE} to make \code{weights} sum to \code{length(x)} after deletion of \code{NA}s. If \code{weights} are frequency weights, then \code{normwt} should be \code{FALSE}, and if \code{weights} are normalization (aka reliability) weights, then \code{normwt} should be \code{TRUE}. In the case of the former, no check is made that \code{weights} are valid frequencies. } \item{na.rm}{ set to \code{FALSE} to suppress checking for NAs } \item{method}{determines the estimator type; if \code{'unbiased'} (the default) then the usual unbiased estimate (using Bessel's correction) is returned, if \code{'ML'} then it is the maximum likelihood estimate for a Gaussian distribution. In the case of the latter, the \code{normwt} argument has no effect. Uses \code{stats::cov.wt} for both methods.} \item{probs}{ a vector of quantiles to compute. Default is 0 (min), .25, .5, .75, 1 (max). } \item{type}{ For \code{wtd.quantile}, \code{type} defaults to \code{quantile} to use the same interpolated order statistic method as \code{quantile}. Set \code{type} to \code{"(i-1)/(n-1)"}, \code{"i/(n+1)"}, or \code{"i/n"} to use the inverse of the empirical distribution function, using, respectively, (wt - 1)/T, wt/(T+1), or wt/T, where wt is the cumulative weight and T is the total weight (usually total sample size). These three values of \code{type} are the possibilities for \code{wtd.Ecdf}. For \code{wtd.table} the default \code{type} is \code{"list"}, meaning that the function is to return a list containing two vectors: \code{x} is the sorted unique values of \code{x} and \code{sum.of.weights} is the sum of weights for that \code{x}. This is the default so that you don't have to convert the \code{names} attribute of the result that can be obtained with \code{type="table"} to a numeric variable when \code{x} was originally numeric. \code{type="table"} for \code{wtd.table} results in an object that is the same structure as those returned from \code{table}. For \code{wtd.loess.noiter} the default \code{type} is \code{"all"}, indicating that the function is to return a list containing all the original values of \code{x} (including duplicates and without sorting) and the smoothed \code{y} values corresponding to them.
Set \code{type="ordered all"} to sort by \code{x}, and \code{type="evaluate"} to evaluate the smooth only at \code{evaluation} equally spaced points between the observed limits of \code{x}. } \item{y}{a numeric vector the same length as \code{x}} \item{span, degree, cell, evaluation}{ see \code{loess.smooth}. The default is linear (\code{degree}=1) and 100 points of evaluation (if \code{type="evaluate"}). }} \value{ \code{wtd.mean} and \code{wtd.var} return scalars. \code{wtd.quantile} returns a vector the same length as \code{probs}. \code{wtd.Ecdf} returns a list whose elements \code{x} and \code{Ecdf} correspond to unique sorted values of \code{x}. If the first CDF estimate is greater than zero, a point (min(x),0) is placed at the beginning of the estimates. See above for \code{wtd.table}. \code{wtd.rank} returns a vector the same length as \code{x} (after removal of NAs, depending on \code{na.rm}). See above for \code{wtd.loess.noiter}. } \details{ The functions correctly combine weights of observations having duplicate values of \code{x} before computing estimates. When \code{normwt=FALSE} the weighted variance will not equal the unweighted variance even if the weights are identical. That is because of the subtraction of 1 from the sum of the weights in the denominator of the variance formula. If you want the weighted variance to equal the unweighted variance when weights do not vary, use \code{normwt=TRUE}. The articles by Gatz and Smith discuss alternative approaches to arriving at estimators of the standard error of a weighted mean. \code{wtd.rank} does not handle NAs as elegantly as \code{rank} if \code{weights} is specified. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} \cr Benjamin Tyner \cr \email{btyner@gmail.com} } \references{ Research Triangle Institute (1995): SUDAAN User's Manual, Release 6.40, pp. 8-16 to 8-17. Gatz DF, Smith L (1995): The standard error of a weighted mean concentration--I. Bootstrapping vs other methods. Atmospheric Env 11:1185-1193. Gatz DF, Smith L (1995): The standard error of a weighted mean concentration--II. Estimating confidence intervals. Atmospheric Env 29:1195-1200.
https://en.wikipedia.org/wiki/Weighted_arithmetic_mean } \seealso{ \code{\link{mean}}, \code{\link{var}}, \code{\link{quantile}}, \code{\link{table}}, \code{\link{rank}}, \code{\link{loess.smooth}}, \code{\link{lowess}}, \code{\link{plsmo}}, \code{\link{Ecdf}}, \code{\link{somers2}}, \code{\link{describe}} } \examples{ set.seed(1) x <- runif(500) wts <- sample(1:6, 500, TRUE) std.dev <- sqrt(wtd.var(x, wts)) wtd.quantile(x, wts) death <- sample(0:1, 500, TRUE) plot(wtd.loess.noiter(x, death, wts, type='evaluate')) describe(~x, weights=wts) # describe uses wtd.mean, wtd.quantile, wtd.table xg <- cut2(x,g=4) table(xg) wtd.table(xg, wts, type='table') # Here is a method for getting stratified weighted means y <- runif(500) g <- function(y) wtd.mean(y[,1],y[,2]) summarize(cbind(y, wts), llist(xg), g, stat.name='y') # Empirically determine how methods used by wtd.quantile match with # methods used by quantile, when all weights are unity set.seed(1) u <- eval(formals(wtd.quantile)$type) v <- as.character(1:9) r <- matrix(0, nrow=length(u), ncol=9, dimnames=list(u,v)) for(n in c(8, 13, 22, 29)) { x <- rnorm(n) for(i in 1:5) { probs <- sort( runif(9)) for(wtype in u) { wq <- wtd.quantile(x, type=wtype, weights=rep(1,length(x)), probs=probs) for(qtype in 1:9) { rq <- quantile(x, type=qtype, probs=probs) r[wtype, qtype] <- max(r[wtype,qtype], max(abs(wq-rq))) } } } } r # Restructure data to generate a dichotomous response variable # from records containing numbers of events and numbers of trials num <- c(10,NA,20,0,15) # data are 10/12 NA/999 20/20 0/25 15/35 denom <- c(12,999,20,25,35) w <- num.denom.setup(num, denom) w # attach(my.data.frame[w$subs,]) } \keyword{nonparametric} \keyword{category} \keyword{distribution} \keyword{robust} \keyword{loess} \keyword{smooth} \keyword{manip} \concept{weighted sampling} \concept{grouping} \concept{weights} Hmisc/man/label.Rd0000644000176200001440000003455614220336507013455 0ustar liggesusers\name{label} \alias{label} \alias{label<-} \alias{label.default} \alias{label.Surv} \alias{label<-.default} \alias{labelPlotmath} \alias{labelLatex} \alias{[.labelled} \alias{print.labelled} \alias{Label} \alias{Label.data.frame} \alias{llist} \alias{prList} \alias{putHcap} \alias{putHfig} \alias{plotmathTranslate} \alias{as.data.frame.labelled} \alias{data.frame.labelled} \alias{reLabelled} \alias{label.data.frame} \alias{label<-.data.frame} \alias{relevel.labelled} \alias{combineLabels} \title{ Label Attribute of an Object } \description{ \code{label(x)} retrieves the \code{label} attribute of \code{x}. \code{label(x) <- "a label"} stores the label attribute, and also puts the class \code{labelled} as the first class of \code{x} (for S-Plus this class is not used and methods for handling this class are not defined so the \code{"label"} and \code{"units"} attributes are lost upon subsetting). The reason for having this class is so that the subscripting method for \code{labelled}, \code{[.labelled}, can preserve the \code{label} attribute in S. Also, the \code{print} method for \code{labelled} objects prefaces the print with the object's \code{label} (and \code{units} if there). If the variable is also given a \code{"units"} attribute using the \code{units} function, subsetting the variable (using \code{[.labelled}) will also retain the \code{"units"} attribute. \code{label} can optionally append a \code{"units"} attribute to the string, and it can optionally return a string or expression (for \R's \code{plotmath} facility) suitable for plotting. 
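For example, using a hypothetical variable \code{bp}:
\preformatted{
bp <- c(110, 124, 132)
label(bp) <- 'Systolic Blood Pressure'
units(bp) <- 'mmHg'
label(bp)             # "Systolic Blood Pressure"
label(bp, units=TRUE) # "Systolic Blood Pressure [mmHg]"
label(bp, plot=TRUE)  # expression usable as a plotmath axis label
}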
\code{labelPlotmath} is a function that provides the same capability when the input arguments are the \code{'label'} and \code{'units'} character strings rather than a vector having those attributes. When \code{plotmath} mode is used to construct labels, the \code{'label'} or \code{'units'} may contain math expressions but they are typed verbatim if they contain percent signs, blanks, or underscores. \code{labelPlotmath} can optionally create the expression as a character string, which is useful in building \code{ggplot} commands. For \code{Surv} objects, \code{label} first looks to see if there is an overall \code{"label"} attribute for the object, then it looks for saved attributes that \code{Surv} put in the \code{"inputAttributes"} object, looking first at the \code{event} variable, then \code{time2}, and finally \code{time}. You can restrict the search by specifying \code{type}. \code{labelLatex} constructs suitable LaTeX labels for a variable or from the \code{label} and \code{units} arguments, optionally right-justifying \code{units} if \code{hfill=TRUE}. This is useful when making tables when the variable in question is not a column heading. If \code{x} is specified, \code{label} and \code{units} values are extracted from its attributes instead of from the other arguments. \code{Label} (actually \code{Label.data.frame}) is a function which generates S source code that makes the labels in all the variables in a data frame easy to edit. \code{llist} is like \code{list} except that it preserves the names or labels of the component variables in the variables' \code{label} attributes. This can be useful when looping over variables or using \code{sapply} or \code{lapply}. By using \code{llist} instead of \code{list} one can annotate the output with the current variable's name or label. \code{llist} also defines a \code{names} attribute for the list and pulls the \code{names} from the arguments' expressions for non-named arguments. \code{prList} prints a list with element names (without the dollar sign as in default list printing) and if an element of the list is an unclassed list with a name, all of those elements are printed, with titles of the form "primary list name : inner list name". This is especially useful for Rmarkdown html notebooks when a user-written function creates multiple html and graphical outputs to all be printed in a code chunk. Optionally the names can be printed after the object, and the \code{htmlfig} option provides more capabilities when making html reports. \code{prList} does not work for regular html documents. \code{putHfig} is similar to \code{prList} but for a single graphical object that is rendered with a \code{print} method, making it easy to specify long captions and short captions for the table of contents in HTML documents. Table of contents entries are generated with the short caption, which is taken as the long caption if there is none. One can optionally not make a table of contents entry. If argument \code{table=TRUE}, table captions will be produced instead. When \code{expcoll} is given, the \code{markupSpecs} \code{html} function \code{expcoll} will be used to make tables expand upon clicking an arrow rather than always appearing. \code{putHcap} is like \code{putHfig} except that it assumes that users render the graphics or table outside of the \code{putHcap} call. This allows things to work in ordinary html documents. \code{putHcap} does not handle collapsed text.
\code{plotmathTranslate} is a simple function that translates certain character strings to character strings that can be used as part of \R \code{plotmath} expressions. If the input string has a space or percent inside, the string is surrounded by a call to \code{plotmath}'s \code{paste} function. \code{as.data.frame.labelled} is a utility function that is called by \code{[.data.frame}. It is just a copy of \code{as.data.frame.vector}. \code{data.frame.labelled} is another utility function that adds a class \code{"labelled"} to every variable in a data frame that has a \code{"label"} attribute but not a \code{"labelled"} class. \code{relevel.labelled} is a method for preserving \code{label}s with the \code{relevel} function. \code{reLabelled} is used to add a \code{'labelled'} class back to variables in a data frame that have a 'label' attribute but no 'labelled' class. Useful for changing \code{cleanup.import()}'d S-Plus data frames back to general form for \R and old versions of S-Plus. } \usage{
label(x, default=NULL, ...)
\method{label}{default}(x, default=NULL, units=plot, plot=FALSE,
   grid=FALSE, html=FALSE, \dots)
\method{label}{Surv}(x, default=NULL, units=plot, plot=FALSE,
   grid=FALSE, html=FALSE, type=c('any', 'time', 'event'), \dots)
\method{label}{data.frame}(x, default=NULL, self=FALSE, \dots)
label(x, ...) <- value
\method{label}{default}(x, ...) <- value
\method{label}{data.frame}(x, self=TRUE, ...) <- value
labelPlotmath(label, units=NULL, plotmath=TRUE, html=FALSE, grid=FALSE,
              chexpr=FALSE)
labelLatex(x=NULL, label='', units='', size='smaller[2]',
           hfill=FALSE, bold=FALSE, default='', double=FALSE)
\method{print}{labelled}(x, \dots)   ## or x - calls print.labelled
Label(object, \dots)
\method{Label}{data.frame}(object, file='', append=FALSE, \dots)
llist(\dots, labels=TRUE)
prList(x, lcap=NULL, htmlfig=0, after=FALSE)
putHfig(x, \dots, scap=NULL, extra=NULL, subsub=TRUE, hr=TRUE,
        table=FALSE, file='', append=FALSE, expcoll=NULL)
putHcap(\dots, scap=NULL, extra=NULL, subsub=TRUE, hr=TRUE,
        table=FALSE, file='', append=FALSE)
plotmathTranslate(x)
data.frame.labelled(object)
\method{relevel}{labelled}(x, \dots)
reLabelled(object)
combineLabels(\dots)
} \arguments{ \item{x}{ any object (for \code{plotmathTranslate} is a character string). For \code{relevel} is a \code{factor} variable. For \code{prList} is a named list. For \code{putHfig} is a graphical object for which a \code{print} method will render the graphic (e.g., a \code{ggplot2} or \code{plotly} object). } \item{self}{logical, whether to interact with the object or its components} \item{units}{ set to \code{TRUE} to append the \code{'units'} attribute (if present) to the returned label. The \code{'units'} are surrounded by brackets. For \code{labelPlotmath} and \code{labelLatex}, a character string containing the units of measurement. When \code{plot} is \code{TRUE}, \code{units} defaults to \code{TRUE}. } \item{plot}{ set to \code{TRUE} to return a label suitable for \R's \code{plotmath} facility (returns an expression instead of a character string) if R is in effect. If \code{units} is also \code{TRUE}, and if both \code{'label'} and \code{'units'} attributes are present, the \code{'units'} will appear after the label but in smaller type and will not be surrounded by brackets. } \item{default}{ if \code{x} does not have a \code{'label'} attribute and \code{default} (a character string) is specified, the label will be taken as \code{default}.
For \code{labelLatex} the \code{default} is the name of the first argument if it is a variable and not a label. } \item{grid}{ Currently \R's \code{lattice} and \code{grid} functions do not support \code{plotmath} expressions for \code{xlab} and \code{ylab} arguments. When using \code{lattice} functions in \R, set the argument \code{grid} to \code{TRUE} so that \code{labelPlotmath} can return an ordinary character string instead of an expression. } \item{html}{set to \code{TRUE} to use HTML formatting instead of plotmath expressions for constructing labels with units} \item{type}{for \code{Surv} objects specifies the type of element for which to restrict the search for a label} \item{label}{a character string containing a variable's label} \item{plotmath}{ set to \code{TRUE} to have \code{labelMathplot} return an expression for plotting using \R's \code{plotmath} facility. If \R is not in effect, an ordinary character string is returned. } \item{chexpr}{set to \code{TRUE} to have \code{labelPlotmath} return a character string of the form \code{"expression(...)"}} \item{size}{LaTeX size for \code{units}. Default is two sizes smaller than \code{label}, which assumes that the LaTeX \code{relsize} package is in use.} \item{hfill}{set to \code{TRUE} to right-justify \code{units} in the field. This is useful when multiple labels are being put into rows in a LaTeX \code{tabular} environment, and will cause a problem if the label is used in an environment where \code{hfill} is not appropriate.} \item{bold}{set to \code{TRUE} to have \code{labelLatex} put the \code{label} in bold face.} \item{double}{set to \code{TRUE} to represent backslash in LaTeX as four backslashes in place of two. This is needed if, for example, you need to convert the result using \code{as.formula}} \item{value}{ the label of the object, or "". } \item{object}{ a data frame } \item{\dots}{ a list of variables or expressions to be formed into a \code{list}. Ignored for \code{print.labelled}. For \code{relevel} is the \code{level} (a single character string) to become the new reference (first) category. For \code{putHfig} and \code{putHcap} represents one or more character strings that are pasted together, separated by a blank. } \item{file}{ the name of a file to which to write S source code. Default is \code{""}, meaning standard output. For \code{putHcap}, set \code{file} to \code{FALSE} to return a character vector instead of writing to \code{file}.} \item{append}{ set to \code{TRUE} to append code generated by \code{Label} to file \code{file}. Also used for \code{putHfig, putHcap}. } \item{labels}{ set to \code{FALSE} to make \code{llist} ignore the variables' \code{label} attribute and use the variables' names. } \item{lcap}{an optional vector of character strings corresponding to elements in \code{x} for \code{prList}. These contain long captions that do not appear in the table of contents but which are printed right after the short caption in the body, in the same font.} \item{htmlfig}{for \code{prList} set to \code{1} to use HTML markup by running the object names through \code{markupSpecs$html$cap} for figure captions. Set \code{htmlfig=2} to also preface the figure caption with \code{"### "} so that it will appear in the table of contents.} \item{after}{set to \code{TRUE} to have \code{prList} put names after the printed object instead of before} \item{scap}{a character string specifying the short (or possibly only) caption.} \item{extra}{an optional vector of character strings. 
When present the long caption will be put in the first column of an HTML table and the elements of \code{extra} in subsequent columns. This allows extra information to appear in the long caption in a way that is right-justified to the right of the flowing caption text.} \item{subsub}{set to \code{FALSE} to suppress \code{"### "} from being placed in front of the short caption. Set it to different character string to use that instead. Set it to \code{""} to ignore short captions entirely. For example to use second-level headings for the table of contents specify \code{subsub="## "}.} \item{hr}{applies if a caption is present. Specify \code{FALSE} to not put a horizontal line before the caption and figure.} \item{table}{set to \code{TRUE} to produce table captions instead of figure captions} \item{expcoll}{character string to be visible, with a clickable arrow following to allow initial hiding of a table and its captions. Cannot be used with \code{table=FALSE}.} } \value{ \code{label} returns the label attribute of x, if any; otherwise, "". \code{label} is used most often for the individual variables in data frames. The function \code{sas.get} copies labels over from SAS if they exist. } \seealso{ \code{\link{sas.get}}, \code{\link{describe}} } \examples{ age <- c(21,65,43) y <- 1:3 label(age) <- "Age in Years" plot(age, y, xlab=label(age)) data <- data.frame(age=age, y=y) label(data) label(data, self=TRUE) <- "A data frame" label(data, self=TRUE) x1 <- 1:10 x2 <- 10:1 label(x2) <- 'Label for x2' units(x2) <- 'mmHg' x2 x2[1:5] dframe <- data.frame(x1, x2) Label(dframe) labelLatex(x2, hfill=TRUE, bold=TRUE) labelLatex(label='Velocity', units='m/s') ##In these examples of llist, note that labels are printed after ##variable names, because of print.labelled a <- 1:3 b <- 4:6 label(b) <- 'B Label' llist(a,b) llist(a,b,d=0) llist(a,b,0) w <- llist(a, b>5, d=101:103) sapply(w, function(x){ hist(as.numeric(x), xlab=label(x)) # locator(1) ## wait for mouse click }) # Or: for(u in w) {hist(u); title(label(u))} } \keyword{attribute} \keyword{misc} \keyword{utilities} % Converted by Sd2Rd version 1.21. Hmisc/man/solvet.Rd0000644000176200001440000000076212243661443013705 0ustar liggesusers\name{solvet} \alias{solvet} \title{ solve Function with tol argument } \description{ A slightly modified version of \code{solve} that allows a tolerance argument for singularity (\code{tol}) which is passed to \code{qr}. } \usage{ solvet(a, b, tol=1e-09) } \arguments{ \item{a}{a square numeric matrix} \item{b}{a numeric vector or matrix} \item{tol}{tolerance for detecting linear dependencies in columns of \code{a}} } \seealso{ \code{\link{solve}} } \keyword{array} \keyword{algebra} Hmisc/man/transcan.Rd0000644000176200001440000014437514275454737014230 0ustar liggesusers\name{transcan} \alias{transcan} \alias{summary.transcan} \alias{print.transcan} \alias{plot.transcan} \alias{ggplot.transcan} \alias{impute.transcan} \alias{predict.transcan} \alias{Function} \alias{Function.transcan} \alias{fit.mult.impute} \alias{vcov.default} \alias{vcov.fit.mult.impute} \alias{[.transcan} \alias{invertTabulated} \title{ Transformations/Imputations using Canonical Variates } \description{ \code{transcan} is a nonlinear additive transformation and imputation function, and there are several functions for using and operating on its results. \code{transcan} automatically transforms continuous and categorical variables to have maximum correlation with the best linear combination of the other variables. 
There is also an option to use a substitute criterion: maximum correlation with the first principal component of the other variables. Continuous variables are expanded as restricted cubic splines and categorical variables are expanded as contrasts (e.g., dummy variables). By default, the first canonical variate is used to find optimum linear combinations of component columns. This function is similar to \code{\link[acepack]{ace}} except that transformations for continuous variables are fitted using restricted cubic splines, monotonicity restrictions are not allowed, and \code{NA}s are allowed. When a variable has any \code{NA}s, transformed scores for that variable are imputed using least squares multiple regression incorporating optimum transformations, or \code{NA}s are optionally set to constants. Shrinkage can be used to safeguard against overfitting when imputing. Optionally, imputed values on the original scale are also computed and returned. For this purpose, recursive partitioning or multinomial logistic models can optionally be used to impute categorical variables, using what is predicted to be the most probable category. By default, \code{transcan} imputes \code{NA}s with \dQuote{best guess} expected values of transformed variables, back transformed to the original scale. Values thus imputed are most like conditional medians assuming the transformations make variables' distributions symmetric (imputed values are similar to conditional modes for categorical variables). By instead specifying \code{n.impute}, \code{transcan} does approximate multiple imputation from the distribution of each variable conditional on all other variables. This is done by sampling \code{n.impute} residuals from the transformed variable, with replacement (a la bootstrapping), or by default, using Rubin's approximate Bayesian bootstrap, where a sample of size n with replacement is selected from the residuals on n non-missing values of the target variable, and then a sample of size m with replacement is chosen from this sample, where m is the number of missing values needing imputation for the current multiple imputation repetition. Neither of these bootstrap procedures assumes normality or even symmetry of residuals. For sometimes-missing categorical variables, optimal scores are computed by adding the \dQuote{best guess} predicted mean score to random residuals off this score. Then categories having scores closest to these predicted scores are taken as the random multiple imputations (\code{impcat = "rpart"} is not currently allowed with \code{n.impute}). The literature recommends using \code{n.impute = 5} or greater. \code{transcan} provides only an approximation to multiple imputation, especially since it \dQuote{freezes} the imputation model before drawing the multiple imputations rather than using different estimates of regression coefficients for each imputation. For multiple imputation, the \code{\link{aregImpute}} function provides a much better approximation to the full Bayesian approach while still not requiring linearity assumptions. When you specify \code{n.impute} to \code{transcan} you can use \code{fit.mult.impute} to re-fit any model \code{n.impute} times based on \code{n.impute} completed datasets (if there are any sometimes missing variables not specified to \code{transcan}, some observations will still be dropped from these fits).
After fitting \code{n.impute} models, \code{fit.mult.impute} will return the fit object from the last imputation, with \code{coefficients} replaced by the average of the \code{n.impute} coefficient vectors and with a component \code{var} equal to the imputation-corrected variance-covariance matrix. \code{fit.mult.impute} can also use the object created by the \code{\link[mice]{mice}} function in the \pkg{mice} library to draw the multiple imputations, as well as objects created by \code{\link{aregImpute}}. The following components of fit objects are also replaced with averages over the \code{n.impute} model fits: \code{linear.predictors}, \code{fitted.values}, \code{stats}, \code{means}, \code{icoef}, \code{scale}, \code{center}, \code{y.imputed}. The \code{\link{summary}} method for \code{transcan} prints the function call, \eqn{R^2} achieved in transforming each variable, and for each variable the coefficients of all other transformed variables that are used to estimate the transformation of the initial variable. If \code{imputed=TRUE} was used in the call to \code{transcan}, it also uses the \code{describe} function to print a summary of imputed values. If \code{long = TRUE}, it also prints all imputed values with observation identifiers. There is also a simple function \code{print.transcan} which merely prints the transformation matrix and the function call. It has an optional argument \code{long}, which if set to \code{TRUE} causes detailed parameters to be printed. Instead of plotting while \code{transcan} is running, you can plot the final transformations after the fact using \code{plot.transcan} or \code{ggplot.transcan}, if the option \code{trantab = TRUE} was specified to \code{transcan}. If in addition the option \code{imputed = TRUE} was specified to \code{transcan}, \code{plot} and \code{ggplot} will show the location of imputed values (including multiples) along the axes. For \code{ggplot}, imputed values are shown as red plus signs. The \code{\link{impute}} method for \code{transcan} does imputations for a selected original data variable, on the original scale (if \code{imputed=TRUE} was given to \code{transcan}). If you do not specify a variable to \code{impute}, it will do imputations for all variables given to \code{transcan} which had at least one missing value. This assumes that the original variables are accessible (i.e., they have been attached) and that you want the imputed variables to have the same names as the original variables. If \code{n.impute} was specified to \code{transcan} you must tell \code{\link{impute}} which \code{imputation} to use. Results are stored in \code{.GlobalEnv} when \code{list.out} is not specified (it is recommended to use \code{list.out=TRUE}). The \code{\link{predict}} method for \code{transcan} computes predicted variables and imputed values from a matrix of new data. This matrix should have the same column variables as the original matrix used with \code{transcan}, and in the same order (unless a formula was used with \code{transcan}). The \code{\link{Function}} function is a generic function generator. \code{Function.transcan} creates \R functions to transform variables using transformations created by \code{transcan}. These functions are useful for getting predicted values with predictors set to values on the original scale.
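For example, a rough sketch of using \code{Function} (the data frame \code{d} and its variables are hypothetical):
\preformatted{
w <- transcan(~ age + chol, imputed=TRUE, trantab=TRUE, data=d)
Function(w)  # creates functions .age and .chol in the search path
.age(50)     # transformed value corresponding to age=50
}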
The \code{\link{vcov}} methods are defined here so that imputation-corrected variance-covariance matrices are readily extracted from \code{fit.mult.impute} objects, and so that \code{fit.mult.impute} can easily compute traditional covariance matrices for individual completed datasets. The subscript method for \code{transcan} preserves attributes. The \code{invertTabulated} function either does inverse linear interpolation or uses sampling to sample qualifying x-values having y-values near the desired values. The latter is used to get inverse values having a reasonable distribution (e.g., no floor or ceiling effects) when the transformation has a flat or nearly flat segment, resulting in a many-to-one transformation in that region. Sampling weights are a combination of the frequency of occurrence of x-values that are within \code{tolInverse} times the range of \code{y} and the squared distance between the associated y-values and the target y-value (\code{aty}). } \usage{
transcan(x, method=c("canonical","pc"), categorical=NULL, asis=NULL, nk,
  imputed=FALSE, n.impute, boot.method=c('approximate bayesian', 'simple'),
  trantab=FALSE, transformed=FALSE, impcat=c("score", "multinom", "rpart"),
  mincut=40, inverse=c('linearInterp','sample'), tolInverse=.05, pr=TRUE,
  pl=TRUE, allpl=FALSE, show.na=TRUE,
  imputed.actual=c('none','datadensity','hist','qq','ecdf'), iter.max=50,
  eps=.1, curtail=TRUE, imp.con=FALSE, shrink=FALSE, init.cat="mode",
  nres=if(boot.method=='simple')200 else 400, data, subset, na.action,
  treeinfo=FALSE, rhsImp=c('mean','random'), details.impcat='', \dots)
\method{summary}{transcan}(object, long=FALSE, digits=6, \dots)
\method{print}{transcan}(x, long=FALSE, \dots)
\method{plot}{transcan}(x, \dots)
\method{ggplot}{transcan}(data, mapping, scale=FALSE, \dots, environment)
\method{impute}{transcan}(x, var, imputation, name, pos.in, data,
  list.out=FALSE, pr=TRUE, check=TRUE, \dots)
fit.mult.impute(formula, fitter, xtrans, data, n.impute, fit.reps=FALSE,
  dtrans, derived, vcovOpts=NULL, pr=TRUE, subset, \dots)
\method{predict}{transcan}(object, newdata, iter.max=50, eps=0.01,
  curtail=TRUE, type=c("transformed","original"),
  inverse, tolInverse, check=FALSE, \dots)
Function(object, \dots)
\method{Function}{transcan}(object, prefix=".", suffix="", pos=-1, \dots)
invertTabulated(x, y, freq=rep(1,length(x)), aty, name='value',
                inverse=c('linearInterp','sample'), tolInverse=0.05, rule=2)
\method{vcov}{default}(object, regcoef.only=FALSE, \dots)
\method{vcov}{fit.mult.impute}(object, regcoef.only=TRUE, intercepts='mid', \dots)
} \arguments{ \item{x}{ a matrix containing continuous variable values and codes for categorical variables. The matrix must have column names (\code{dimnames}). If row names are present, they are used in forming the \code{names} attribute of imputed values if \code{imputed = TRUE}. \code{x} may also be a formula, in which case the model matrix is created automatically, using data in the calling frame. Advantages of using a formula are that \verb{categorical} variables can be determined automatically by a variable being a \code{\link{factor}} variable, and variables with two unique levels are modeled \verb{asis}. Variables with 3 unique values are considered to be \verb{categorical} if a formula is specified. For a formula you may also specify that a variable is to remain untransformed by enclosing its name with the identity function, e.g. \code{I(x3)}. The user may add other variable names to the \code{asis} and \code{categorical} vectors.
For \code{invertTabulated}, \code{x} is a vector or a list with three components: the x vector, the corresponding vector of transformed values, and the corresponding vector of frequencies of the pair of original and transformed variables. For \code{print}, \code{plot}, \code{ggplot}, \code{impute}, and \code{predict}, \code{x} is an object created by \code{transcan}. } \item{formula}{ any \R model formula } \item{fitter}{ any \R, \code{rms}, modeling function (not in quotes) that computes a vector of \code{\link{coefficients}} and for which \code{\link{vcov}} will return a variance-covariance matrix. E.g., \code{fitter = \link{lm}}, \code{\link{glm}}, \code{\link[rms]{ols}}. At present models involving non-regression parameters (e.g., scale parameters in parametric survival models) are not handled fully. } \item{xtrans}{ an object created by \code{transcan}, \code{\link{aregImpute}}, or \code{\link[mice]{mice}} } \item{method}{ use \code{method="canonical"} or any abbreviation thereof, to use canonical variates (the default). \code{method="pc"} transforms a variable instead so as to maximize the correlation with the first principal component of the other variables. } \item{categorical}{ a character vector of names of variables in \code{x} which are categorical, for which the ordering of re-scored values is not necessarily preserved. If \code{categorical} is omitted, it is assumed that all variables are continuous (or binary). Set \code{categorical="*"} to treat all variables as categorical. } \item{asis}{ a character vector of names of variables that are not to be transformed. For these variables, the guts of \code{\link[stats]{lm.fit}} \code{method="qr"} is used to impute missing values. You may want to treat binary variables \verb{asis} (this is automatic if using a formula). If \code{imputed = TRUE}, you may want to use \samp{"categorical"} for binary variables if you want to force imputed values to be one of the original data values. Set \code{asis="*"} to treat all variables \verb{asis}. } \item{nk}{ number of knots to use in expanding each continuous variable (not listed in \code{asis}) in a restricted cubic spline function. Default is 3 (yielding 2 parameters for a variable) if \eqn{n < 30}, 4 if \eqn{30 <= n < 100}{30 \eq n < 100}, and 5 if \eqn{n \ge 100}{n >= 100} (4 parameters). } \item{imputed}{ Set to \code{TRUE} to return a list containing imputed values on the original scale. If the transformation for a variable is non-monotonic, imputed values are not unique. \code{transcan} uses the \code{\link{approx}} function, which returns the highest value of the variable with the transformed score equalling the imputed score. \code{imputed=TRUE} also causes original-scale imputed values to be shown as tick marks on the top margin of each graph when \code{show.na=TRUE} (for the final iteration only). For categorical predictors, these imputed values are passed through the \code{\link{jitter}} function so that their frequencies can be visualized. When \code{n.impute} is used, each \code{NA} will have \code{n.impute} tick marks. } \item{n.impute}{ number of multiple imputations. If omitted, single predicted expected value imputation is used. \code{n.impute=5} is frequently recommended. } \item{boot.method}{ default is to use the approximate Bayesian bootstrap (sample with replacement from sample with replacement of the vector of residuals). You can also specify \code{boot.method="simple"} to use the usual bootstrap one-stage sampling with replacement. 
} \item{trantab}{ Set to \code{TRUE} to add an attribute \code{trantab} to the returned matrix. This contains a vector of lists each with components \code{x} and \code{y} containing the unique values and corresponding transformed values for the columns of \code{x}. This is set up to be used easily with the \code{\link{approx}} function. You must specify \code{trantab=TRUE} if you want to later use the \code{predict.transcan} function with \code{type = "original"}. } \item{transformed}{ set to \code{TRUE} to cause \code{transcan} to return an object \code{transformed} containing the matrix of transformed variables } \item{impcat}{ This argument tells how to impute categorical variables on the original scale. The default is \code{impcat="score"} to impute the category whose canonical variate score is closest to the predicted score. Use \code{impcat="rpart"} to impute categorical variables using the values of all other transformed predictors in conjunction with the \code{\link[rpart]{rpart}} function. A better but somewhat slower approach is to use \code{impcat="multinom"} to fit a multinomial logistic model to the categorical variable, at the last iteration of the \code{transcan} algorithm. This uses the \code{\link{multinom}} function in the \pkg{nnet} library of the \pkg{MASS} package (which is assumed to have been installed by the user) to fit a polytomous logistic model to the current working transformations of all the other variables (using conditional mean imputation for missing predictors). Multiple imputations are made by drawing multinomial values from the vector of predicted probabilities of category membership for the missing categorical values. } \item{mincut}{ If \code{imputed=TRUE}, there are categorical variables, and \code{impcat = "rpart"}, \code{mincut} specifies the lowest node size that will be allowed to be split. The default is 40. } \item{inverse}{ By default, imputed values are back-solved on the original scale using inverse linear interpolation on the fitted tabulated transformed values. This will cause distorted distributions of imputed values (e.g., floor and ceiling effects) when the estimated transformation has a flat or nearly flat section. To instead use the \code{invertTabulated} function (see above) with the \code{"sample"} option, specify \code{inverse="sample"}. } \item{tolInverse}{ the multiplier of the range of transformed values, weighted by \code{freq} and by the distance measure, for determining the set of x values having y values within a tolerance of the value of \code{aty} in \code{invertTabulated}. For \code{predict.transcan}, \code{inverse} and \code{tolInverse} are obtained from options that were specified to \code{transcan} by default. Otherwise, if not specified by the user, these default to the defaults used by \code{invertTabulated}. } \item{pr}{ For \code{transcan}, set to \code{FALSE} to suppress printing \eqn{R^2} and shrinkage factors. For \code{impute.transcan}, set to \code{FALSE} to suppress messages concerning the number of \code{NA} values imputed. For \code{fit.mult.impute}, set to \code{FALSE} to suppress printing variance inflation factors accounting for imputation, rate of missing information, and degrees of freedom. } \item{pl}{ Set to \code{FALSE} to suppress plotting the final transformations with distribution of scores for imputed values (if \code{show.na=TRUE}). } \item{allpl}{ Set to \code{TRUE} to plot transformations for intermediate iterations.
} \item{show.na}{ Set to \code{FALSE} to suppress the distribution of scores assigned to missing values (as tick marks on the right margin of each graph). See also \code{imputed}. } \item{imputed.actual}{ The default is \samp{"none"} to suppress plotting of actual vs. imputed values for all variables having any \code{NA} values. Other choices are \samp{"datadensity"} to use \code{\link{datadensity}} to make a single plot, \samp{"hist"} to make a series of back-to-back histograms, \samp{"qq"} to make a series of q-q plots, or \samp{"ecdf"} to make a series of empirical cdfs. For \code{imputed.actual="datadensity"}, for example, you get a rug plot of the non-missing values for the variable with a rug plot of the imputed values beneath it. When \code{imputed.actual} is not \samp{"none"}, \code{imputed} is automatically set to \code{TRUE}. } \item{iter.max}{ maximum number of iterations to perform for \code{transcan} or \code{predict}. For \code{\link{predict}}, only one iteration is used if there are no \code{NA} values in the data or if \code{imp.con} was used. } \item{eps}{ convergence criterion for \code{transcan} and \code{\link{predict}}. \code{eps} is the maximum change in transformed values from one iteration to the next. If for a given iteration all new transformations of variables differ by less than \code{eps} (with or without negating the transformation to allow for \dQuote{flipping}) from the transformations in the previous iteration, one more iteration is done for \code{transcan}. During this last iteration, individual transformations are not updated but coefficients of transformations are. This improves stability of coefficients of canonical variates on the right-hand side. \code{eps} is ignored when \code{rhsImp="random"}. } \item{curtail}{ for \code{transcan}, causes imputed values on the transformed scale to be truncated so that their ranges are within the ranges of non-imputed transformed values. For \code{\link{predict}}, \code{curtail} defaults to \code{TRUE} to truncate predicted transformed values to their ranges in the original fit (\code{xt}). } \item{imp.con}{ for \code{transcan}, set to \code{TRUE} to impute \code{NA} values on the original scales with constants (medians or most frequent category codes). Set to a vector of constants to instead always use these constants for imputation. These imputed values are ignored when fitting the current working transformation for a single variable. } \item{shrink}{ default is \code{FALSE} to use ordinary least squares or canonical variate estimates. For the purposes of imputing \code{NA}s, you may want to set \code{shrink=TRUE} to avoid overfitting when developing a prediction equation to predict each variable from all the others (see details below). } \item{init.cat}{ method for initializing scorings of categorical variables. The default, \samp{"mode"}, uses a dummy variable set to 1 if the value is the most frequent value. Use \samp{"random"} to use a random 0-1 variable. Set to \samp{"asis"} to use the original integer codes as starting scores. } \item{nres}{ number of residuals to store if \code{n.impute} is specified. If the dataset has fewer than \code{nres} observations, all residuals are saved. Otherwise a random sample of the residuals of length \code{nres} without replacement is saved. The default for \code{nres} is higher if \code{boot.method="approximate bayesian"}. } \item{data}{ Data frame used to fill the formula. For \code{ggplot} is the result of \code{transcan} with \code{trantab=TRUE}.
} \item{subset}{ an integer or logical vector specifying the subset of observations to fit } \item{na.action}{ These may be used if \code{x} is a formula. The default \code{na.action} is \code{na.retain} (defined by \code{transcan}) which keeps all observations with any \code{NA} values. For \code{impute.transcan}, \code{data} is a data frame to use as the source of variables to be imputed, rather than using \code{pos.in}. For \code{fit.mult.impute}, \code{data} is mandatory and is a data frame containing the data to be used in fitting the model but before imputations are applied. Variables omitted from \code{data} are assumed to be available from frame1 and do not need to be imputed. } \item{treeinfo}{ Set to \code{TRUE} to get additional information printed when \code{impcat="rpart"}, such as the predicted probabilities of category membership. } \item{rhsImp}{ Set to \samp{"random"} to use random draw imputation when a sometimes missing variable is moved to be a predictor of other sometimes missing variables. Default is \code{rhsImp="mean"}, which uses conditional mean imputation on the transformed scale. Residuals used are residuals from the transformed scale. When \samp{"random"} is used, \code{transcan} runs 5 iterations and ignores \code{eps}. } \item{details.impcat}{ set to a character scalar that is the name of a category variable to include in the resulting \code{transcan} object an element \code{details.impcat} containing details of how the categorical variable was multiply imputed. } \item{\dots}{ arguments passed to \code{\link{scat1d}} or to the \code{fitter} function (for \code{fit.mult.impute}). For \code{ggplot.transcan}, these arguments are passed to \code{facet_wrap}, e.g. \code{ncol=2}. } \item{long}{ for \code{\link{summary}}, set to \code{TRUE} to print all imputed values. For \code{\link{print}}, set to \code{TRUE} to print details of transformations/imputations. } \item{digits}{ number of significant digits for printing values by \code{\link{summary}} } \item{scale}{for \code{ggplot.transcan} set \code{scale=TRUE} to scale transformed values to [0,1] before plotting.} \item{mapping,environment}{not used; needed because of rules about generics} \item{var}{ For \code{\link{impute}}, is a variable that was originally a column in \code{x}, for which imputed values are to be filled in. \code{imputed=TRUE} must have been used in \code{transcan}. Omit \code{var} to impute all variables, creating new variables in position \code{pos} (see \code{\link{assign}}). } \item{imputation}{ specifies which of the multiple imputations to use for filling in \code{NA} values } \item{name}{ name of variable to impute, for \code{\link{impute}} function. Default is character string version of the second argument (\code{var}) in the call to \code{\link{impute}}. For \code{invertTabulated}, is the name of variable being transformed (used only for warning messages). } \item{pos.in}{ location as defined by \code{\link{assign}} to find variables that need to be imputed, when all variables are to be imputed automatically by \code{impute.transcan} (i.e., when no input variable name is specified). Default is position that contains the first variable to be imputed. } \item{list.out}{ If \code{var} is not specified, you can set \code{list.out=TRUE} to have \code{impute.transcan} return a list containing variables with needed values imputed. This list will contain a single imputation. Variables not needing imputation are copied to the list as-is.
You can use this list for analysis just like a data frame. } \item{check}{ set to \code{FALSE} to suppress certain warning messages } \item{newdata}{ a new data matrix for which to compute transformed variables. Categorical variables must use the same integer codes as were used in the call to \code{transcan}. If a formula was originally specified to \code{transcan} (instead of a data matrix), \code{newdata} is optional and if given must be a data frame; a model frame is generated automatically from the previous formula. The \code{na.action} is handled automatically, and the levels for factor variables must be the same and in the same order as were used in the original variables specified in the formula given to \code{transcan}. } \item{fit.reps}{ set to \code{TRUE} to save all fit objects from the fit for each imputation in \code{fit.mult.impute}. Then the object returned will have a component \code{fits} which is a list whose i'th element is the i'th fit object. } \item{dtrans}{ provides an approach to creating derived variables from a single filled-in dataset. The function specified as \code{dtrans} can even reshape the imputed dataset. An example of such usage is fitting time-dependent covariates in a Cox model that are created by \dQuote{start,stop} intervals. Imputations may be done on a one record per subject data frame that is converted by \code{dtrans} to multiple records per subject. The imputation can enforce consistency of certain variables across records so that for example a missing value of sex will not be imputed as \samp{male} for one of the subject's records and \samp{female} for another. An example of how \code{dtrans} might be specified is \code{dtrans=function(w) \{w$age <- w$years + w$months/12; w\}} where \code{months} might have been imputed but \code{years} was never missing. An outline for using \code{dtrans} to impute missing baseline variables in a longitudinal analysis appears in Details below. } \item{derived}{ an expression containing \R expressions for computing derived variables that are used in the model formula. This is useful when multiple imputations are done for component variables but the actual model uses combinations of these (e.g., ratios or other derivations). For a single derived variable you can specify for example \code{derived=expression(ratio <- weight/height)}. For multiple derived variables use the form \code{derived=expression(\{ratio <- weight/height; product <- weight*height\})} or put the expression on separate input lines. To monitor the multiply-imputed derived variables you can add to the \code{expression} a command such as \code{print(describe(ratio))}. See the example below. Note that \code{derived} is not yet implemented. } \item{vcovOpts}{a list of named additional arguments to pass to the \code{vcov} method for \code{fitter}. Useful for \code{orm} models for retaining all intercepts (\code{vcovOpts=list(intercepts='all')}) instead of just the middle one.} \item{type}{ By default, the matrix of transformed variables is returned, with imputed values on the transformed scale. If you had specified \code{trantab=TRUE} to \code{transcan}, specifying \code{type="original"} does the table look-ups with linear interpolation to return the input matrix \code{x} but with imputed values on the original scale inserted for \code{NA} values. For categorical variables, the method used here is to select the category code having a corresponding scaled value closest to the predicted transformed value. This corresponds to the default \code{impcat}.
Note: imputed values thus returned when \code{type="original"} are single expected value imputations even if \code{n.impute} is given. } \item{object}{ an object created by \code{transcan}, or an object to be converted to \R function code, typically a model fit object of some sort } \item{prefix, suffix}{ When creating separate \R functions for each variable in \code{x}, the name of the new function will be \code{prefix} placed in front of the variable name, and \code{suffix} placed in back of the name. The default is to use names of the form \samp{.varname}, where varname is the variable name. } \item{pos}{ position as in \code{\link{assign}} at which to store new functions (for \code{\link{Function}}). Default is \code{pos=-1}. } \item{y}{ a vector corresponding to \code{x} for \code{invertTabulated}, if its first argument \code{x} is not a list } \item{freq}{ a vector of frequencies corresponding to cross-classified \code{x} and \code{y} if \code{x} is not a list. Default is a vector of ones. } \item{aty}{ vector of transformed values at which inverses are desired } \item{rule}{ see \code{\link{approx}}. \code{transcan} assumes \code{rule} is always 2. } \item{regcoef.only}{ set to \code{TRUE} to make \code{vcov.default} delete positions in the covariance matrix for any non-regression coefficients (e.g., log scale parameter from \code{\link[rms]{psm}} or \code{\link{survreg}}) } \item{intercepts}{this is primarily for \code{\link[rms]{orm}} objects. Set to \code{"none"} to discard all intercepts from the covariance matrix, or to \code{"all"} or \code{"mid"} to keep all elements generated by \code{orm} (\code{orm} only outputs the covariance matrix for the intercept corresponding to the median). You can also set \code{intercepts} to a vector of subscripts for selecting particular intercepts in a multi-intercept model.} } \value{ For \code{transcan}, a list of class \samp{transcan} with elements \item{call}{ (with the function call)} \item{iter}{ (number of iterations done)} \item{rsq, rsq.adj}{ containing the \eqn{R^2}{R-square}s and adjusted \eqn{R^2}{R-square}s achieved in predicting each variable from all the others } \item{categorical}{ the values supplied for \code{categorical} } \item{asis}{ the values supplied for \code{asis} } \item{coef}{ the within-variable coefficients used to compute the first canonical variate } \item{xcoef}{ the (possibly shrunk) across-variables coefficients of the first canonical variate that predicts each variable in turn. } \item{parms}{ the parameters of the transformation (knots for splines, contrast matrix for categorical variables) } \item{fillin}{ the initial estimates for missing values (\code{NA} if variable never missing) } \item{ranges}{ the matrix of ranges of the transformed variables (min and max in the first and second rows) } \item{scale}{ a vector of scales used to determine convergence for a transformation. } \item{formula}{ the formula (if \code{x} was a formula) } , and optionally a vector of shrinkage factors used for predicting each variable from the others. For \code{asis} variables, the scale is the average absolute difference about the median. For other variables it is unity, since canonical variables are standardized. For \code{xcoef}, row i has the coefficients to predict transformed variable i, with the column for the coefficient of variable i set to \code{NA}. If \code{imputed=TRUE} was given, an optional element \code{imputed} also appears.
This is a list with the vector of imputed values (on the original scale) for each variable containing \code{NA}s. Matrices rather than vectors are returned if \code{n.impute} is given. If \code{trantab=TRUE}, the \code{trantab} element also appears, as described above. If \code{n.impute > 0}, \code{transcan} also returns a list \code{residuals} that can be used for future multiple imputation. \code{impute} returns a vector (the same length as \code{var}) of class \samp{impute} with \code{NA} values imputed. \code{predict} returns a matrix with the same number of columns or variables as were in \code{x}. \code{fit.mult.impute} returns a fit object that is a modification of the fit object created by fitting the completed dataset for the final imputation. The \code{var} matrix in the fit object has the imputation-corrected variance-covariance matrix. \code{coefficients} is the average (over imputations) of the coefficient vectors, \code{variance.inflation.impute} is a vector containing the ratios of the diagonals of the between-imputation variance matrix to the diagonals of the average apparent (within-imputation) variance matrix. \code{missingInfo} is \cite{Rubin's rate of missing information} and \code{dfmi} is \cite{Rubin's degrees of freedom for a t-statistic} for testing a single parameter. The last two objects are vectors corresponding to the diagonal of the variance matrix. The class \code{"fit.mult.impute"} is prepended to the other classes produced by the fitting function. \code{fit.mult.impute} stores \code{intercepts} attributes in the coefficient matrix and in \code{var} for \code{orm} fits. } \section{Side Effects}{ prints, plots, and \code{impute.transcan} creates new variables. } \details{ The starting approximation to the transformation for each variable is taken to be the original coding of the variable. The initial approximation for each missing value is taken to be the median of the non-missing values for the variable (for continuous ones) or the most frequent category (for categorical ones). Instead, if \code{imp.con} is a vector, its values are used for imputing \code{NA} values. When using each variable as a dependent variable, \code{NA} values on that variable cause all observations to be temporarily deleted. Once a new working transformation is found for the variable, along with a model to predict that transformation from all the other variables, that latter model is used to impute \code{NA} values in the selected dependent variable if \code{imp.con} is not specified. When that variable is used to predict a new dependent variable, the current working imputed values are inserted. Transformations are updated after each variable becomes a dependent variable, so the order of variables on \code{x} could conceivably make a difference in the final estimates. For obtaining out-of-sample predictions/transformations, \code{\link{predict}} uses the same iterative procedure as \code{transcan} for imputation, with the same starting values for fill-ins as were used by \code{transcan}. It also (by default) uses a conservative approach of curtailing transformed variables to be within the range of the original ones. Even when \code{method = "pc"} is specified, canonical variables are used for imputing missing values. Note that fitted transformations, when evaluated at imputed variable values (on the original scale), will not precisely match the transformed imputed values returned in \code{xt}. 
This is because \code{transcan} uses an approximate method based on linear interpolation to back-solve for imputed values on the original scale. Shrinkage uses the method of \cite{Van Houwelingen and Le Cessie (1990)} (similar to \cite{Copas, 1983}). The shrinkage factor is \deqn{\frac{1-\frac{(1-R2)(n-1)}{n-k-1}}{R2}}{[1 - (1 - R2)(n - 1)/(n - k - 1)]/R2} where R2 is the apparent \eqn{R^2}{R-squared} for predicting the variable, n is the number of non-missing values, and k is the effective number of degrees of freedom (aside from intercepts). A heuristic estimate is used for k: \code{A - 1 + sum(max(0,Bi - 1))/m + m}, where A is the number of d.f. required to represent the variable being predicted, the Bi are the number of columns required to represent all the other variables, and m is the number of all other variables. Division by m is done because the transformations for the other variables are fixed at their current transformations the last time they were being predicted. The \eqn{+ m} term comes from the number of coefficients estimated on the right hand side, whether by least squares or canonical variates. If a shrinkage factor is negative, it is set to 0. The shrinkage factor is the ratio of the adjusted \eqn{R^2}{R-squared} to the ordinary \eqn{R^2}{R-squared}. The adjusted \eqn{R^2}{R-squared} is \deqn{1-\frac{(1-R2)(n-1)}{n-k-1}}{ 1 - (1 - R2)(n - 1)/(n - k - 1)} which is also set to zero if it is negative. If \code{shrink=FALSE} and the adjusted \eqn{R^2}{R-square}s are much smaller than the ordinary \eqn{R^2}{R-square}s, you may want to run \code{transcan} with \code{shrink=TRUE}. Canonical variates are scaled to have variance of 1.0, by multiplying canonical coefficients from \code{\link{cancor}} by \eqn{\sqrt{n-1}}{sqrt(n - 1)}. When specifying a non-\pkg{rms} library fitting function to \code{fit.mult.impute} (e.g., \code{\link{lm}}, \code{\link{glm}}), running the result of \code{fit.mult.impute} through that fit's \code{\link{summary}} method will not use the imputation-adjusted variances. You may obtain the new variances using \code{fit$var} or \code{vcov(fit)}. When you specify a \pkg{rms} function to \code{fit.mult.impute} (e.g. \code{\link[rms]{lrm}}, \code{\link[rms]{ols}}, \code{\link[rms]{cph}}, \code{\link[rms]{psm}}, \code{\link[rms]{bj}}, \code{\link[rms]{Rq}}, \code{\link[rms]{Gls}}, \code{\link[rms]{Glm}}), automatically computed transformation parameters (e.g., knot locations for \code{\link[rms]{rcs}}) that are estimated for the first imputation are used for all other imputations. This ensures that knot locations will not vary, which would change the meaning of the regression coefficients. Warning: even though \code{fit.mult.impute} takes imputation into account when estimating variances of regression coefficients, it does not take into account the variation that results from estimation of the shapes and regression coefficients of the customized imputation equations. Specifying \code{shrink=TRUE} solves a small part of this problem. To fully account for all sources of variation you should consider putting the \code{transcan} invocation inside a bootstrap or loop, if execution time allows. Better still, use \code{\link{aregImpute}} or a package such as \pkg{mice} that uses real Bayesian posterior realizations to multiply impute missing values correctly. It is strongly recommended that you use the \pkg{Hmisc} \code{\link{naclus}} function to determine whether there is a good basis for imputation. 
\code{\link{naclus}} will tell you, for example, if systolic blood pressure is missing whenever diastolic blood pressure is missing. If the only variable that is well correlated with diastolic bp is systolic bp, there is no basis for imputing diastolic bp in this case. At present, \code{predict} does not work with multiple imputation. When calling \code{fit.mult.impute} with \code{\link{glm}} as the \code{fitter} argument, if you need to pass a \code{family} argument to \code{\link{glm}} do it by quoting the family, e.g., \code{family="binomial"}. \code{fit.mult.impute} will not work with proportional odds models when regression imputation was used (as opposed to predictive mean matching). That's because regression imputation will create values of the response variable that did not exist in the dataset, altering the intercept terms in the model. You should be able to use a variable in the formula given to \code{fit.mult.impute} as a numeric variable in the regression model even though it was a factor variable in the invocation of \code{transcan}. Use for example \code{fit.mult.impute(y ~ codes(x), lrm, trans)} (thanks to Trevor Thompson \email{trevor@hp5.eushc.org}). Here is an outline of the steps necessary to impute baseline variables using the \code{dtrans} argument, when the analysis to be repeated by \code{fit.mult.impute} is a longitudinal analysis (using e.g. \code{Gls}). \enumerate{ \item Create a one row per subject data frame containing baseline variables plus follow-up variables that are assigned to windows. For example, you may have dozens of repeated measurements over years but you capture the measurements at the times measured closest to 1, 2, and 3 years after study entry \item Make sure the dataset contains the subject ID \item This dataset becomes the one passed to \code{aregImpute} as \code{data=}. You will be imputing missing baseline variables from follow-up measurements defined at fixed times. \item Have another dataset with all the non-missing follow-up values on it, one record per measurement time per subject. This dataset should not have the baseline variables on it, and the follow-up measurements should not be named the same as the baseline variable(s); the subject ID must also appear \item Add the \code{dtrans} argument to \code{fit.mult.impute} to define a function with one argument representing the one record per subject dataset with missing values filled in from the current imputation. This function merges the above 2 datasets; the returned value of this function is the merged data frame. \item This merged-on-the-fly dataset is the one handed by \code{fit.mult.impute} to your fitting function, so variable names in the formula given to \code{fit.mult.impute} must match the names created by the merge } } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \references{ Kuhfeld, Warren F: The PRINQUAL Procedure. SAS/STAT User's Guide, Fourth Edition, Volume 2, pp. 1265--1323, 1990. Van Houwelingen JC, Le Cessie S: Predictive value of statistical models. Statistics in Medicine 8:1303--1325, 1990. Copas JB: Regression, prediction and shrinkage. JRSS B 45:311--354, 1983. He X, Shen L: Linear regression after spline transformation. Biometrika 84:474--481, 1997. Little RJA, Rubin DB: Statistical Analysis with Missing Data. New York: Wiley, 1987. Rubin DB, Schenker N: Multiple imputation in health-care databases: An overview and some applications. Stat in Med 10:585--598, 1991. 
Faris PD, Ghali WA, et al: Multiple imputation versus data enhancement for dealing with missing data in observational health care outcome analyses. J Clin Epidem 55:184--191, 2002. } \seealso{ \code{\link{aregImpute}}, \code{\link{impute}}, \code{\link{naclus}}, \code{\link{naplot}}, \code{\link[acepack]{ace}}, \code{\link[acepack]{avas}}, \code{\link{cancor}}, \code{\link{prcomp}}, \code{\link{rcspline.eval}}, \code{\link{lsfit}}, \code{\link{approx}}, \code{\link{datadensity}}, \code{\link[mice]{mice}}, \code{\link[ggplot2]{ggplot}} } \examples{ \dontrun{ x <- cbind(age, disease, blood.pressure, pH) #cbind will convert factor object `disease' to integer par(mfrow=c(2,2)) x.trans <- transcan(x, categorical="disease", asis="pH", transformed=TRUE, imputed=TRUE) summary(x.trans) #Summary distribution of imputed values, and R-squares f <- lm(y ~ x.trans$transformed) #use transformed values in a regression #Now replace NAs in original variables with imputed values, if not #using transformations age <- impute(x.trans, age) disease <- impute(x.trans, disease) blood.pressure <- impute(x.trans, blood.pressure) pH <- impute(x.trans, pH) #Do impute(x.trans) to impute all variables, storing new variables under #the old names summary(pH) #uses summary.impute to tell about imputations #and summary.default to tell about pH overall # Get transformed and imputed values on some new data frame xnew newx.trans <- predict(x.trans, xnew) w <- predict(x.trans, xnew, type="original") age <- w[,"age"] #inserts imputed values blood.pressure <- w[,"blood.pressure"] Function(x.trans) #creates .age, .disease, .blood.pressure, .pH() #Repeat first fit using a formula x.trans <- transcan(~ age + disease + blood.pressure + I(pH), imputed=TRUE) age <- impute(x.trans, age) predict(x.trans, expand.grid(age=50, disease="pneumonia", blood.pressure=60:260, pH=7.4)) z <- transcan(~ age + factor(disease.code), # disease.code categorical transformed=TRUE, trantab=TRUE, imputed=TRUE, pl=FALSE) ggplot(z, scale=TRUE) plot(z$transformed) } # Multiple imputation and estimation of variances and covariances of # regression coefficient estimates accounting for imputation set.seed(1) x1 <- factor(sample(c('a','b','c'),100,TRUE)) x2 <- (x1=='b') + 3*(x1=='c') + rnorm(100) y <- x2 + 1*(x1=='c') + rnorm(100) x1[1:20] <- NA x2[18:23] <- NA d <- data.frame(x1,x2,y) n <- naclus(d) plot(n); naplot(n) # Show patterns of NAs f <- transcan(~y + x1 + x2, n.impute=10, shrink=FALSE, data=d) options(digits=3) summary(f) f <- transcan(~y + x1 + x2, n.impute=10, shrink=TRUE, data=d) summary(f) h <- fit.mult.impute(y ~ x1 + x2, lm, f, data=d) # Add ,fit.reps=TRUE to save all fit objects in h, then do something like: # for(i in 1:length(h$fits)) print(summary(h$fits[[i]])) diag(vcov(h)) h.complete <- lm(y ~ x1 + x2, na.action=na.omit) h.complete diag(vcov(h.complete)) # Note: had the rms ols function been used in place of lm, any # function run on h (anova, summary, etc.) 
would have automatically # used imputation-corrected variances and covariances # Example demonstrating how using the multinomial logistic model # to impute a categorical variable results in a frequency # distribution of imputed values that matches the distribution # of non-missing values of the categorical variable \dontrun{ set.seed(11) x1 <- factor(sample(letters[1:4], 1000,TRUE)) x1[1:200] <- NA table(x1)/sum(table(x1)) x2 <- runif(1000) z <- transcan(~ x1 + I(x2), n.impute=20, impcat='multinom') table(z$imputed$x1)/sum(table(z$imputed$x1)) # Here is how to create a completed dataset d <- data.frame(x1, x2) z <- transcan(~x1 + I(x2), n.impute=5, data=d) imputed <- impute(z, imputation=1, data=d, list.out=TRUE, pr=FALSE, check=FALSE) sapply(imputed, function(x)sum(is.imputed(x))) sapply(imputed, function(x)sum(is.na(x))) } # Example where multiple imputations are for basic variables and # modeling is done on variables derived from these set.seed(137) n <- 400 x1 <- runif(n) x2 <- runif(n) y <- x1*x2 + x1/(1+x2) + rnorm(n)/3 x1[1:5] <- NA d <- data.frame(x1,x2,y) w <- transcan(~ x1 + x2 + y, n.impute=5, data=d) # Add ,show.imputed.actual for graphical diagnostics \dontrun{ g <- fit.mult.impute(y ~ product + ratio, ols, w, data=data.frame(x1,x2,y), derived=expression({ product <- x1*x2 ratio <- x1/(1+x2) print(cbind(x1,x2,x1*x2,product)[1:6,])})) } # Here's a method for creating a permanent data frame containing # one set of imputed values for each variable specified to transcan # that had at least one NA, and also containing all the variables # in an original data frame. The following is based on the fact # that the default output location for impute.transcan is # given by the global environment \dontrun{ xt <- transcan(~. , data=mine, imputed=TRUE, shrink=TRUE, n.impute=10, trantab=TRUE) attach(mine, use.names=FALSE) impute(xt, imputation=1) # use first imputation # omit imputation= if using single imputation detach(1, 'mine2') } # Example of using invertTabulated outside transcan x <- c(1,2,3,4,5,6,7,8,9,10) y <- c(1,2,3,4,5,5,5,5,9,10) freq <- c(1,1,1,1,1,2,3,4,1,1) # x=5,6,7,8 with prob. .1 .2 .3 .4 when y=5 # Within a tolerance of .05*(10-1) all y's match exactly # so the distance measure does not play a role set.seed(1) # so can reproduce for(inverse in c('linearInterp','sample')) print(table(invertTabulated(x, y, freq, rep(5,1000), inverse=inverse))) # Test inverse='sample' when the estimated transformation is # flat on the right. First show default imputations set.seed(3) x <- rnorm(1000) y <- pmin(x, 0) x[1:500] <- NA for(inverse in c('linearInterp','sample')) { par(mfrow=c(2,2)) w <- transcan(~ x + y, imputed.actual='hist', inverse=inverse, curtail=FALSE, data=data.frame(x,y)) if(inverse=='sample') next # cat('Click mouse on graph to proceed\n') # locator(1) } } \keyword{smooth} \keyword{regression} \keyword{multivariate} \keyword{methods} \keyword{models} \concept{bootstrap} % Converted by Sd2Rd version 1.21.

Hmisc/man/hidingTOC.Rd

% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hidingTOC.r \name{hidingTOC} \alias{hidingTOC} \title{Moving and Hiding Table of Contents} \usage{ hidingTOC( buttonLabel = "Contents", levels = 3, tocSide = c("right", "left"), buttonSide = c("right", "left"), posCollapse = c("margin", "top", "bottom"), hidden = FALSE ) } \arguments{ \item{buttonLabel}{the text on the button that hides and unhides the table of contents. 
Defaults to \code{Contents}.} \item{levels}{the maximum depth of the table of contents over which display control is desired (defaults to 3)} \item{tocSide}{which side of the page the table of contents should be placed on. Can be either \code{'right'} or \code{'left'}. Defaults to \code{'right'}} \item{buttonSide}{which side of the page the button that hides the TOC should be placed on. Can be either \code{'right'} or \code{'left'}. Defaults to \code{'right'}} \item{posCollapse}{if \code{'margin'} then display the depth select buttons vertically along the side of the page chosen by \code{buttonSide}. If \code{'top'} then display the depth select buttons horizontally under the button that hides the TOC. Defaults to \code{'margin'}. \code{'bottom'} is currently unimplemented.} \item{hidden}{Logical: should the table of contents be hidden at page load? Defaults to \code{FALSE}} } \value{ an HTML formatted text string to be inserted into a markdown document } \description{ Moving and hiding table of contents for Rmd HTML documents } \details{ \code{hidingTOC} creates a table of contents in an Rmd document that can be hidden at the press of a button. It also generates buttons that allow hiding or unhiding the different level depths of the table of contents. For \code{quarto}-related methods see \url{https://stackoverflow.com/questions/73999697}. } \examples{ \dontrun{ hidingTOC() } } \author{ Thomas Dupont }

Hmisc/man/pstamp.Rd

\name{pstamp} \alias{pstamp} \title{Date/Time/Directory Stamp the Current Plot} \description{ Date-time stamp the current plot in the extreme lower right corner. Optionally add the current working directory and arbitrary other text to the stamp. } \usage{ pstamp(txt, pwd = FALSE, time. = TRUE) } \arguments{ \item{txt}{an optional single text string} \item{pwd}{set to \code{TRUE} to add the current working directory name to the stamp} \item{time.}{set to \code{FALSE} to use the date without the time} } \details{ Certain functions are not supported for S-Plus under Windows. For \R, results may not be satisfactory if \code{par(mfrow=)} is in effect. } \author{Frank Harrell} \examples{ plot(1:20) pstamp(pwd=TRUE, time=FALSE) } \keyword{aplot}

Hmisc/man/partition.Rd

\name{partition} \alias{partition} \alias{partition.vector} \alias{partition.matrix} %- Also NEED an '\alias' for EACH other topic documented here. \title{Partitions an object into different sets} \description{ Partitions an object into subsets of length defined in the \code{sep} argument. } \usage{ partition.vector(x, sep, ...) partition.matrix(x, rowsep, colsep, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{object to be partitioned. } \item{sep}{determines how many elements should go into each set. The sum of \code{sep} should be equal to the length of \code{x}.} \item{rowsep}{determines how many rows should go into each set. The sum of \code{rowsep} must equal the number of rows in \code{x}.} \item{colsep}{determines how many columns should go into each set. The sum of \code{colsep} must equal the number of columns in \code{x}.} \item{\dots}{arguments used in other methods of \code{partition}.} } \value{ A list of the same length as \code{sep} containing the partitioned objects. } \author{Charles Dupont} \seealso{ \code{\link{split}}} \examples{ a <- 1:7 partition.vector(a, sep=c(1,3,2,1)) } % R documentation directory. 
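A corresponding minimal sketch for \code{partition.matrix}, based only on the usage and argument descriptions above (the 4x4 matrix and the split sizes are invented for illustration; row and column partitions are specified separately):

m <- matrix(1:16, nrow=4)
# rowsep=c(2,2): two groups of 2 rows; colsep=c(1,3): groups of 1 and 3 columns
# the sums must equal nrow(m) and ncol(m), as documented above
partition.matrix(m, rowsep=c(2,2), colsep=c(1,3))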
\keyword{manip}

Hmisc/man/gbayes.Rd

\name{gbayes} \alias{gbayes} \alias{plot.gbayes} \alias{gbayes2} \alias{gbayesMixPredNoData} \alias{gbayesMixPost} \alias{gbayesMixPowerNP} \alias{gbayes1PowerNP} \title{ Gaussian Bayesian Posterior and Predictive Distributions } \description{ \code{gbayes} derives the (Gaussian) posterior and optionally the predictive distribution when both the prior and the likelihood are Gaussian, and when the statistic of interest comes from a 2-sample problem. This function is especially useful in obtaining the expected power of a statistical test, averaging over the distribution of the population effect parameter (e.g., log hazard ratio) that is obtained using pilot data. \code{gbayes} is also useful for summarizing studies for which the statistic of interest is approximately Gaussian with known variance. An example is given for comparing two proportions using the angular transformation, for which the variance is independent of unknown parameters except for very extreme probabilities. A \code{plot} method is also given. This plots the prior, posterior, and predictive distributions on a single graph using a nice default for the x-axis limits and using the \code{labcurve} function for automatic labeling of the curves. \code{gbayes2} uses the method of Spiegelhalter and Freedman (1986) to compute the probability of correctly concluding that a new treatment is superior to a control. By this we mean that a 1-\code{alpha} normal theory-based confidence interval for the new minus old treatment effect lies wholly to the right of \code{delta.w}, where \code{delta.w} is the minimally worthwhile treatment effect (which can be zero to be consistent with ordinary null hypothesis testing, a method not always making sense). This kind of power function is averaged over a prior distribution for the unknown treatment effect. This procedure is applicable to the situation where a prior distribution is not to be used in constructing the test statistic or confidence interval, but is only used for specifying the distribution of \code{delta}, the parameter of interest. Even though \code{gbayes2} assumes that the test statistic has a normal distribution with known variance (which is strongly a function of the sample size in the two treatment groups), the prior distribution function can be completely general. Instead of using a step-function for the prior distribution as Spiegelhalter and Freedman used in their appendix, \code{gbayes2} uses the built-in \code{integrate} function for numerical integration. \code{gbayes2} also allows the variance of the test statistic to be general as long as it is evaluated by the user. The conditional power given the parameter of interest \code{delta} is \code{1 - pnorm((delta.w - delta)/sd + z)}, where z is the normal critical value corresponding to 1 - \code{alpha}/2. \code{gbayesMixPredNoData} derives the predictive distribution of a statistic that is Gaussian given \code{delta} when no data have yet been observed and when the prior is a mixture of two Gaussians. \code{gbayesMixPost} derives the posterior density, cdf, or posterior mean of \code{delta} given the statistic \code{x}, when the prior for \code{delta} is a mixture of two Gaussians and when \code{x} is Gaussian given \code{delta}. 
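To make the \code{gbayes2} calculation concrete, the conditional power formula quoted above can be coded directly; \code{gbayes2} then averages this quantity over the prior via \code{integrate} (a sketch only; the function name and arguments are invented for illustration):

condPower <- function(delta, sd, delta.w=0, alpha=0.05) {
  z <- qnorm(1 - alpha/2)               # normal critical value for 1 - alpha/2
  1 - pnorm((delta.w - delta)/sd + z)   # power given a fixed treatment effect delta
}
# expected power is the integral of condPower(delta, sd) * prior(delta) over delta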
\code{gbayesMixPowerNP} computes the power for a test for \code{delta} > \code{delta.w} for the case where (1) a Gaussian prior or mixture of two Gaussian priors is used as the prior distribution, (2) this prior is used in forming the statistical test or credible interval, (3) no prior is used for the distribution of \code{delta} for computing power but instead a fixed single \code{delta} is given (as in traditional frequentist hypothesis tests), and (4) the test statistic has a Gaussian likelihood with known variance (and mean equal to the specified \code{delta}). \code{gbayesMixPowerNP} is handy where you want to use an earlier study in testing for treatment effects in a new study, but you want to mix with this prior a non-informative prior. The mixing probability \code{mix} can be thought of as the "applicability" of the previous study. As with \code{gbayes2}, power here means the probability that the new study will yield a left credible interval that is to the right of \code{delta.w}. \code{gbayes1PowerNP} is a special case of \code{gbayesMixPowerNP} when the prior is a single Gaussian. } \usage{ gbayes(mean.prior, var.prior, m1, m2, stat, var.stat, n1, n2, cut.prior, cut.prob.prior=0.025) \method{plot}{gbayes}(x, xlim, ylim, name.stat='z', \dots) gbayes2(sd, prior, delta.w=0, alpha=0.05, upper=Inf, prior.aux) gbayesMixPredNoData(mix=NA, d0=NA, v0=NA, d1=NA, v1=NA, what=c('density','cdf')) gbayesMixPost(x=NA, v=NA, mix=1, d0=NA, v0=NA, d1=NA, v1=NA, what=c('density','cdf','postmean')) gbayesMixPowerNP(pcdf, delta, v, delta.w=0, mix, interval, nsim=0, alpha=0.05) gbayes1PowerNP(d0, v0, delta, v, delta.w=0, alpha=0.05) } \arguments{ \item{mean.prior}{ mean of the prior distribution } \item{cut.prior,cut.prob.prior,var.prior}{ variance of the prior. Use a large number such as 10000 to effectively use a flat (noninformative) prior. Sometimes it is useful to compute the variance so that the prior probability that \code{stat} is greater than some impressive value \code{u} is only \code{alpha}. The correct \code{var.prior} to use is then \code{((u-mean.prior)/qnorm(1-alpha))^2}. You can specify \code{cut.prior=u} and \code{cut.prob.prior=alpha} (whose default is 0.025) in place of \code{var.prior} to have \code{gbayes} compute the prior variance in this manner. } \item{m1}{ sample size in group 1 } \item{m2}{ sample size in group 2 } \item{stat}{ statistic comparing groups 1 and 2, e.g., log hazard ratio, difference in means, difference in angular transformations of proportions } \item{var.stat}{ variance of \code{stat}, assumed to be known. \code{var.stat} should either be a constant (allowed if \code{n1} is not specified), or a function of two arguments which specify the sample sizes in groups 1 and 2. Calculations will be approximate when the variance is estimated from the data. } \item{x}{ an object returned by \code{gbayes} or the value of the statistic which is an estimator of delta, the parameter of interest } \item{sd}{ the standard deviation of the treatment effect } \item{prior}{ a function of possibly a vector of unknown treatment effects, returning the prior density at those values } \item{pcdf}{ a function computing the posterior CDF of the treatment effect \code{delta}, such as a function created by \code{gbayesMixPost} with \code{what="cdf"}. } \item{delta}{ a true unknown single treatment effect to detect } \item{v}{ the variance of the statistic \code{x}, e.g., \code{s^2 * (1/n1 + 1/n2)}. 
Neither \code{x} nor \code{v} need to be defined to \code{gbayesMixPost}, as they can be defined at run time to the function created by \code{gbayesMixPost}. } \item{n1}{ number of future observations in group 1, for obtaining a predictive distribution } \item{n2}{ number of future observations in group 2 } \item{xlim}{ vector of 2 x-axis limits. Default is the mean of the posterior plus or minus 6 standard deviations of the posterior. } \item{ylim}{ vector of 2 y-axis limits. Default is the range over combined prior and posterior densities. } \item{name.stat}{ label for x-axis. Default is \code{"z"}. } \item{...}{ optional arguments passed to \code{labcurve} from \code{plot.gbayes} } \item{delta.w}{ the minimum worthwhile treatment difference to detect. The default is zero for a plain uninteresting null hypothesis. } \item{alpha}{ type I error, or more accurately one minus the confidence level for a two-sided confidence limit for the treatment effect } \item{upper}{ upper limit of integration over the prior distribution multiplied by the normal likelihood for the treatment effect statistic. Default is infinity. } \item{prior.aux}{ argument to pass to \code{prior} from \code{integrate} through \code{gbayes2}. Inside of \code{power} the argument must be named \code{prior.aux} if it exists. You can pass multiple parameters by passing \code{prior.aux} as a list and pulling off elements of the list inside \code{prior}. This setup was used because of difficulties in passing \code{\dots} arguments through \code{integrate} for some situations. } \item{mix}{ mixing probability or weight for the Gaussian prior having mean \code{d0} and variance \code{v0}. \code{mix} must be between 0 and 1, inclusive. } \item{d0}{ mean of the first Gaussian distribution (only Gaussian for \code{gbayes1PowerNP} and is a required argument) } \item{v0}{ variance of the first Gaussian (only Gaussian for \code{gbayes1PowerNP} and is a required argument) } \item{d1}{ mean of the second Gaussian (if \code{mix} < 1) } \item{v1}{ variance of the second Gaussian (if \code{mix} < 1). Any of these last 5 arguments can be omitted to \code{gbayesMixPredNoData} as they can be provided at run time to the function created by \code{gbayesMixPredNoData}. } \item{what}{ specifies whether the predictive density or the CDF is to be computed. Default is \code{"density"}. } \item{interval}{ a 2-vector containing the lower and upper limit for possible values of the test statistic \code{x} that would result in a left credible interval exceeding \code{delta.w} with probability 1-\code{alpha}/2 } \item{nsim}{ defaults to zero, causing \code{gbayesMixPowerNP} to solve numerically for the critical value of \code{x}, then to compute the power accordingly. Specify a nonzero number such as 20000 for \code{nsim} to instead have the function estimate power by simulation. In this case 0.95 confidence limits on the estimated power are also computed. This approach is sometimes necessary if \code{uniroot} can't solve the equation for the critical value. }} \value{ \code{gbayes} returns a list of class \code{"gbayes"} containing the following named elements: \code{mean.prior}, \code{var.prior}, \code{mean.post}, \code{var.post}, and if \code{n1} is specified, \code{mean.pred} and \code{var.pred}. Note that \code{mean.pred} is identical to \code{mean.post}. \code{gbayes2} returns a single number which is the probability of correctly rejecting the null hypothesis in favor of the new treatment. 
\code{gbayesMixPredNoData} returns a function that can be used to evaluate the predictive density or cumulative distribution. \code{gbayesMixPost} returns a function that can be used to evaluate the posterior density or cdf. \code{gbayesMixPowerNP} returns a vector containing two values if \code{nsim} = 0. The first value is the critical value for the test statistic that will make the left credible interval > \code{delta.w}, and the second value is the power. If \code{nsim} > 0, it returns the power estimate and confidence limits for it. The examples show how to use these functions. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \seealso{\code{\link{gbayesSeqSim}}} \references{ Spiegelhalter DJ, Freedman LS, Parmar MKB (1994): Bayesian approaches to randomized trials. JRSS A 157:357--416. Results for \code{gbayes} are derived from Equations 1, 2, 3, and 6. Spiegelhalter DJ, Freedman LS (1986): A predictive approach to selecting the size of a clinical trial, based on subjective clinical opinion. Stat in Med 5:1--13. Joseph, Lawrence and Belisle, Patrick (1997): Bayesian sample size determination for normal means and differences between normal means. The Statistician 46:209--226. Grouin, JM, Coste M, Bunouf P, Lecoutre B (2007): Bayesian sample size determination in non-sequential clinical trials: Statistical aspects and some regulatory considerations. Stat in Med 26:4914--4924. } \examples{ # Compare 2 proportions using the var stabilizing transformation # arcsin(sqrt((x+3/8)/(n+3/4))) (Anscombe), which has variance # 1/[4(n+.5)] m1 <- 100; m2 <- 150 deaths1 <- 10; deaths2 <- 30 f <- function(events,n) asin(sqrt((events+3/8)/(n+3/4))) stat <- f(deaths1,m1) - f(deaths2,m2) var.stat <- function(m1, m2) 1/4/(m1+.5) + 1/4/(m2+.5) cat("Test statistic:",format(stat)," s.d.:", format(sqrt(var.stat(m1,m2))), "\n") #Use unbiased prior with variance 1000 (almost flat) b <- gbayes(0, 1000, m1, m2, stat, var.stat, 2*m1, 2*m2) print(b) plot(b) #To get posterior Prob[parameter > w] use # 1-pnorm(w, b$mean.post, sqrt(b$var.post)) #If g(effect, n1, n2) is the power function to #detect an effect of 'effect' with sample sizes for groups 1 and 2 #of n1,n2, estimate the expected power by getting 1000 random #draws from the posterior distribution, computing power for #each value of the population effect, and averaging the 1000 powers #This code assumes that g will accept vector-valued 'effect' #For the 2-sample proportion problem just addressed, 'effect' #could be taken approximately as the change in the arcsin of #the square root of the probability of the event g <- function(effect, n1, n2, alpha=.05) { sd <- sqrt(var.stat(n1,n2)) z <- qnorm(1 - alpha/2) effect <- abs(effect) 1 - pnorm(z - effect/sd) + pnorm(-z - effect/sd) } effects <- rnorm(1000, b$mean.post, sqrt(b$var.post)) powers <- g(effects, 500, 500) hist(powers, nclass=35, xlab='Power') describe(powers) # gbayes2 examples # First consider a study with a binary response where the # sample size is n1=500 in the new treatment arm and n2=300 # in the control arm. The parameter of interest is the # treated:control log odds ratio, which has variance # 1/[n1 p1 (1-p1)] + 1/[n2 p2 (1-p2)]. This is not # really constant so we average the variance over plausible # values of the probabilities of response p1 and p2. 
We # think that these are between .4 and .6 and we take a # further short cut v <- function(n1, n2, p1, p2) 1/(n1*p1*(1-p1)) + 1/(n2*p2*(1-p2)) n1 <- 500; n2 <- 300 ps <- seq(.4, .6, length=100) vguess <- quantile(v(n1, n2, ps, ps), .75) vguess # 75\% # 0.02183459 # The minimally interesting treatment effect is an odds ratio # of 1.1. The prior distribution on the log odds ratio is # a 50:50 mixture of a vague Gaussian (mean 0, sd 100) and # an informative prior from a previous study (mean 1, sd 1) prior <- function(delta) 0.5*dnorm(delta, 0, 100)+0.5*dnorm(delta, 1, 1) deltas <- seq(-5, 5, length=150) plot(deltas, prior(deltas), type='l') # Now compute the power, averaged over this prior gbayes2(sqrt(vguess), prior, log(1.1)) # [1] 0.6133338 # See how much power is lost by ignoring the previous # study completely gbayes2(sqrt(vguess), function(delta)dnorm(delta, 0, 100), log(1.1)) # [1] 0.4984588 # What happens to the power if we really don't believe the treatment # is very effective? Let's use a prior distribution for the log # odds ratio that is uniform between log(1.2) and log(1.3). # Also check the power against a true null hypothesis prior2 <- function(delta) dunif(delta, log(1.2), log(1.3)) gbayes2(sqrt(vguess), prior2, log(1.1)) # [1] 0.1385113 gbayes2(sqrt(vguess), prior2, 0) # [1] 0.3264065 # Compare this with the power of a two-sample binomial test to # detect an odds ratio of 1.25 bpower(.5, odds.ratio=1.25, n1=500, n2=300) # Power # 0.3307486 # For the original prior, consider a new study with equal # sample sizes n in the two arms. Solve for n to get a # power of 0.9. For the variance of the log odds ratio # assume a common p in the center of a range of suspected # probabilities of response, 0.3. For this example we # use a zero null value and the uniform prior above v <- function(n) 2/(n*.3*.7) pow <- function(n) gbayes2(sqrt(v(n)), prior2) uniroot(function(n) pow(n)-0.9, c(50,10000))$root # [1] 2119.675 # Check this value pow(2119.675) # [1] 0.9 # Get the posterior density when there is a mixture of two priors, # with mixing probability 0.5. The first prior is almost # non-informative (normal with mean 0 and variance 10000) and the # second has mean 2 and variance 0.3. The test statistic has a value # of 3 with variance 4. 
f <- gbayesMixPost(3, 4, mix=0.5, d0=0, v0=10000, d1=2, v1=0.3) args(f) # Plot this density delta <- seq(-2, 6, length=150) plot(delta, f(delta), type='l') # Add to the plot the posterior density that used only # the almost non-informative prior lines(delta, f(delta, mix=1), lty=2) # The same but for an observed statistic of zero lines(delta, f(delta, mix=1, x=0), lty=3) # Derive the CDF instead of the density g <- gbayesMixPost(3, 4, mix=0.5, d0=0, v0=10000, d1=2, v1=0.3, what='cdf') # Had mix=0 or 1, gbayes1PowerNP could have been used instead # of gbayesMixPowerNP below # Compute the power to detect an effect of delta=1 if the variance # of the test statistic is 0.2 gbayesMixPowerNP(g, 1, 0.2, interval=c(-10,12)) # Do the same thing by simulation gbayesMixPowerNP(g, 1, 0.2, interval=c(-10,12), nsim=20000) # Compute by what factor the sample size needs to be larger # (the variance needs to be smaller) so that the power is 0.9 ratios <- seq(1, 4, length=50) pow <- numeric(50) for(i in 1:50) pow[i] <- gbayesMixPowerNP(g, 1, 0.2/ratios[i], interval=c(-10,12))[2] # Solve for ratio using reverse linear interpolation approx(pow, ratios, xout=0.9)$y # Check this by computing power gbayesMixPowerNP(g, 1, 0.2/2.1, interval=c(-10,12)) # So the study will have to be 2.1 times as large as earlier thought } \keyword{htest} \concept{study design} \concept{power}

Hmisc/man/simRegOrd.Rd

\name{simRegOrd} \alias{simRegOrd} \title{Simulate Power for Adjusted Ordinal Regression Two-Sample Test} \description{ This function simulates the power of a two-sample test from a proportional odds ordinal logistic model for a continuous response variable, a generalization of the Wilcoxon test. The continuous data model is normal with equal variance. Nonlinear covariate adjustment is allowed, and the user can optionally specify discrete ordinal level overrides to the continuous response. For example, if the main response is systolic blood pressure, one can add two ordinal categories higher than the highest observed blood pressure to capture heart attack or death. } \usage{ simRegOrd(n, nsim=1000, delta=0, odds.ratio=1, sigma, p=NULL, x=NULL, X=x, Eyx, alpha=0.05, pr=FALSE) } \arguments{ \item{n}{combined sample size (both groups combined)} \item{nsim}{number of simulations to run} \item{delta}{difference in means to detect, for continuous portion of response variable} \item{odds.ratio}{odds ratio to detect for ordinal overrides of continuous portion} \item{sigma}{standard deviation for continuous portion of response} \item{p}{a vector of marginal cell probabilities which must add up to one. The \code{i}th element specifies the probability that a patient will be in response level \code{i} for the control arm for the discrete ordinal overrides.} \item{x}{optional covariate to adjust for - a vector of length \code{n}} \item{X}{a design matrix for the adjustment covariate \code{x} if present. This could represent for example \code{x} and \code{x^2} or cubic spline components.} \item{Eyx}{a function of \code{x} that provides the mean response for the control arm treatment} \item{alpha}{type I error} \item{pr}{set to \code{TRUE} to see iteration progress} } \value{ a list containing \code{n, delta, sigma, power, betas, se, pvals} where \code{power} is the estimated power (scalar), and \code{betas, se, pvals} are \code{nsim}-vectors containing, respectively, the ordinal model treatment effect estimate, standard errors, and 2-tailed p-values. 
When a model fit fails, the corresponding entries in \code{betas, se, pvals} are \code{NA} and \code{power} is the proportion of non-failed iterations for which the treatment p-value is significant at the \code{alpha} level. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \seealso{\code{\link{popower}}} \examples{ \dontrun{ ## First use no ordinal high-end category overrides, and compare power ## to t-test when there is no covariate n <- 100 delta <- .5 sd <- 1 require(pwr) power.t.test(n = n / 2, delta=delta, sd=sd, type='two.sample') # 0.70 set.seed(1) w <- simRegOrd(n, delta=delta, sigma=sd, pr=TRUE) # 0.686 ## Now do ANCOVA with a quadratic effect of a covariate n <- 100 x <- rnorm(n) w <- simRegOrd(n, nsim=400, delta=delta, sigma=sd, x=x, X=cbind(x, x^2), Eyx=function(x) x + x^2, pr=TRUE) w$power # 0.68 ## Fit a cubic spline to some simulated pilot data and use the fitted ## function as the true equation in the power simulation require(rms) N <- 1000 set.seed(2) x <- rnorm(N) y <- x + x^2 + rnorm(N, 0, sd=sd) f <- ols(y ~ rcs(x, 4), x=TRUE) n <- 100 j <- sample(1 : N, n, replace=n > N) x <- x[j] X <- f$x[j,] w <- simRegOrd(n, nsim=400, delta=delta, sigma=sd, x=x, X=X, Eyx=Function(f), pr=TRUE) w$power ## 0.70 ## Finally, add discrete ordinal category overrides and high end of y ## Start with no effect of treatment on these ordinal event levels (OR=1.0) w <- simRegOrd(n, nsim=400, delta=delta, odds.ratio=1, sigma=sd, x=x, X=X, Eyx=Function(f), p=c(.98, .01, .01), pr=TRUE) w$power ## 0.61 (0.3 if p=.8 .1 .1, 0.37 for .9 .05 .05, 0.50 for .95 .025 .025) ## Now assume that odds ratio for treatment is 2.5 ## First compute power for clinical endpoint portion of Y alone or <- 2.5 p <- c(.9, .05, .05) popower(p, odds.ratio=or, n=100) # 0.275 ## Compute power of t-test on continuous part of Y alone power.t.test(n = 100 / 2, delta=delta, sd=sd, type='two.sample') # 0.70 ## Note this is the same as the p.o. model power from simulation above ## Solve for OR that gives the same power estimate from popower popower(rep(.01, 100), odds.ratio=2.4, n=100) # 0.706 ## Compute power for continuous Y with ordinal override w <- simRegOrd(n, nsim=400, delta=delta, odds.ratio=or, sigma=sd, x=x, X=X, Eyx=Function(f), p=c(.9, .05, .05), pr=TRUE) w$power ## 0.72 } } \keyword{htest} \keyword{category} \concept{power} \concept{study design} \concept{ordinal logistic model} \concept{ordinal response} \concept{proportional odds model}

Hmisc/man/summary.formula.Rd

\name{summary.formula} \alias{summary.formula} \alias{stratify} \alias{print.summary.formula.response} \alias{plot.summary.formula.response} \alias{latex.summary.formula.response} \alias{print.summary.formula.reverse} \alias{plot.summary.formula.reverse} \alias{latex.summary.formula.reverse} \alias{[.summary.formula.response} \alias{print.summary.formula.cross} \alias{latex.summary.formula.cross} \alias{formula.summary.formula.cross} \alias{na.retain} \alias{cumcategory} \alias{conTestkw} \alias{catTestchisq} \alias{ordTestpo} \title{Summarize Data for Making Tables and Plots} \description{ \code{summary.formula} summarizes the variables listed in an S formula, computing descriptive statistics (including ones in a user-specified function). 
The summary statistics may be passed to \code{print} methods, \code{plot} methods for making annotated dot charts, and \code{latex} methods for typesetting tables using LaTeX. \code{summary.formula} has three methods for computing descriptive statistics on univariate or multivariate responses, subsetted by categories of other variables. The method of summarization is specified in the parameter \code{method} (see details below). For the \code{response} and \code{cross} methods, the statistics used to summarize the data may be specified in a very flexible way (e.g., the geometric mean, 33rd percentile, Kaplan-Meier 2-year survival estimate, mixtures of several statistics). The default summary statistic for these methods is the mean (the proportion of positive responses for a binary response variable). The \code{cross} method is useful for creating data frames which contain summary statistics that are passed to \code{trellis} as raw data (to make multi-panel dot charts, for example). The \code{print} methods use the \code{print.char.matrix} function to print boxed tables. The right hand side of \code{formula} may contain \code{mChoice} (\dQuote{multiple choice}) variables. When \code{test=TRUE} each choice is tested separately as a binary categorical response. The \code{plot} method for \code{method="reverse"} creates a temporary function \code{Key} in frame 0 as is done by the \code{xYplot} and \code{Ecdf.formula} functions. After \code{plot} runs, you can type \code{Key()} to put a legend in a default location, or e.g. \code{Key(locator(1))} to draw a legend where you click the left mouse button. This key is for categorical variables, so to have the opportunity to put the key on the graph you will probably want to use the command \code{plot(object, which="categorical")}. A second function \code{Key2} is created if continuous variables are being plotted. It is used the same as \code{Key}. If the \code{which} argument is not specified to \code{plot}, two pages of plots will be produced. If you don't define \code{par(mfrow=)} yourself, \code{plot.summary.formula.reverse} will try to lay out a multi-panel graph to best fit all the individual dot charts for continuous variables. There is a subscripting method for objects created with \code{method="response"}. This can be used to print or plot selected variables or summary statistics where there would otherwise be too many on one page. \code{cumcategory} is a utility function useful when summarizing an ordinal response variable. It converts such a variable having \code{k} levels to a matrix with \code{k-1} columns, where column \code{i} is a vector of zeros and ones indicating that the categorical response is in level \code{i+1} or greater. When the left hand side of \code{formula} is \code{cumcategory(y)}, the default \code{fun} will summarize it by computing all of the relevant cumulative proportions. Functions \code{conTestkw}, \code{catTestchisq}, \code{ordTestpo} are the default statistical test functions for \code{summary.formula}. These defaults are: Wilcoxon-Kruskal-Wallis test for continuous variables, Pearson chi-square test for categorical variables, and the likelihood ratio chi-square test from the proportional odds model for ordinal variables. These three functions serve also as templates for the user to create her own testing functions that are self-defining in terms of how the results are printed or rendered in LaTeX, or plotted. 
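As a sketch of such a user-written test function (not part of \pkg{Hmisc}; the returned component names follow the \code{conTest}/\code{catTest} argument documentation below), a continuity-corrected variant of \code{catTestchisq} might look like:

catTestchisqCC <- function(tab) {
  st <- chisq.test(tab, correct=TRUE)  # default catTestchisq uses correct=FALSE
  list(P=st$p.value, stat=st$statistic, df=st$parameter,
       testname='Pearson test with continuity correction',
       statname='Chi-square', namefun='chisq')
}
# hypothetical usage:
# summary(treatment ~ sex + race, method='reverse', test=TRUE,
#         catTest=catTestchisqCC)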
} \usage{ \method{summary}{formula}(formula, data=NULL, subset=NULL, na.action=NULL, fun = NULL, method = c("response", "reverse", "cross"), overall = method == "response" | method == "cross", continuous = 10, na.rm = TRUE, na.include = method != "reverse", g = 4, quant = c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 0.975), nmin = if (method == "reverse") 100 else 0, test = FALSE, conTest = conTestkw, catTest = catTestchisq, ordTest = ordTestpo, \dots) \method{[}{summary.formula.response}(x, i, j, drop=FALSE) \method{print}{summary.formula.response}(x, vnames=c('labels','names'), prUnits=TRUE, abbreviate.dimnames=FALSE, prefix.width, min.colwidth, formatArgs=NULL, markdown=FALSE, \dots) \method{plot}{summary.formula.response}(x, which = 1, vnames = c('labels','names'), xlim, xlab, pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), superposeStrata = TRUE, dotfont = 1, add = FALSE, reset.par = TRUE, main, subtitles = TRUE, \dots) \method{latex}{summary.formula.response}(object, title = first.word(deparse(substitute(object))), caption, trios, vnames = c('labels', 'names'), prn = TRUE, prUnits = TRUE, rowlabel = '', cdec = 2, ncaption = TRUE, \dots) \method{print}{summary.formula.reverse}(x, digits, prn = any(n != N), pctdig = 0, what=c('\%', 'proportion'), npct = c('numerator', 'both', 'denominator', 'none'), exclude1 = TRUE, vnames = c('labels', 'names'), prUnits = TRUE, sep = '/', abbreviate.dimnames = FALSE, prefix.width = max(nchar(lab)), min.colwidth, formatArgs=NULL, round=NULL, prtest = c('P','stat','df','name'), prmsd = FALSE, long = FALSE, pdig = 3, eps = 0.001, \dots) \method{plot}{summary.formula.reverse}(x, vnames = c('labels', 'names'), what = c('proportion', '\%'), which = c('both', 'categorical', 'continuous'), xlim = if(what == 'proportion') c(0,1) else c(0,100), xlab = if(what=='proportion') 'Proportion' else 'Percentage', pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), exclude1 = TRUE, dotfont = 1, main, prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001, conType = c('dot', 'bp', 'raw'), cex.means = 0.5, \dots) \method{latex}{summary.formula.reverse}(object, title = first.word(deparse(substitute(object))), digits, prn = any(n != N), pctdig = 0, what=c('\%', 'proportion'), npct = c("numerator", "both", "denominator", "slash", "none"), npct.size = 'scriptsize', Nsize = "scriptsize", exclude1 = TRUE, vnames=c("labels", "names"), prUnits = TRUE, middle.bold = FALSE, outer.size = "scriptsize", caption, rowlabel = "", insert.bottom = TRUE, dcolumn = FALSE, formatArgs=NULL, round = NULL, prtest = c('P', 'stat', 'df', 'name'), prmsd = FALSE, msdsize = NULL, long = dotchart, pdig = 3, eps = 0.001, auxCol = NULL, dotchart=FALSE, \dots) \method{print}{summary.formula.cross}(x, twoway = nvar == 2, prnmiss = any(stats$Missing > 0), prn = TRUE, abbreviate.dimnames = FALSE, prefix.width = max(nchar(v)), min.colwidth, formatArgs = NULL, \dots) \method{latex}{summary.formula.cross}(object, title = first.word(deparse(substitute(object))), twoway = nvar == 2, prnmiss = TRUE, prn = TRUE, caption=attr(object, "heading"), vnames=c("labels", "names"), rowlabel="", \dots) stratify(\dots, na.group = FALSE, shortlabel = TRUE) \method{formula}{summary.formula.cross}(x, \dots) cumcategory(y) conTestkw(group, x) catTestchisq(tab) ordTestpo(group, x) } \arguments{ \item{formula}{ An \R formula with additive effects. For \code{method="response"} or \code{"cross"}, the dependent variable has the usual connotation. 
For \code{method="reverse"}, the dependent variable is what is usually thought of as an independent variable, and it is one that is used to stratify all of the right hand side variables. For \code{method="response"} (only), the \code{formula} may contain one or more invocations of the \code{stratify} function whose arguments are defined below. This causes the entire analysis to be stratified by cross-classifications of the combined list of stratification factors. This stratification will be reflected as major column groupings in the resulting table, or as more response columns for plotting. If \code{formula} has no dependent variable \code{method="reverse"} is the only legal value and so \code{method} defaults to \code{"reverse"} in this case. } \item{x}{an object created by \code{summary.formula}. For \code{conTestkw} a numeric vector, and for \code{ordTestpo}, a numeric or factor variable that can be considered ordered} \item{y}{ a numeric, character, category, or factor vector for \code{cumcategory}. Is converted to a categorical variable is needed. } \item{drop}{ logical. If \code{TRUE} the result is coerced to the lowest possible dimension. } \item{data}{ name or number of a data frame. Default is the current frame. } \item{subset}{ a logical vector or integer vector of subscripts used to specify the subset of data to use in the analysis. The default is to use all observations in the data frame. } \item{na.action}{ function for handling missing data in the input data. The default is a function defined here called \code{na.retain}, which keeps all observations for processing, with missing variables or not. } \item{fun}{ function for summarizing data in each cell. Default is to take the mean of each column of the possibly multivariate response variable. You can specify \code{fun="\%"} to compute percentages (100 times the mean of a series of logical or binary variables). User--specified functions can also return a matrix. For example, you might compute quartiles on a bivariate response. Does not apply to \code{method="reverse"}. } \item{method}{ The default is \code{"response"}, in which case the response variable may be multivariate and any number of statistics may be used to summarize them. Here the responses are summarized separately for each of any number of independent variables. Continuous independent variables (see the \code{continuous} parameter below) are automatically stratified into \code{g} (see below) quantile groups (if you want to control the discretization for selected variables, use the \code{cut2} function on them). Otherwise, the data are subsetted by all levels of discrete right hand side variables. For multivariate responses, subjects are considered to be missing if any of the columns is missing. The \code{method="reverse"} option is typically used to make baseline characteristic tables, for example. The single left hand side variable must be categorical (e.g., treatment), and the right hand side variables are broken down one at a time by the "dependent" variable. Continuous variables are described by three quantiles (quartiles by default) along with outer quantiles (used only for scaling x-axes when plotting quartiles; all are used when plotting box-percentile plots), and categorical ones are described by counts and percentages. If there is no left hand side variable, \code{summary} assumes that there is only one group in the data, so that only one column of summaries will appear. 
If there is no dependent variable in \code{formula}, \code{method} defaults to \code{"reverse"} automatically. The \code{method="cross"} option allows for a multivariate dependent variable and for up to three independents. Continuous independent variables (those with at least \code{continuous} unique values) are automatically divided into \code{g} quantile groups. The independents are cross-classified, and marginal statistics may optionally be computed. The output of \code{summary.formula} in this case is a data frame containing the independent variable combinations (with levels of \code{"All"} corresponding to marginals) and the corresponding summary statistics in the matrix \code{S}. The output data frame is suitable for direct use in \code{trellis}. The \code{print} and \code{latex} typesetting methods for this method allow for a special two-way format if there are two right hand variables. } \item{overall}{ For \code{method="reverse"}, setting \code{overall=TRUE} makes a new column with overall statistics for the whole sample. For \code{method="cross"}, \code{overall=TRUE} (the default) results in all marginal statistics being computed. For \code{trellis} displays (usually multi-panel dot plots), these marginals just form other categories. For \code{"response"}, the default is \code{overall=TRUE}, causing a final row of global summary statistics to appear in tables and dot charts. If \code{test=TRUE} these marginal statistics are ignored in doing statistical tests. } \item{continuous}{ specifies the threshold for when a variable is considered to be continuous (when there are at least \code{continuous} unique values). \code{factor} variables are always considered to be categorical no matter how many levels they have. } \item{na.rm}{ \code{TRUE} (the default) to exclude \code{NA}s before passing data to \code{fun} to compute statistics, \code{FALSE} otherwise. \code{na.rm=FALSE} is useful if the response variable is a matrix and you do not wish to exclude a row of the matrix if any of the columns in that row are \code{NA}. \code{na.rm} also applies to summary statistic functions such as \code{smean.cl.normal}. For these \code{na.rm} defaults to \code{TRUE} unlike built-in functions. } \item{na.include}{ for \code{method="response"}, set \code{na.include=FALSE} to exclude missing values from being counted as their own category when subsetting the response(s) by levels of a categorical variable. For \code{method="reverse"} set \code{na.include=TRUE} to keep missing values of categorical variables from being excluded from the table. } \item{g}{ number of quantile groups to use when variables are automatically categorized with \code{method="response"} or \code{"cross"} using \code{cut2} } \item{nmin}{ if fewer than \code{nmin} observations exist in a category for \code{"response"} (over all strata combined), that category will be ignored. For \code{"reverse"}, for categories of the response variable in which there are less than or equal to \code{nmin} non-missing observations, the raw data are retained for later plotting in place of box plots. } \item{test}{ applies if \code{method="reverse"}. Set to \code{TRUE} to compute test statistics using tests specified in \code{conTest} and \code{catTest}. 
} \item{conTest}{ a function of two arguments (grouping variable and a continuous variable) that returns a list with components \code{P} (the computed P-value), \code{stat} (the test statistic, either chi-square or F), \code{df} (degrees of freedom), \code{testname} (test name), \code{statname} (statistic name), \code{namefun} (\code{"chisq", "fstat"}), an optional component \code{latexstat} (LaTeX representation of \code{statname}), an optional component \code{plotmathstat} (for R - the \code{plotmath} representation of \code{statname}, as a character string), and an optional component \code{note} that contains a character string note about the test (e.g., \code{"test not done because n < 5"}). \code{conTest} is applied to continuous variables on the right-hand-side of the formula when \code{method="reverse"}. The default uses the \code{spearman2} function to run the Wilcoxon or Kruskal-Wallis test using the F distribution. } \item{catTest}{ a function of a frequency table (an integer matrix) that returns a list with the same components as created by \code{conTest}. By default, the Pearson chi-square test is done, without continuity correction (the continuity correction would make the test conservative like the Fisher exact test). } \item{ordTest}{ a function of a frequency table (an integer matrix) that returns a list with the same components as created by \code{conTest}. By default, the proportional odds likelihood ratio test is done. } \item{...}{ for \code{summary.formula} these are optional arguments for \code{cut2} when variables are automatically categorized. For \code{plot} methods these arguments are passed to \code{dotchart2}. For \code{Key} and \code{Key2} these arguments are passed to \code{key}, \code{text}, or \code{mtitle}. For \code{print} methods these are optional arguments to \code{print.char.matrix}. For \code{latex} methods these are passed to \code{latex.default}. One of the most important of these is \code{file}. Specifying \code{file=""} will cause LaTeX code to just be printed to standard output rather than be stored in a permanent file. } \item{object}{an object created by \code{summary.formula}} \item{quant}{ vector of quantiles to use for summarizing data with \code{method="reverse"}. These must be numbers between 0 and 1 inclusive and must include the numbers 0.25, 0.5, and 0.75, which are used for printing and for plotting quantile intervals. The outer quantiles are used for scaling the x-axes for such plots. Specify outer quantiles as \code{0} and \code{1} to scale the x-axes using the whole observed data ranges instead of the default (a 0.95 quantile interval). Box-percentile plots are drawn using all but the outer quantiles. } \item{vnames}{ By default, tables and plots are usually labeled with variable labels (see the \code{label} and \code{sas.get} functions). To use the shorter variable names, specify \code{vnames="name"}. } \item{pch}{ vector of plotting characters to represent different groups, in order of group levels. For \code{method="response"} the characters correspond to levels of the \code{stratify} variable if \code{superposeStrata=TRUE}, and if no \code{strata} are used or if \code{superposeStrata=FALSE}, the \code{pch} vector corresponds to the \code{which} argument for \code{method="response"}. } \item{superposeStrata}{ If \code{stratify} was used, set \code{superposeStrata=FALSE} to make separate dot charts for each level of the \code{stratification} variable, for \code{method='response'}.
The default is to superpose all strata on one dot chart. } \item{dotfont}{font for plotting points} \item{reset.par}{set to \code{FALSE} to suppress the restoring of the old par values in \code{plot.summary.formula.response} } \item{abbreviate.dimnames}{see \code{print.char.matrix}} \item{prefix.width}{see \code{print.char.matrix}} \item{min.colwidth}{ minimum column width to use for boxes printed with \code{print.char.matrix}. The default is the maximum of the minimum column label length and the minimum length of entries in the data cells. } \item{formatArgs}{ a list containing other arguments to pass to \code{format.default} such as \code{scientific}, e.g., \code{formatArgs=list(scientific=c(-5,5))}. For \code{print.summary.formula.reverse} and \code{format.summary.formula.reverse}, \code{formatArgs} applies only to statistics computed on continuous variables, not to percents, numerators, and denominators. The \code{round} argument may be preferred. } \item{markdown}{for \code{print.summary.formula.response} set to \code{TRUE} to use \code{knitr::kable} to produce the table in markdown format rather than using raw text output created by \code{print.char.matrix}} \item{digits}{ number of significant digits to print. Default is to use the current value of the \code{digits} system option. } \item{prn}{ set to \code{TRUE} to print the number of non-missing observations on the current (row) variable. The default is to print these only if any of the counts of non-missing values differs from the total number of non-missing values of the left-hand-side variable. For \code{method="cross"} the default is to always print \code{N}. } \item{prnmiss}{ set to \code{FALSE} to suppress printing counts of missing values for \code{"cross"} } \item{what}{ for \code{method="reverse"} specifies whether proportions or percentages are to be plotted } \item{pctdig}{ number of digits to the right of the decimal place for printing percentages. The default is zero, so percents will be rounded to the nearest percent. } \item{npct}{ specifies which counts are to be printed to the right of percentages. The default is to print the frequency (numerator of the percent) in parentheses. You can specify \code{"both"} to print both numerator and denominator, \code{"denominator"}, \code{"slash"} to typeset horizontally using a forward slash, or \code{"none"}. } \item{npct.size}{ the size for typesetting \code{npct} information which appears after percents. The default is \code{"scriptsize"}. } \item{Nsize}{ When a second row of column headings is added showing sample sizes, \code{Nsize} specifies the LaTeX size for these subheadings. Default is \code{"scriptsize"}. } \item{exclude1}{ by default, \code{method="reverse"} objects will be printed, plotted, or typeset by removing redundant entries from percentage tables for categorical variables. For example, if you print the percent of females, you don't need to print the percent of males. To override this, set \code{exclude1=FALSE}. } \item{prUnits}{ set to \code{FALSE} to suppress printing or latexing \code{units} attributes of variables, when \code{method='reverse'} or \code{'response'} } \item{sep}{ character to use to separate quantiles when printing \code{method="reverse"} tables } \item{prtest}{ a vector of test statistic components to print if \code{test=TRUE} was in effect when \code{summary.formula} was called. Defaults to printing all components. Specify \code{prtest=FALSE} or \code{prtest="none"} to not print any tests.
This applies to \code{print}, \code{latex}, and \code{plot} methods for \code{method='reverse'}. } \item{round}{ for \code{print.summary.formula.reverse} and \code{latex.summary.formula.reverse} specify \code{round} to round the quantiles and optional mean and standard deviation to \code{round} digits after the decimal point } \item{prmsd}{ set to \code{TRUE} to print mean and SD after the three quantiles, for continuous variables with \code{method="reverse"} } \item{msdsize}{ defaults to \code{NULL} to use the current font size for the mean and standard deviation if \code{prmsd} is \code{TRUE}. Set to a character string to specify an alternate LaTeX font size. } \item{long}{ set to \code{TRUE} to print the results for the first category on its own line, not on the same line with the variable label (for \code{method="reverse"} with \code{print} and \code{latex} methods) } \item{pdig}{ number of digits to the right of the decimal place for printing P-values. Default is \code{3}. This is passed to \code{format.pval}. } \item{eps}{ P-values less than \code{eps} will be printed as \code{< eps}. See \code{format.pval}. } \item{auxCol}{ an optional auxiliary column of information, right justified, to add in front of statistics typeset by \code{latex.summary.formula.reverse}. This argument is a list with a single element that has a name specifying the column heading. If this name includes a newline character, the portions of the string before and after the newline form the main heading and the subheading (typically set in smaller font), respectively. See the \code{extracolheads} argument to \code{latex.default}. \code{auxCol} is filled with blanks when a variable being summarized takes up more than one row in the output. This happens with categorical variables. } \item{twoway}{ for \code{method="cross"} with two right hand side variables, \code{twoway} controls whether the resulting table will be printed in enumeration format or as a two-way table (the default) } \item{which}{ For \code{method="response"} specifies the sequential number or a vector of subscripts of statistics to plot. If you had any \code{stratify} variables, these are counted as if more statistics were computed. For \code{method="reverse"} specifies whether to plot results for categorical variables, continuous variables, or both (the default). } \item{conType}{ For plotting \code{method="reverse"} plots for continuous variables, dot plots showing quartiles are drawn by default. Specify \code{conType='bp'} to draw box-percentile plots using all the quantiles in \code{quant} except the outermost ones. Means are drawn with a solid dot and vertical reference lines are placed at the three quartiles. Specify \code{conType='raw'} to make a strip chart showing the raw data. This can only be used if the sample size for each left-hand-side group is less than or equal to \code{nmin}.} \item{cex.means}{ character size for means in box-percentile plots; default is .5} \item{xlim}{ vector of length two specifying x-axis limits. For \code{method="reverse"}, this is only used for plotting categorical variables. Limits for continuous variables are determined by the outer quantiles specified in \code{quant}. } \item{xlab}{ x-axis label } \item{add}{ set to \code{TRUE} to add to an existing plot } \item{main}{ a main title. For \code{method="reverse"} this applies only to the plot for categorical variables.
} \item{subtitles}{ set to \code{FALSE} to suppress automatic subtitles } \item{caption}{ character string containing LaTeX table captions. } \item{title}{ name of resulting LaTeX file omitting the \code{.tex} suffix. Default is the name of the \code{summary} object. If \code{caption} is specified, \code{title} is also used for the table's symbolic reference label. } \item{trios}{ If for \code{method="response"} you summarized the response(s) by using three quantiles, specify \code{trios=TRUE} or \code{trios=v} to group each set of three statistics into one column for \code{latex} output, using the format a B c, where the outer quantiles are in smaller font (\code{scriptsize}). For \code{trios=TRUE}, the overall column names are taken from the column names of the original data matrix. To give new column names, specify \code{trios=v}, where \code{v} is a vector of column names, of length \code{m/3}, where \code{m} is the original number of columns of summary statistics. } \item{rowlabel}{ see \code{latex.default} (under the help file \code{latex}) } \item{cdec}{ number of decimal places to the right of the decimal point for \code{latex}. This value should be a scalar (which will be properly replicated), or a vector with length equal to the number of columns in the table. For \code{"response"} tables, this length does not count the column for \code{N}. } \item{ncaption}{ set to \code{FALSE} to not have \code{latex.summary.formula.response} put sample sizes in captions } \item{i}{ a vector of integers, or character strings containing variable names to subset on. Note that each row subsetted on in a \code{summary.formula.reverse} object subsets on all the levels that make up the corresponding variable (automatically). } \item{j}{ a vector of integers representing column numbers } \item{middle.bold}{ set to \code{TRUE} to have LaTeX use bold face for the middle quantile for \code{method="reverse"} } \item{outer.size}{ the font size for outer quantiles for \code{"reverse"} tables } \item{insert.bottom}{ set to \code{FALSE} to suppress inclusion of definitions placed at the bottom of LaTeX tables for \code{method="reverse"} } \item{dcolumn}{ see \code{latex} } \item{na.group}{ set to \code{TRUE} to have missing stratification variables given their own category (\code{NA}) } \item{shortlabel}{ set to \code{FALSE} to include stratification variable names and equal signs in labels for strata levels } \item{dotchart}{ set to \code{TRUE} to output a dotchart in the latex table being generated. } \item{group}{for \code{conTest} and \code{ordTest}, a numeric or factor variable with length the same as \code{x} } \item{tab}{for \code{catTest}, a frequency table such as that created by \code{table()} } } \value{ \code{summary.formula} returns a data frame or list depending on \code{method}. \code{plot.summary.formula.reverse} returns the number of pages of plots that were made. } \section{Side Effects}{ \code{plot.summary.formula.reverse} creates functions \code{Key} and \code{Key2} in frame 0 that will draw legends. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr \email{fh@fharrell.com} } \references{ Harrell FE (2007): Statistical tables and plots using S and LaTeX. Document available from \url{https://hbiostat.org/R/Hmisc/summary.pdf}.
} \seealso{ \code{\link{mChoice}}, \code{\link{smean.sd}}, \code{\link{summarize}}, \code{\link{label}}, \code{\link[survival]{strata}}, \code{\link{dotchart2}}, \code{\link{print.char.matrix}}, \code{\link{update}}, \code{\link{formula}}, \code{\link{cut2}}, \code{\link{llist}}, \code{\link{format.default}}, \code{\link{latex}}, \code{\link{latexTranslate}}, \code{\link{bpplt}}, \code{\link{summaryM}}, \code{\link{summary}} } \examples{ options(digits=3) set.seed(173) sex <- factor(sample(c("m","f"), 500, rep=TRUE)) age <- rnorm(500, 50, 5) treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE)) # Generate a 3-choice variable; each of 3 variables has 5 possible levels symp <- c('Headache','Stomach Ache','Hangnail', 'Muscle Ache','Depressed') symptom1 <- sample(symp, 500,TRUE) symptom2 <- sample(symp, 500,TRUE) symptom3 <- sample(symp, 500,TRUE) Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms') table(Symptoms) # Note: In this example, some subjects have the same symptom checked # multiple times; in practice these redundant selections would be NAs # mChoice will ignore these redundant selections #Frequency table sex*treatment, sex*Symptoms summary(sex ~ treatment + Symptoms, fun=table) # could also do summary(sex ~ treatment + # mChoice(symptom1,symptom2,symptom3), fun=table) #Compute mean age, separately by 3 variables summary(age ~ sex + treatment + Symptoms) f <- summary(treatment ~ age + sex + Symptoms, method="reverse", test=TRUE) f # trio of numbers represents 25th, 50th, 75th percentile print(f, long=TRUE) plot(f) plot(f, conType='bp', prtest='P') bpplt() # annotated example showing layout of bp plot #Compute predicted probability from a logistic regression model #For different stratifications compute receiver operating #characteristic curve areas (C-indexes) predicted <- plogis(.4*(sex=="m")+.15*(age-50)) positive.diagnosis <- ifelse(runif(500)<=predicted, 1, 0) roc <- function(z) { x <- z[,1]; y <- z[,2]; n <- length(x); if(n<2)return(c(ROC=NA)); n1 <- sum(y==1); c(ROC= (mean(rank(x)[y==1])-(n1+1)/2)/(n-n1) ); } y <- cbind(predicted, positive.diagnosis) options(digits=2) summary(y ~ age + sex, fun=roc) options(digits=3) summary(y ~ age + sex, fun=roc, method="cross") #Use stratify() to produce a table in which time intervals go down the #page and, going across, 3 continuous variables are summarized using #quartiles, and are stratified by two treatments set.seed(1) d <- expand.grid(visit=1:5, treat=c('A','B'), reps=1:100) d$sysbp <- rnorm(100*5*2, 120, 10) label(d$sysbp) <- 'Systolic BP' d$diasbp <- rnorm(100*5*2, 80, 7) d$diasbp[1] <- NA d$age <- rnorm(100*5*2, 50, 12) g <- function(y) { N <- apply(y, 2, function(w) sum(!is.na(w))) h <- function(x) { qu <- quantile(x, c(.25,.5,.75), na.rm=TRUE) names(qu) <- c('Q1','Q2','Q3') c(N=sum(!is.na(x)), qu) } w <- as.vector(apply(y, 2, h)) names(w) <- as.vector( outer(c('N','Q1','Q2','Q3'), dimnames(y)[[2]], function(x,y) paste(y,x))) w } #Use na.rm=FALSE to count NAs separately by column s <- summary(cbind(age,sysbp,diasbp) ~ visit + stratify(treat), na.rm=FALSE, fun=g, data=d) #The result is very wide.
#Re-do, putting treatment vertically x <- with(d, factor(paste('Visit', visit, treat))) summary(cbind(age,sysbp,diasbp) ~ x, na.rm=FALSE, fun=g, data=d) #Compose LaTeX code directly g <- function(y) { h <- function(x) { qu <- format(round(quantile(x, c(.25,.5,.75), na.rm=TRUE),1),nsmall=1) paste('{\\\\scriptsize(',sum(!is.na(x)), ')} \\\\hfill{\\\\scriptsize ', qu[1], '} \\\\textbf{', qu[2], '} {\\\\scriptsize ', qu[3],'}', sep='') } apply(y, 2, h) } s <- summary(cbind(age,sysbp,diasbp) ~ visit + stratify(treat), na.rm=FALSE, fun=g, data=d) # latex(s, prn=FALSE) ## need option in latex to not print n #Put treatment vertically s <- summary(cbind(age,sysbp,diasbp) ~ x, fun=g, data=d, na.rm=FALSE) # latex(s, prn=FALSE) #Plot estimated mean life length (assuming an exponential distribution) #separately by levels of 4 other variables. Repeat the analysis #by levels of a stratification variable, drug. Automatically break #continuous variables into tertiles. #We are using the default, method='response' \dontrun{ life.expect <- function(y) c(Years=sum(y[,1])/sum(y[,2])) attach(pbc) S <- Surv(follow.up.time, death) s2 <- summary(S ~ age + albumin + ascites + edema + stratify(drug), fun=life.expect, g=3) #Note: You can summarize other response variables using the same #independent variables using e.g. update(s2, response~.), or you #can change the list of independent variables using e.g. #update(s2, response ~.- ascites) or update(s2, .~.-ascites) #You can also print, typeset, or plot subsets of s2, e.g. #plot(s2[c('age','albumin'),]) or plot(s2[1:2,]) s2 # invokes print.summary.formula.response #Plot results as a separate dot chart for each of the 3 strata levels par(mfrow=c(2,2)) plot(s2, cex.labels=.6, xlim=c(0,40), superposeStrata=FALSE) #Typeset table, creating s2.tex w <- latex(s2, cdec=1) #Typeset table but just print LaTeX code latex(s2, file="") # useful for Sweave #Take control of groups used for age. Compute 3 quartiles for #both cholesterol and bilirubin (excluding observations that are missing #on EITHER ONE) age.groups <- cut2(age, c(45,60)) g <- function(y) apply(y, 2, quantile, c(.25,.5,.75)) y <- cbind(Chol=chol,Bili=bili) label(y) <- 'Cholesterol and Bilirubin' #You can give new column names that are not legal S names #by enclosing them in quotes, e.g. 'Chol (mg/dl)'=chol s3 <- summary(y ~ age.groups + ascites, fun=g) par(mfrow=c(1,2), oma=c(3,0,3,0)) # allow outer margins for overall for(ivar in 1:2) { # title isub <- (1:3)+(ivar-1)*3 # *3=number of quantiles/var. plot(s3, which=isub, main='', xlab=c('Cholesterol','Bilirubin')[ivar], pch=c(91,16,93)) # [, closed circle, ] } mtext(paste('Quartiles of', label(y)), adj=.5, outer=TRUE, cex=1.75) #Overall (outer) title prlatex(latex(s3, trios=TRUE)) # trios -> collapse 3 quartiles
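#Built-in summary functions can also serve as fun. The following is a #small sketch along the lines of g above (the object name s3b and the #column name given to trios are arbitrary): median and quartiles via #smedian.hilow, with the three statistics collapsed into one column s3b <- summary(bili ~ age.groups + ascites, fun=function(x) smedian.hilow(x, conf.int=.5)) # prlatex(latex(s3b, trios='Bilirubin')) # 3 statistics -> 1 trio column #Summarize only bilirubin, but do it with two statistics: #the mean and the median. Make separate tables for the two randomized #groups and make plots for the active arm.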
g <- function(y) c(Mean=mean(y), Median=median(y)) for(sub in c("D-penicillamine", "placebo")) { ss <- summary(bili ~ age.groups + ascites + chol, fun=g, subset=drug==sub) cat('\n',sub,'\n\n') print(ss) if(sub=='D-penicillamine') { par(mfrow=c(1,1)) plot(ss, which=1:2, dotfont=c(1,-1), subtitles=FALSE, main='') #1=mean, 2=median; -1 font = open circle title(sub='Closed circle: mean; Open circle: median', adj=0) title(sub=sub, adj=1) } w <- latex(ss, append=TRUE, fi='my.tex', label=if(sub=='placebo') 's4b' else 's4a', caption=paste(label(bili),' {\\\\em (',sub,')}', sep='')) #Note symbolic labels for tables for two subsets: s4a, s4b prlatex(w) } #Now consider examples in 'reverse' format, where the lone dependent #variable tells the summary function how to stratify all the #'independent' variables. This is typically used to make tables #comparing baseline variables by treatment group, for example. s5 <- summary(drug ~ bili + albumin + stage + protime + sex + age + spiders, method='reverse') #To summarize all variables, use summary(drug ~., data=pbc) #To summarize all variables with no stratification, use #summary(~a+b+c) or summary(~.,data=\dots) options(digits=1) print(s5, npct='both') #npct='both' : print both numerators and denominators plot(s5, which='categorical') Key(locator(1)) # draw legend at mouse click par(oma=c(3,0,0,0)) # leave outer margin at bottom plot(s5, which='continuous') Key2() # draw legend at lower left corner of plot # oma= above makes this default key fit the page better options(digits=3) w <- latex(s5, npct='both', here=TRUE) # creates s5.tex
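#When test=TRUE, the default tests may be replaced. The following #sketch (the function and object names myConTest and s6 are arbitrary) #substitutes a one-way ANOVA for the default test for continuous #variables; the returned list has the components described under #conTest above myConTest <- function(group, x) { f <- oneway.test(x ~ group) list(P=f$p.value, stat=f$statistic, df=f$parameter, testname='One-way ANOVA', statname='F', namefun='fstat') } s6 <- summary(drug ~ bili + albumin, method='reverse', test=TRUE, conTest=myConTest) print(s6, prtest='P') # print P-values only #Turn to a different dataset and do cross-classifications on possibly #more than one independent variable. The summary function with #method='cross' produces a data frame containing the cross- #classifications. This data frame is suitable for multi-panel #trellis displays, although `summarize' works better for that.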
attach(prostate) size.quartile <- cut2(sz, g=4) bone <- factor(bm,labels=c("no mets","bone mets")) s7 <- summary(ap>1 ~ size.quartile + bone, method='cross') #In this case, quartiles are the default so could have said sz + bone options(digits=3) print(s7, twoway=FALSE) s7 # same as print(s7) w <- latex(s7, here=TRUE) # Make s7.tex library(lattice) invisible(ps.options(reset=TRUE)) trellis.device(postscript, file='demo2.ps') dotplot(S ~ size.quartile|bone, data=s7, #s7 is name of summary stats xlab="Fraction ap>1", ylab="Quartile of Tumor Size") #Can do this more quickly with summarize: # s7 <- summarize(ap>1, llist(size=cut2(sz, g=4), bone), mean, # stat.name='Proportion') # dotplot(Proportion ~ size | bone, data=s7) summary(age ~ stage, method='cross') summary(age ~ stage, fun=quantile, method='cross') summary(age ~ stage, fun=smean.sd, method='cross') summary(age ~ stage, fun=smedian.hilow, method='cross') summary(age ~ stage, fun=function(x) c(Mean=mean(x), Median=median(x)), method='cross') #The next statements print real two-way tables summary(cbind(age,ap) ~ stage + bone, fun=function(y) apply(y, 2, quantile, c(.25,.75)), method='cross') options(digits=2) summary(log(ap) ~ sz + bone, fun=function(y) c(Mean=mean(y), quantile(y)), method='cross') #Summarize an ordered categorical response by all of the needed #cumulative proportions summary(cumcategory(disease.severity) ~ age + sex) } } \keyword{category} \keyword{interface} \keyword{hplot} \keyword{manip} \concept{grouping} \concept{stratification} \concept{aggregation} \concept{cross-classification} Hmisc/man/samplesize.bin.Rd0000644000176200001440000000274612243661443015320 0ustar liggesusers\name{samplesize.bin} \alias{samplesize.bin} \title{ Sample Size for 2-sample Binomial } \description{ Computes sample size(s) for 2-sample binomial problem given vector or scalar probabilities in the two groups. } \usage{ samplesize.bin(alpha, beta, pit, pic, rho=0.5) } \arguments{ \item{alpha}{ scalar ONE-SIDED test size, or two-sided size/2 } \item{beta}{ scalar or vector of powers } \item{pit}{ hypothesized treatment probability of success } \item{pic}{ hypothesized control probability of success } \item{rho}{ proportion of the sample devoted to treated group (\eqn{0 < \code{rho} < 1}) } } \value{ TOTAL sample size(s) } \section{AUTHOR}{ Rick Chappell\cr Dept. of Statistics and Human Oncology\cr University of Wisconsin at Madison\cr \email{chappell@stat.wisc.edu} } \examples{ alpha <- .05 beta <- c(.70,.80,.90,.95) # N1 is a matrix of total sample sizes whose # rows vary by hypothesized treatment success probability and # columns vary by power # See Meinert's book for formulae. N1 <- samplesize.bin(alpha, beta, pit=.55, pic=.5) N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.60, pic=.5)) N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.65, pic=.5)) N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.70, pic=.5)) attr(N1,"dimnames") <- NULL #Accounting for 5\% noncompliance in the treated group inflation <- (1/.95)**2 print(round(N1*inflation+.5,0)) } \keyword{htest} \keyword{category} \concept{study design} \concept{power} Hmisc/man/sasxport.get.Rd0000644000176200001440000001707613714231637015032 0ustar liggesusers\name{sasxport.get} \alias{sasxport.get} \alias{sasdsLabels} \title{Enhanced Importing of SAS Transport Files using read.xport} \description{ Uses the \code{read.xport} and \code{lookup.xport} functions in the \code{foreign} library to import SAS datasets.
SAS date, time, and date/time variables are converted respectively to \code{Date}, POSIX, or \code{POSIXct} objects in \R, variable names are converted to lower case, SAS labels are associated with variables, and (by default) integer-valued variables are converted from storage mode \code{double} to \code{integer}. If the user ran \code{PROC FORMAT CNTLOUT=} in SAS and included the resulting dataset in the SAS version 5 transport file, variables having customized formats that do not include any ranges (i.e., variables having standard \code{PROC FORMAT; VALUE} label formats) will have their format labels looked up, and these variables are converted to S \code{factor}s. For those users having access to SAS, \code{method='csv'} is preferred when importing several SAS datasets. Run SAS macro \code{exportlib.sas} available from \url{https://github.com/harrelfe/Hmisc/blob/master/src/sas/exportlib.sas} to convert all SAS datasets in a SAS data library (from any engine supported by your system) into \code{CSV} files. If any customized formats are used, it is assumed that the \code{PROC FORMAT CNTLOUT=} dataset is in the data library as a regular SAS dataset, as above. \code{sasdsLabels} reads a file containing \code{PROC CONTENTS} printed output to parse dataset labels, assuming that \code{PROC CONTENTS} was run on an entire library. } \usage{ sasxport.get(file, lowernames=TRUE, force.single = TRUE, method=c('read.xport','dataload','csv'), formats=NULL, allow=NULL, out=NULL, keep=NULL, drop=NULL, as.is=0.5, FUN=NULL) sasdsLabels(file) } \arguments{ \item{file}{name of a file containing the SAS transport file. \code{file} may be a URL beginning with \code{https://}. For \code{sasdsLabels}, \code{file} is the name of a file containing a \code{PROC CONTENTS} output listing. For \code{method='csv'}, \code{file} is the name of the directory containing all the \code{CSV} files created by running the \code{exportlib} SAS macro. } \item{lowernames}{set to \code{FALSE} to keep from converting SAS variable names to lower case} \item{force.single}{set to \code{FALSE} to keep integer-valued variables not exceeding \eqn{2^31-1} in value from being converted to \code{integer} storage mode} \item{method}{set to \code{"dataload"} if you have the \code{dataload} executable installed and want to use it instead of \code{read.xport}. This seems to correct some errors in which rarely some factor variables are always missing when read by \code{read.xport} when in fact they have some non-missing values.} \item{formats}{a data frame or list (like that created by \code{read.xport}) containing \code{PROC FORMAT} output, if such output is not stored in the main transport file.} \item{allow}{a vector of characters allowed by \R that should not be converted to periods in variable names. By default, underscores in variable names are converted to periods as with \R before version 1.9.} \item{out}{a character string specifying a directory in which to write separate \R \code{save} files (\code{.rda} files) for each regular dataset. Each file and the data frame inside it is named with the SAS dataset name translated to lower case and with underscores changed to periods. The default \code{NULL} value of \code{out} results in a data frame or a list of data frames being returned. When \code{out} is given, \code{sasxport.get} returns only metadata (see below), invisibly. \code{out} only works with \code{method='csv'}.
\code{out} should not have a trailing slash.} \item{keep}{a vector of names of SAS datasets to process (original SAS upper case names). Must include \code{PROC FORMAT} dataset if it exists, and if the kept datasets use any of its value label formats.} \item{drop}{a vector of names of SAS datasets to ignore (original SAS upper case names)} \item{as.is}{ SAS character variables are converted to S factor objects if \code{as.is=FALSE} or if \code{as.is} is a number between 0 and 1 inclusive and the number of unique values of the variable is less than the number of observations (\code{n}) times \code{as.is}. The default for \code{as.is} is 0.5, so character variables are converted to factors only if they have fewer than \code{n/2} unique values. The primary purpose of this is to keep unique identification variables as character values in the data frame instead of using more space to store both the integer factor codes and the factor labels. } \item{FUN}{an optional function that will be run on each data frame created, when \code{method='csv'} and \code{out} are specified. The result of all the \code{FUN} calls is made into a list corresponding to the SAS datasets that are read. This list is the \code{FUN} attribute of the result returned by \code{sasxport.get}. } } \value{ If there is more than one dataset in the transport file other than the \code{PROC FORMAT} file, the result is a list of data frames containing all the non-\code{PROC FORMAT} datasets. Otherwise the result is the single data frame. There is an exception if \code{out} is specified; that causes separate \R \code{save} files to be written and the returned value to be a list corresponding to the SAS datasets, with key \code{PROC CONTENTS} information in a data frame making up each part of the list. \code{sasdsLabels} returns a named vector of dataset labels, with names equal to the dataset names. } \details{See \code{\link{contents.list}} for a way to print the directory of SAS datasets when more than one was imported.} \author{Frank E Harrell Jr} \seealso{\code{\link[foreign]{read.xport}},\code{\link{label}},\code{\link{sas.get}}, \code{\link{Dates}},\code{\link{DateTimeClasses}}, \code{\link[foreign]{lookup.xport}},\code{\link{contents}},\code{\link{describe}}} \examples{ \dontrun{ # SAS code to generate test dataset: # libname y SASV5XPT "test2.xpt"; # # PROC FORMAT; VALUE race 1=green 2=blue 3=purple; RUN; # PROC FORMAT CNTLOUT=format;RUN; * Name, e.g. 'format', unimportant; # data test; # LENGTH race 3 age 4; # age=30; label age="Age at Beginning of Study"; # race=2; # d1='3mar2002'd ; # dt1='3mar2002 9:31:02'dt; # t1='11:13:45't; # output; # # age=31; # race=4; # d1='3jun2002'd ; # dt1='3jun2002 9:42:07'dt; # t1='11:14:13't; # output; # format d1 mmddyy10. dt1 datetime. t1 time.
race race.; # run; # data z; LENGTH x3 3 x4 4 x5 5 x6 6 x7 7 x8 8; # DO i=1 TO 100; # x3=ranuni(3); # x4=ranuni(5); # x5=ranuni(7); # x6=ranuni(9); # x7=ranuni(11); # x8=ranuni(13); # output; # END; # DROP i; # RUN; # PROC MEANS; RUN; # PROC COPY IN=work OUT=y;SELECT test format z;RUN; *Creates test2.xpt; w <- sasxport.get('test2.xpt') # To use an existing copy of test2.xpt available on the web: w <- sasxport.get('https://github.com/harrelfe/Hmisc/raw/master/inst/tests/test2.xpt') describe(w$test) # see labels, format names for dataset test # Note: if only one dataset (other than format) had been exported, # just do describe(w) as sasxport.get would not create a list for that lapply(w, describe)# see descriptive stats for both datasets contents(w$test) # another way to see variable attributes lapply(w, contents)# show contents of both datasets options(digits=7) # compare the following matrix with PROC MEANS output t(sapply(w$z, function(x) c(Mean=mean(x),SD=sqrt(var(x)),Min=min(x),Max=max(x)))) } } \keyword{interface} \keyword{manip} Hmisc/man/yearDays.Rd0000644000176200001440000000066313215524054014146 0ustar liggesusers\name{yearDays} \alias{yearDays} \alias{monthDays} \title{ Get Number of Days in Year or Month } \description{Returns the number of days in a specific year or month.} \usage{ yearDays(time) monthDays(time) } \arguments{ \item{time}{ A POSIXt or Date object describing the month or year in question. } } \author{ Charles Dupont } \seealso{ \code{\link{POSIXt}}, \code{\link{Date}} } \keyword{ utilities } \keyword{ chron } Hmisc/man/summaryS.Rd0000644000176200001440000003564413631256116014207 0ustar liggesusers\name{summaryS} \alias{summaryS} \alias{plot.summaryS} \alias{plotp.summaryS} \alias{mbarclPanel} \alias{medvPanel} \alias{mbarclpl} \alias{medvpl} \title{Summarize Multiple Response Variables and Make Multipanel Scatter or Dot Plot} \description{ Multiple left-hand formula variables along with right-hand side conditioning variables are reshaped into a "tall and thin" data frame if \code{fun} is not specified. The resulting raw data can be plotted with the \code{plot} method using user-specified \code{panel} functions for \code{lattice} graphics, typically to make a scatterplot or \code{loess} smooths, or both. The \code{Hmisc} \code{panel.plsmo} function is handy in this context. Instead, if \code{fun} is specified, this function takes individual response variables (which may be matrices, as in \code{\link[survival]{Surv}} objects) and creates one or more summary statistics that will be computed while the resulting data frame is being collapsed to one row per condition. The \code{plot} method in this case plots a multi-panel dot chart using the \code{lattice} \code{\link[lattice]{dotplot}} function if \code{panel} is not specified to \code{plot}. There is an option to print selected statistics as text on the panels. \code{summaryS} pays special attention to \code{Hmisc} variable annotations: \code{label, units}. When \code{panel} is specified in addition to \code{fun}, a special \code{x-y} plot is made that assumes that the \code{x}-axis variable (typically time) is discrete. This is used for example to plot multiple quantile intervals as vertical lines next to the main point. A special panel function \code{mbarclPanel} is provided for this purpose. The \code{plotp} method produces corresponding \code{plotly} graphics.
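As a minimal sketch of the two modes (the data frame \code{d} and the variable names below are arbitrary): \preformatted{ ## reshape only; plot raw data using a lattice panel function s <- summaryS(sbp + dbp ~ days + region, data=d) plot(s, panel=panel.plsmo) ## collapse to one row per condition using fun; dot chart of means s <- summaryS(sbp + dbp ~ days + region, data=d, fun=mean) plot(s) }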
When \code{fun} is given and \code{panel} is omitted, and the result of \code{fun} is a vector of more than one statistic, the first statistic is taken as the main one. Any columns with names not in \code{textonly} will figure into the calculation of axis limits. Those in \code{textonly} will be printed right under the dot lines in the dot chart. Statistics with names in \code{textplot} will figure into limits, be plotted, and printed. \code{pch.stats} can be used to specify symbols for statistics after the first column. When \code{fun} computes three columns that are plotted, columns two and three are taken as confidence limits for which horizontal "error bars" are drawn. Two levels with different thicknesses are drawn if there are four plotted summary statistics beyond the first. \code{mbarclPanel} is used to draw multiple vertical lines around the main points, such as a series of quantile intervals stratified by \code{x} and paneling variables. If \code{mbarclPanel} finds a column of an argument \code{yother} that is named \code{"se"}, and if there are exactly two levels to a superpositioning variable, the half-height of the approximate 0.95 confidence interval for the difference between two point estimates is shown, positioned at the midpoint of the two point estimates at an \code{x} value. This assumes normality of point estimates, and the standard error of the difference is the square root of the sum of squares of the two standard errors. By positioning the intervals in this fashion, a failure of the two point estimates to touch the half-confidence interval is consistent with rejecting the null hypothesis of no difference at the 0.05 level. \code{mbarclpl} is the \code{sfun} function corresponding to \code{mbarclPanel} for \code{plotp}, and \code{medvpl} is the \code{sfun} replacement for \code{medvPanel}. \code{medvPanel} takes raw data and plots median \code{y} vs. \code{x}, along with confidence intervals and half-interval for the difference in medians as with \code{mbarclPanel}. Quantile intervals are optional. Very transparent vertical violin plots are added by default. Unlike \code{panel.violin}, only half of the violin is plotted, and when there are two superpose groups they are side-by-side in different colors. For \code{plotp}, the function corresponding to \code{medvPanel} is \code{medvpl}, which draws back-to-back spike histograms, optional Gini mean difference, optional SD, quantiles (thin line version of box plot with 0.05 0.25 0.5 0.75 0.95 quantiles), and half-width confidence interval for differences in medians. For quantiles, the Harrell-Davis estimator is used. } \usage{ summaryS(formula, fun = NULL, data = NULL, subset = NULL, na.action = na.retain, continuous=10, \dots) \method{plot}{summaryS}(x, formula=NULL, groups=NULL, panel=NULL, paneldoesgroups=FALSE, datadensity=NULL, ylab='', funlabel=NULL, textonly='n', textplot=NULL, digits=3, custom=NULL, xlim=NULL, ylim=NULL, cex.strip=1, cex.values=0.5, pch.stats=NULL, key=list(columns=length(groupslevels), x=.75, y=-.04, cex=.9, col=trellis.par.get('superpose.symbol')$col, corner=c(0,1)), outerlabels=TRUE, autoarrange=TRUE, scat1d.opts=NULL, \dots) \method{plotp}{summaryS}(data, formula=NULL, groups=NULL, sfun=NULL, fitter=NULL, showpts=!
length(fitter), funlabel=NULL, digits=5, xlim=NULL, ylim=NULL, shareX=TRUE, shareY=FALSE, autoarrange=TRUE, \dots) mbarclPanel(x, y, subscripts, groups=NULL, yother, \dots) medvPanel(x, y, subscripts, groups=NULL, violin=TRUE, quantiles=FALSE, \dots) mbarclpl(x, y, groups=NULL, yother, yvar=NULL, maintracename='y', xlim=NULL, ylim=NULL, xname='x', alphaSegments=0.45, \dots) medvpl(x, y, groups=NULL, yvar=NULL, maintracename='y', xlim=NULL, ylim=NULL, xlab=xname, ylab=NULL, xname='x', zeroline=FALSE, yother=NULL, alphaSegments=0.45, dhistboxp.opts=NULL, \dots) } \arguments{ \item{formula}{a formula with possibly multiple left and right-side variables separated by \code{+}. Analysis (response) variables are on the left and are typically numeric. For \code{plot}, \code{formula} is optional and overrides the default formula inferred for the reshaped data frame.} \item{fun}{an optional summarization function, e.g., \code{\link{smean.sd}}} \item{data}{optional input data frame. For \code{plotp} is the object produced by \code{summaryS}.} \item{subset}{optional subsetting criteria} \item{na.action}{function for dealing with \code{NA}s when constructing the model data frame} \item{continuous}{minimum number of unique values for a numeric variable to have to be considered continuous} \item{\dots}{ignored for \code{summaryS} and \code{mbarclPanel}, passed to \code{strip} and \code{panel} for \code{plot}. Passed to the \code{\link{density}} function by \code{medvPanel}. For \code{plotp}, they are passed to \code{plotlyM} and \code{sfun}. For \code{mbarclpl}, passed to \code{plotlyM}.} \item{x}{an object created by \code{summaryS}. For \code{mbarclPanel} it is an \code{x}-axis argument provided by \code{lattice}} \item{groups}{a character string or factor specifying that one of the conditioning variables is used for superpositioning and not paneling} \item{panel}{optional \code{lattice} \code{panel} function} \item{paneldoesgroups}{set to \code{TRUE} if, like \code{\link{panel.plsmo}}, the paneling function internally handles superpositioning for \code{groups}} \item{datadensity}{set to \code{TRUE} to add rug plots etc. using \code{\link{scat1d}}} \item{ylab}{optional \code{y}-axis label} \item{funlabel}{optional axis label for when \code{fun} is given} \item{textonly}{names of statistics to print and not plot. By default, any statistic named \code{"n"} is only printed.} \item{textplot}{names of statistics to print and plot} \item{digits}{used if any statistics are printed as text (including \code{plotly} hovertext), to specify the number of significant digits to render} \item{custom}{a function that customizes formatting of statistics that are printed as text. This is useful for generating plotmath notation. See the example in the tests directory.} \item{xlim}{optional \code{x}-axis limits} \item{ylim}{optional \code{y}-axis limits} \item{cex.strip}{size of strip labels} \item{cex.values}{size of statistics printed as text} \item{pch.stats}{symbols to use for statistics (not including the one in column one) that are plotted. This is a named vector, with names exactly matching those created by \code{fun}.
When a column does not have an entry in \code{pch.stats}, no point is drawn for that column.} \item{key}{\code{lattice} \code{key} specification} \item{outerlabels}{set to \code{FALSE} to not pass two-way charts through \code{\link[latticeExtra]{useOuterStrips}}} \item{autoarrange}{set to \code{FALSE} to prevent \code{plot} from trying to optimize which conditioning variable is vertical} \item{scat1d.opts}{a list of options to specify to \code{\link{scat1d}}} \item{y, subscripts}{provided by \code{lattice}} \item{yother}{passed to the panel function from the \code{plot} method based on multiple statistics computed} \item{violin}{controls whether violin plots are included} \item{quantiles}{controls whether quantile intervals are included} \item{sfun}{a function called by \code{plotp.summaryS} to compute and plot user-specified summary measures. Two functions for doing this are provided here: \code{mbarclpl, medvpl}.} \item{fitter}{a fitting function such as \code{loess} to smooth points. The smoothed values over a systematic grid will be evaluated and plotted as curves.} \item{showpts}{set to \code{TRUE} to show raw data points in addition to smoothed curves} \item{shareX}{\code{TRUE} to cause \code{plotly} to share a single x-axis when graphs are aligned vertically} \item{shareY}{\code{TRUE} to cause \code{plotly} to share a single y-axis when graphs are aligned horizontally} \item{yvar}{a character or factor variable used to stratify the analysis into multiple y-variables} \item{maintracename}{a default trace name when it can't be inferred} \item{xname}{x-axis variable name for hover text when it can't be inferred} \item{xlab}{x-axis label when it can't be inferred} \item{alphaSegments}{alpha saturation to draw line segments for \code{plotly}} \item{dhistboxp.opts}{\code{list} of options to pass to \code{dhistboxp}} \item{zeroline}{set to \code{FALSE} to suppress \code{plotly} zero line at x=0} } \value{a data frame with added attributes for \code{summaryS} or a \code{lattice} object ready to render for \code{plot}} \author{Frank Harrell} \seealso{\code{\link{summary}}, \code{\link{summarize}}} \examples{ # See tests directory file summaryS.r for more examples, and summarySp.r # for plotp examples n <- 100 set.seed(1) d <- data.frame(sbp=rnorm(n, 120, 10), dbp=rnorm(n, 80, 10), age=rnorm(n, 50, 10), days=sample(1:n, n, TRUE), S1=Surv(2*runif(n)), S2=Surv(runif(n)), race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE), sex=sample(c('Female', 'Male'), n, TRUE), treat=sample(c('A', 'B'), n, TRUE), region=sample(c('North America','Europe'), n, TRUE), meda=sample(0:1, n, TRUE), medb=sample(0:1, n, TRUE)) d <- upData(d, labels=c(sbp='Systolic BP', dbp='Diastolic BP', race='Race', sex='Sex', treat='Treatment', days='Time Since Randomization', S1='Hospitalization', S2='Re-Operation', meda='Medication A', medb='Medication B'), units=c(sbp='mmHg', dbp='mmHg', age='Year', days='Days')) s <- summaryS(age + sbp + dbp ~ days + region + treat, data=d) # plot(s) # 3 pages plot(s, groups='treat', datadensity=TRUE, scat1d.opts=list(lwd=.5, nhistSpike=0)) plot(s, groups='treat', panel=panel.loess, key=list(space='bottom', columns=2), datadensity=TRUE, scat1d.opts=list(lwd=.5)) # To make a plotly graph when the stratification variable region is not # present, run the following (showpts adds raw data points): # plotp(s, groups='treat', fitter=loess, showpts=TRUE) # Make your own plot using data frame created by summaryS # xyplot(y ~ days | yvar * region, groups=treat, data=s, # scales=list(y='free', rot=0)) #
Use loess to estimate the probability of two different types of events as # a function of time s <- summaryS(meda + medb ~ days + treat + region, data=d) pan <- function(...) panel.plsmo(..., type='l', label.curves=max(which.packet()) == 1, datadensity=TRUE) plot(s, groups='treat', panel=pan, paneldoesgroups=TRUE, scat1d.opts=list(lwd=.7), cex.strip=.8) # Repeat using intervals instead of nonparametric smoother pan <- function(...) # really need mobs > 96 to est. proportion panel.plsmo(..., type='l', label.curves=max(which.packet()) == 1, method='intervals', mobs=5) plot(s, groups='treat', panel=pan, paneldoesgroups=TRUE, xlim=c(0, 150)) # Demonstrate dot charts of summary statistics s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=mean) plot(s) plot(s, groups='treat', funlabel=expression(bar(X))) # Compute parametric confidence limits for mean, and include sample # sizes by naming a column "n" f <- function(x) { x <- x[! is.na(x)] c(smean.cl.normal(x, na.rm=FALSE), n=length(x)) } s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=f) plot(s, funlabel=expression(bar(X) \%+-\% t[0.975] \%*\% s)) plot(s, groups='treat', cex.values=.65, key=list(space='bottom', columns=2, text=c('Treatment A:','Treatment B:'))) # For discrete time, plot Harrell-Davis quantiles of y variables across # time using different line characteristics to distinguish quantiles d <- upData(d, days=round(days / 30) * 30) g <- function(y) { probs <- c(0.05, 0.125, 0.25, 0.375) probs <- sort(c(probs, 1 - probs)) y <- y[! is.na(y)] w <- hdquantile(y, probs) m <- hdquantile(y, 0.5, se=TRUE) se <- as.numeric(attr(m, 'se')) c(Median=as.numeric(m), w, se=se, n=length(y)) } s <- summaryS(sbp + dbp ~ days + region, fun=g, data=d) plot(s, panel=mbarclPanel) plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE) # For discrete time, plot median y vs x along with CL for difference, # using Harrell-Davis median estimator and its s.e., and use violin # plots s <- summaryS(sbp + dbp ~ days + region, data=d) plot(s, groups='region', panel=medvPanel, paneldoesgroups=TRUE) # Proportions and Wilson confidence limits, plus approx. Gaussian # based half/width confidence limits for difference in probabilities g <- function(y) { y <- y[!is.na(y)] n <- length(y) p <- mean(y) se <- sqrt(p * (1. - p) / n) structure(c(binconf(sum(y), n), se=se, n=n), names=c('Proportion', 'Lower', 'Upper', 'se', 'n')) } s <- summaryS(meda + medb ~ days + region, fun=g, data=d) plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE) } \keyword{category} \keyword{hplot} \keyword{manip} \keyword{grouping} \concept{stratification} \concept{aggregation} Hmisc/man/hist.data.frame.Rd0000644000176200001440000000325612243661443015337 0ustar liggesusers\name{hist.data.frame} \alias{hist.data.frame} \title{Histograms for Variables in a Data Frame} \description{ This function tries to compute the maximum number of histograms that will fit on one page, then it draws a matrix of histograms. If there are more qualifying variables than will fit on a page, the function waits for a mouse click before drawing the next page. } \usage{ \method{hist}{data.frame}(x, n.unique = 3, nclass = "compute", na.big = FALSE, rugs = FALSE, freq=TRUE, mtitl = FALSE, ...) } \arguments{ \item{x}{a data frame} \item{n.unique}{minimum number of unique values a variable must have before a histogram is drawn} \item{nclass}{number of bins.
Default is max(2,trunc(min(n/10,25*log(n,10))/2)), where n is the number of non-missing values for a variable.} \item{na.big}{set to \code{TRUE} to draw the number of missing values on the top of the histogram in addition to in a subtitle. In the subtitle, n is the number of non-missing values and m is the number of missing values} \item{rugs}{set to \code{TRUE} to add rug plots at the top of each histogram} \item{freq}{see \code{\link{hist}}. Default is to show frequencies.} \item{mtitl}{set to a character string to set aside extra outside top margin and to use the string for an overall title} \item{\dots}{arguments passed to \code{scat1d}} } \value{the number of pages drawn} \author{Frank E Harrell Jr} \seealso{\code{\link{hist}}, \code{\link{scat1d}}} \examples{ d <- data.frame(a=runif(200), b=rnorm(200), w=factor(sample(c('green','red','blue'), 200, TRUE))) hist.data.frame(d) # in R, just say hist(d) } \keyword{hplot} \keyword{dplot} \keyword{distribution} Hmisc/man/abs.error.pred.Rd0000644000176200001440000000534214275452342015221 0ustar liggesusers\name{abs.error.pred} \alias{abs.error.pred} \alias{print.abs.error.pred} \title{ Indexes of Absolute Prediction Error for Linear Models } \description{ Computes the mean and median of various absolute errors related to ordinary multiple regression models. The mean and median absolute errors correspond to the mean square due to regression, error, and total. The absolute errors computed are derived from \eqn{\hat{Y} - \mbox{median($\hat{Y}$)}}{Yhat - median(Yhat)}, \eqn{\hat{Y} - Y}{Yhat - Y}, and \eqn{Y - \mbox{median($Y$)}}{Y - median(Y)}. The function also computes ratios that correspond to \eqn{R^2} and \eqn{1 - R^2} (but these ratios do not add to 1.0); the \eqn{R^2} measure is the ratio of mean or median absolute \eqn{\hat{Y} - \mbox{median($\hat{Y}$)}}{Yhat - median(Yhat)} to the mean or median absolute \eqn{Y - \mbox{median($Y$)}}{Y - median(Y)}. The \eqn{1 - R^2} or SSE/SST measure is the mean or median absolute \eqn{\hat{Y} - Y}{Yhat - Y} divided by the mean or median absolute \eqn{\hat{Y} - \mbox{median($Y$)}}{Y - median(Y)}. } \usage{ abs.error.pred(fit, lp=NULL, y=NULL) \method{print}{abs.error.pred}(x, \dots) } \arguments{ \item{fit}{ a fit object typically from \code{\link{lm}} or \code{\link[rms]{ols}} that contains a y vector (i.e., you should have specified \code{y=TRUE} to the fitting function) unless the \code{y} argument is given to \code{abs.error.pred}. If you do not specify the \code{lp} argument, \code{fit} must contain \code{fitted.values} or \code{linear.predictors}. You must specify \code{fit} or both of \code{lp} and \code{y}. } \item{lp}{ a vector of predicted values (Y hat above) if \code{fit} is not given } \item{y}{ a vector of response variable values if \code{fit} (with \code{y=TRUE} in effect) is not given } \item{x}{an object created by \code{abs.error.pred}} \item{\dots}{unused} } \value{ a list of class \code{abs.error.pred} (used by \code{print.abs.error.pred}) containing two matrices: \code{differences} and \code{ratios}. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University School of Medicine\cr \email{fh@fharrell.com} } \seealso{ \code{\link{lm}}, \code{\link[rms]{ols}}, \code{\link{cor}}, \code{\link[rms]{validate.ols}} } \references{ Schemper M (2003): Stat in Med 22:2299-2308. Tian L, Cai T, Goetghebeur E, Wei LJ (2007): Biometrika 94:297-311. 
} \examples{ set.seed(1) # so can regenerate results x1 <- rnorm(100) x2 <- rnorm(100) y <- exp(x1+x2+rnorm(100)) f <- lm(log(y) ~ x1 + poly(x2,3), y=TRUE) abs.error.pred(lp=exp(fitted(f)), y=y) rm(x1,x2,y,f) } \keyword{robust} \keyword{regression} \keyword{models} \concept{predictive accuracy} Hmisc/man/latexTherm.Rd0000644000176200001440000001047213077245250014505 0ustar liggesusers\name{latexTherm} \alias{latexTherm} \alias{latexNeedle} \alias{pngNeedle} \title{Create LaTeX Thermometers and Colored Needles} \description{ \code{latexTherm} creates a LaTeX picture environment for drawing a series of thermometers whose heights depict the values of a variable \code{y} assumed to be scaled from 0 to 1. This is useful for showing fractions of sample analyzed in any table or plot, intended for a legend. For example, four thermometers might be used to depict the fraction of enrolled patients included in the current analysis, the fraction randomized, the fraction of patients randomized to treatment A being analyzed, and the fraction randomized to B being analyzed. The picture is placed inside a LaTeX macro definition for macro variable named \code{name}, to be invoked by the user later in the LaTeX file using \code{name} preceded by a backslash. If \code{y} has an attribute \code{"table"}, it is assumed to contain a character string with LaTeX code. This code is used as a tooltip popup for PDF using the LaTeX \code{ocgtools} package or using style \code{tooltips}. Typically the code will contain a \code{tabular} environment. The user must define a LaTeX macro \code{tooltipn} that takes two arguments (original object and pop-up object) that does the pop-up. \code{latexNeedle} is similar to \code{latexTherm} except that vertical needles are produced and each may have its own color. A grayscale box is placed around the needles and provides the 0-1 \code{y}-axis reference. Horizontal grayscale grid lines may be drawn. \code{pngNeedle} is similar to \code{latexNeedle} but is for generating small png graphics. The full graphics file name is returned invisibly. } \usage{ latexTherm(y, name, w = 0.075, h = 0.15, spacefactor = 1/2, extra = 0.07, file = "", append = TRUE) latexNeedle(y, x=NULL, col='black', href=0.5, name, w=.05, h=.15, extra=0, file = "", append=TRUE) pngNeedle(y, x=NULL, col='black', href=0.5, lwd=3.5, w=6, h=18, file=tempfile(fileext='.png')) } \arguments{ \item{y}{a vector of 0-1 scaled values. Boxes and their frames are omitted for \code{NA} elements} \item{x}{a vector corresponding to \code{y} giving x-coordinates. Scaled accordingly, or defaults to equally-spaced values.} \item{name}{name of LaTeX macro variable to be defined} \item{w}{width of a single box (thermometer) in inches. For \code{latexNeedle} and \code{pngNeedle} is the spacing between needles, the latter being in pixels.} \item{h}{height of a single box in inches. For \code{latexNeedle} and \code{pngNeedle} is the height of the frame, the latter in pixels.} \item{spacefactor}{fraction of \code{w} added for extra space between boxes for \code{latexTherm}} \item{extra}{extra space in inches to set aside to the right of and above the series of boxes or frame} \item{file}{name of file to which to write LaTeX code. Default is the console. Also used as base file name for png graphic. Default for that is from \code{tempfile}.} \item{append}{set to \code{FALSE} to write over \code{file}} \item{col}{a vector of colors corresponding to positions in \code{y}.
\code{col} is repeated if too short.} \item{href}{values of \code{y} (0-1) for which horizontal grayscale reference lines are drawn for \code{latexNeedle} and \code{pngNeedle}. Set to \code{NULL} to not draw any reference lines} \item{lwd}{line width of needles for \code{pngNeedle}} } \author{Frank Harrell} \examples{ \dontrun{ # The following is in the Hmisc tests directory # For a knitr example see latexTherm.Rnw in that directory ct <- function(...) cat(..., sep='') ct('\\documentclass{report}\\begin{document}\n') latexTherm(c(1, 1, 1, 1), name='lta') latexTherm(c(.5, .7, .4, .2), name='ltb') latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltc', extra=0) latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltcc') latexTherm(c(0, 0, 0, 0), name='ltd') ct('This is a the first:\\lta and the second:\\ltb\\\\ and the third without extra:\\ltc END\\\\\nThird with extra:\\ltcc END\\\\ \\vspace{2in}\\\\ All data = zero, frame only:\\ltd\\\\ \\end{document}\n') w <- pngNeedle(c(.2, .5, .7)) cat(tobase64image(w)) # can insert this directly into an html file }} \keyword{utilities} \keyword{interface} \keyword{file} \keyword{character} \keyword{manip} Hmisc/man/Cs.Rd0000644000176200001440000000135614235535675012740 0ustar liggesusers\name{Cs} \alias{Cs} \alias{.q} \title{ Character strings from unquoted names } \description{ \code{Cs} makes a vector of character strings from a list of valid R names. \code{.q} is similar but also makes use of the names of arguments. } \usage{ Cs(\dots) .q(\dots) } \arguments{ \item{...}{ any number of names separated by commas. For \code{.q} any names of arguments will be used. }} \value{ character string vector. For \code{.q} there will be a \code{names} attribute to the vector if any names appeared in \dots. } \seealso{ sys.frame, deparse } \examples{ Cs(a,cat,dog) # subset.data.frame <- dataframe[,Cs(age,sex,race,bloodpressure,height)] .q(a, b, c, 'this and that') .q(dog=a, giraffe=b, cat=c) } \keyword{character} \keyword{utilities} Hmisc/man/labcurve.Rd0000644000176200001440000006546313714234051014176 0ustar liggesusers\name{labcurve} \alias{labcurve} \alias{putKey} \alias{putKeyEmpty} \alias{largest.empty} \alias{drawPlot} \alias{plot.drawPlot} \alias{bezier} \title{Label Curves, Make Keys, and Interactively Draw Points and Curves} \description{ \code{labcurve} optionally draws a set of curves then labels the curves. A variety of methods for drawing labels are implemented, ranging from positioning using the mouse to automatic labeling to automatic placement of key symbols with manual placement of key legends to automatic placement of legends. For automatic positioning of labels or keys, a curve is labeled at a point that is maximally separated from all of the other curves. Gaps occurring when curves do not start or end at the same x-coordinates are given preference for positioning labels. If labels are offset from the curves (the default behaviour), if the closest curve to curve i is above curve i, curve i is labeled below its line. If the closest curve is below curve i, curve i is labeled above its line. These directions are reversed if the resulting labels would appear outside the plot region. Both ordinary lines and step functions are handled, and there is an option to draw the labels at the same angle as the curve within a local window. Unless the mouse is used to position labels or plotting symbols are placed along the curves to distinguish them, curves are examined at 100 (by default) equally spaced points over the range of x-coordinates in the current plot area.
Linear interpolation is used to get y-coordinates to line up (step function or constant interpolation is used for step functions).  There is an option to instead examine all curves at the set of unique x-coordinates found by unioning the x-coordinates of all the curves.  This option is especially useful when plotting step functions.

By setting \code{adj="auto"} you can have \code{labcurve} try to optimally left- or right-justify labels depending on the slope of the curves at the points at which labels would be centered (plus a vertical offset).  This is especially useful when labels must be placed on steep curve sections.

You can use the \code{on top} method to write (short) curve names directly on the curves (centered on the y-coordinate).  This is especially useful when there are many curves whose full labels would run into each other.  You can plot letters or numbers on the curves, for example (using the \code{keys} option), and have \code{labcurve} use the \code{key} function to provide long labels for these short ones (see the end of the example).  There is another option for connecting labels to curves using arrows.  When \code{keys} is a vector of integers, it is taken to represent plotting symbols (\code{pch}s), and these symbols are plotted at equally-spaced x-coordinates on each curve (by default, using 5 points per curve).  The points are offset in the x-direction between curves so as to minimize the chance of collisions.  To add a legend defining line types, colors, or line widths with no symbols, specify \code{keys="lines"}, e.g., \code{labcurve(curves, keys="lines", lty=1:2)}.

\code{putKey} provides a different way to use \code{key()} by allowing the user to specify vectors for labels, line types, plotting characters, etc.  Elements that do not apply (e.g., \code{pch} for lines (\code{type="l"})) may be \code{NA}.  When a series of points is represented by both a symbol and a line, the corresponding elements of both \code{pch} and \code{lty}, \code{col.}, or \code{lwd} will be non-missing.

\code{putKeyEmpty}, given vectors of all the x-y coordinates that have been plotted, uses \code{largest.empty} to find the largest empty rectangle large enough to hold the key, and draws the key using \code{putKey}.

\code{drawPlot} is a simple mouse-driven function for drawing series of lines, step functions, polynomials, Bezier curves, and points, and automatically labeling the point groups using \code{labcurve} or \code{putKeyEmpty}.  When \code{drawPlot} is invoked it creates temporary functions \code{Points}, \code{Curve}, and \code{Abline}.  The user calls these functions inside the call to \code{drawPlot} to define groups of points in the order they are defined with the mouse.  \code{Abline} is used to call \code{abline} and not to actually create a group of points.  For some curve types, the curve generated to represent the corresponding series of points is drawn after all points are entered for that series, and this curve may be different from the simple curve obtained by connecting points at the mouse clicks.  For example, to draw a general smooth Bezier curve the user need only click on a few points, and she must overshoot the final curve coordinates to define the curve.  The originally entered points are not erased once the curve is drawn.  The same goes for step functions and polynomials.  If you \code{plot()} the object returned by \code{drawPlot}, however, only final curves will be shown.  The last examples show how to use \code{drawPlot}.
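As a brief sketch of an interactive \code{drawPlot} session (not run; the group labels, axis limits, and use of \code{key} here are arbitrary illustrations, not defaults):
\preformatted{
# Click points for two groups, then a few points for a smooth curve;
# a right mouse click ends each series
d <- drawPlot(Points('group A'), Points('group B'), Curve('smooth'),
              xlim=c(0,10), ylim=c(0,10), key=TRUE)
plot(d)   # redraw the finished plot, placing the key automatically
}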
The \code{largest.empty} function finds the largest rectangle that is large enough to hold a rectangle of a given height and width, such that the rectangle does not contain any of a given set of points. This is used by \code{labcurve} and \code{putKeyEmpty} to position keys at the most empty part of an existing plot. The default method was created by Hans Borchers. } \usage{ labcurve(curves, labels=names(curves), method=NULL, keys=NULL, keyloc=c("auto","none"), type="l", step.type=c("left", "right"), xmethod=if(any(type=="s")) "unique" else "grid", offset=NULL, xlim=NULL, tilt=FALSE, window=NULL, npts=100, cex=NULL, adj="auto", angle.adj.auto=30, lty=pr$lty, lwd=pr$lwd, col.=pr$col, transparent=TRUE, arrow.factor=1, point.inc=NULL, opts=NULL, key.opts=NULL, empty.method=c('area','maxdim'), numbins=25, pl=!missing(add), add=FALSE, ylim=NULL, xlab="", ylab="", whichLabel=1:length(curves), grid=FALSE, xrestrict=NULL, \dots) putKey(z, labels, type, pch, lty, lwd, cex=par('cex'), col=rep(par('col'),nc), transparent=TRUE, plot=TRUE, key.opts=NULL, grid=FALSE) putKeyEmpty(x, y, labels, type=NULL, pch=NULL, lty=NULL, lwd=NULL, cex=par('cex'), col=rep(par('col'),nc), transparent=TRUE, plot=TRUE, key.opts=NULL, empty.method=c('area','maxdim'), numbins=25, xlim=pr$usr[1:2], ylim=pr$usr[3:4], grid=FALSE) drawPlot(\dots, xlim=c(0,1), ylim=c(0,1), xlab='', ylab='', ticks=c('none','x','y','xy'), key=FALSE, opts=NULL) # Points(label=' ', type=c('p','r'), # n, pch=pch.to.use[1], cex=par('cex'), col=par('col'), # rug = c('none','x','y','xy'), ymean) # Curve(label=' ', # type=c('bezier','polygon','linear','pol','loess','step','gauss'), # n=NULL, lty=1, lwd=par('lwd'), col=par('col'), degree=2, # evaluation=100, ask=FALSE) # Abline(\dots) \method{plot}{drawPlot}(x, xlab, ylab, ticks, key=x$key, keyloc=x$keyloc, \dots) largest.empty(x, y, width=0, height=0, numbins=25, method=c('exhaustive','rexhaustive','area','maxdim'), xlim=pr$usr[1:2], ylim=pr$usr[3:4], pl=FALSE, grid=FALSE) } \arguments{ \item{curves}{ a list of lists, each of which have at least two components: a vector of \code{x} values and a vector of corresponding \code{y} values. \code{curves} is mandatory except when \code{method="mouse"} or \code{"locator"}, in which case \code{labels} is mandatory. Each list in \code{curves} may optionally have any of the parameters \code{type}, \code{lty}, \code{lwd}, or \code{col} for that curve, as defined below (see one of the last examples). } \item{z}{ a two-element list specifying the coordinate of the center of the key, e.g. \code{locator(1)} to use the mouse for positioning } \item{labels}{ For \code{labcurve}, a vector of character strings used to label curves (which may contain newline characters to stack labels vertically). The default labels are taken from the names of the \code{curves} list. Setting \code{labels=FALSE} will suppress drawing any labels (for \code{labcurve} only). For \code{putKey} and \code{putKeyEmpty} is a vector of character strings specifying group labels } \item{x}{ } \item{y}{ for \code{putKeyEmpty} and \code{largest.empty}, \code{x} and \code{y} are same-length vectors specifying points that have been plotted. \code{x} can also be an object created by \code{drawPlot}. } \item{\dots}{ For \code{drawPlot} is a series of invocations of \code{Points} and \code{Curve} (see example). Any number of point groups can be defined in this way. For \code{Abline} these may be any arguments to \code{abline}. For \code{labcurve}, other parameters to pass to \code{text}. 
}
\item{width}{ }
\item{height}{ for \code{largest.empty}, specifies the minimum allowable width in \code{x} units and the minimum allowable height in \code{y} units }
\item{method}{
\code{"offset"} (the default) offsets labels at largest gaps between curves, and draws labels beside curves.  \code{"on top"} draws labels on top of the curves (especially good when using keys).  \code{"arrow"} draws arrows connecting labels to the curves.  \code{"mouse"} or \code{"locator"} positions labels according to mouse clicks.  If \code{keys} is specified and is an integer vector or is \code{"lines"}, \code{method} defaults to \code{"on top"}.  If \code{keys} is character, \code{method} defaults to \code{"offset"}.  Set \code{method="none"} to suppress all curve labeling and key drawing, which is useful when \code{pl=TRUE} and you only need \code{labcurve} to draw the curves and the rest of the basic graph.

For \code{largest.empty}, specifies the method used to find a rectangle that does not collide with any of the (\code{x}, \code{y}) points.  The default method, \code{'exhaustive'}, uses a Fortran translation of an R function and algorithm developed by Hans Borchers.  The same result, more slowly, may be obtained by using pure R code by specifying \code{method='rexhaustive'}.  The original algorithms using binning (and the only methods supported for S-Plus) are still available.  For all methods, screening of candidate rectangles having at least a given width in \code{x}-units of \code{width} or having at least a given height in \code{y}-units of \code{height} is possible.  Use \code{method="area"} to use the binning method to find the rectangle having the largest area, or \code{method="maxdim"} to use the binning method to return the last rectangle searched that had both the largest width and largest height over all previous rectangles.
}
\item{keys}{
This causes keys (symbols or short text) to be drawn on or beside curves, and if \code{keyloc} is not equal to \code{"none"}, a legend to be automatically drawn.  The legend links keys with full curve labels and optionally with colors and line types.  Set \code{keys} to a vector of character strings, or a vector of integers specifying plotting characters (\code{pch} values - see \code{points}).  For the latter case, the default behavior is to plot the symbols periodically, at equally spaced x-coordinates.
}
\item{keyloc}{
When \code{keys} is specified, \code{keyloc} specifies how the legend is to be positioned for drawing using the \code{key} function in \code{trellis}.  The default is \code{"auto"}, for which the \code{largest.empty} function is used to find the most empty part of the plot.  If no empty rectangle large enough to hold the key is found, no key will be drawn.  Specify \code{keyloc="none"} to suppress drawing a legend, or set \code{keyloc} to a 2-element list containing the x and y coordinates for the center of the legend.  For example, use \code{keyloc=locator(1)} to click the mouse at the center.  \code{keyloc} specifies the coordinates of the center of the key to be drawn with \code{plot.drawPlot} when \code{key=TRUE}.
}
\item{type}{
for \code{labcurve}, a scalar or vector of character strings specifying the method by which the points in the curves were connected.  \code{"l"} means ordinary connections between points and \code{"s"} means step functions.  For \code{putKey} and \code{putKeyEmpty} is a vector of plotting types, \code{"l"} for regular line, \code{"p"} for point, \code{"b"} for both point and line, and \code{"n"} for none.
For \code{Points} is either \code{"p"} (the default) for regular points, or \code{"r"} for rugplot (one-dimensional scatter diagram to be drawn using the \code{scat1d} function).  For \code{Curve}, \code{type} is \code{"bezier"} (the default) for drawing a smooth Bezier curve (which can represent a non-1-to-1 function such as a circle), \code{"polygon"} for ordinary line segments, \code{"linear"} for a straight line defined by two endpoints, \code{"pol"} for a \code{degree}-degree polynomial to be fitted to the mouse-clicked points, \code{"step"} for a left-step-function, \code{"gauss"} to plot a Gaussian density fitted to 3 clicked points, \code{"loess"} to use the \code{lowess} function to smooth the clicked points, or a function to draw a user-specified function, evaluated at \code{evaluation} points spanning the whole x-axis.  For the density the user must click in the left tail, at the highest value (at the mean), and in the right tail, with the two tail values being approximately equidistant from the mean.  The density is scaled to fit in the highest value regardless of its area.
}
\item{step.type}{ type of step functions used (default is \code{"left"}) }
\item{xmethod}{ method for generating the unique set of x-coordinates to examine (see above).  Default is \code{"grid"} for \code{type="l"} or \code{"unique"} for \code{type="s"}. }
\item{offset}{ distance in y-units between the center of the label and the line being labeled.  Default is 0.75 times the height of an "m" that would be drawn in a label.  For R grid/lattice you must specify offset using the \code{grid} \code{unit} function, e.g., \code{offset=unit(2,"native")} or \code{offset=unit(.25,"cm")} (\code{"native"} means data units) }
\item{xlim}{ limits for searching for label positions, and is also used to set up plots when \code{pl=TRUE} and \code{add=FALSE}.  Default is total x-axis range for current plot (\code{par("usr")[1:2]}).  For \code{largest.empty}, \code{xlim} limits the search for largest rectangles, but it has the same default as above.  For \code{pl=TRUE,add=FALSE} you may want to extend \code{xlim} somewhat to allow large keys to fit, when using \code{keyloc="auto"}.  For \code{drawPlot} default is \code{c(0,1)}.  When using \code{largest.empty} with \code{ggplot2}, \code{xlim} and \code{ylim} are mandatory. }
\item{tilt}{ set to \code{TRUE} to tilt labels to follow the curves, for \code{method="offset"} when \code{keys} is not given. }
\item{window}{ width of a window, in x-units, to use in determining the local slope for tilting labels.  Default is 0.5 times number of characters in the label times the x-width of an "m" in the current character size and font. }
\item{npts}{ number of points to use if \code{xmethod="grid"} }
\item{cex}{ character size to pass to \code{text} and \code{key}.  Default is current \code{par("cex")}.  For \code{putKey}, \code{putKeyEmpty}, and \code{Points} is the size of the plotting symbol. }
\item{adj}{
Default is \code{"auto"} which has \code{labcurve} figure justification automatically when \code{method="offset"}.  This will cause centering to be used when the local angle of the curve is less than \code{angle.adj.auto} in absolute value, left justification if the angle is larger and either the label is under a curve of positive slope or over a curve of negative slope, and right justification otherwise.  For step functions, left justification is used when the label is above the curve and right justification otherwise.  Set \code{adj=.5} to center labels at computed coordinates.
Set to 0 for left-justification, 1 for right.  Set \code{adj} to a vector to vary adjustments over the curves.
}
\item{angle.adj.auto}{ see \code{adj}.  Does not apply to step functions. }
\item{lty}{ vector of line types which were used to draw the curves.  This is only used when keys are drawn.  If all of the line types, line widths, and line colors are the same, lines are not drawn in the key. }
\item{lwd}{ vector of line widths which were used to draw the curves.  This is only used when keys are drawn.  See \code{lty} also. }
\item{col.}{ }
\item{col}{ vector of integer color numbers for use in curve labels, symbols, lines, and legends.  Default is \code{par("col")} for all curves.  See \code{lty} also. }
\item{transparent}{ Default is \code{TRUE} to make \code{key} draw transparent legends, i.e., to suppress drawing a solid rectangle background for the legend.  Set to \code{FALSE} otherwise. }
\item{arrow.factor}{ factor by which to multiply default arrow lengths }
\item{point.inc}{ When \code{keys} is a vector of integers, \code{point.inc} specifies the x-increment between the point symbols that are overlaid periodically on the curves.  By default, \code{point.inc} is equal to the range for the x-axis divided by 5. }
\item{opts}{ an optional list which can be used to specify any of the options to \code{labcurve}, with the usual element name abbreviations allowed.  This is useful when \code{labcurve} is being called from another function.  Example: \code{opts=list(method="arrow", cex=.8, np=200)}.  For \code{drawPlot} a list of \code{labcurve} options to pass as \code{labcurve(\dots, opts=)}. }
\item{key.opts}{ a list of extra arguments you wish to pass to \code{key()}, e.g., \code{key.opts=list(background=1, between=3)}.  The argument names must be spelled out in full. }
\item{empty.method}{ }
\item{numbins}{ These two arguments are passed to the \code{largest.empty} function's \code{method} and \code{numbins} arguments (see below).  For \code{largest.empty} specifies the number of bins in which to discretize both the \code{x} and \code{y} directions for searching for rectangles.  Default is 25. }
\item{pl}{ set to \code{TRUE} (or specify \code{add}) to cause the curves in \code{curves} to be drawn, under the control of the \code{type}, \code{lty}, \code{lwd}, and \code{col} parameters defined either in the \code{curves} lists or in the separate arguments given to \code{labcurve} or through \code{opts}.  For \code{largest.empty}, set \code{pl=TRUE} to show the rectangle the function found by drawing it with a solid color.  May not be used under \code{ggplot2}. }
\item{add}{ By default, when curves are actually drawn by \code{labcurve} a new plot is started.  To add to an existing plot, set \code{add=TRUE}. }
\item{ylim}{ When a plot has already been started, \code{ylim} defaults to \code{par("usr")[3:4]}.  When \code{pl=TRUE}, \code{ylim} and \code{xlim} are determined from the ranges of the data.  Specify \code{ylim} yourself to take control of the plot construction.  In some cases it is advisable to make \code{ylim} larger than usual to allow for automatically-positioned keys.  For \code{largest.empty}, \code{ylim} specifies the limits on the y-axis to limit the search for rectangles.  Here \code{ylim} defaults to the same as above, i.e., the range of the y-axis of an open plot from \code{par}.  For \code{drawPlot} the default is \code{c(0,1)}. }
\item{xlab}{ }
\item{ylab}{ x-axis and y-axis labels when \code{pl=TRUE} and \code{add=FALSE} or for \code{drawPlot}.
Defaults to \code{""} unless the first curve has names for its first two elements, in which case the names of these elements are taken as \code{xlab} and \code{ylab}. } \item{whichLabel}{ integer vector corresponding to \code{curves} specifying which curves are to be labelled or have a legend } \item{grid}{ set to \code{TRUE} if the R \code{grid} package was used to draw the current plot. This prevents \code{labcurve} from using \code{par("usr")} etc. If using R \code{grid} you can pass coordinates and lengths having arbitrary units, as documented in the \code{unit} function. This is especially useful for \code{offset}. } \item{xrestrict}{ When having \code{labcurve} label curves where they are most separated, you can restrict the search for this separation point to a range of the x-axis, specified as a 2-vector \code{xrestrict}. This is useful when one part of the curve is very steep. Even though steep regions may have maximum separation, the labels will collide when curves are steep. } \item{pch}{ vector of plotting characters for \code{putKey} and \code{putKeyEmpty}. Can be any value including \code{NA} when only a line is used to indentify the group. Is a single plotting character for \code{Points}, with the default being the next unused value from among 1, 2, 3, 4, 16, 17, 5, 6, 15, 18, 19. } \item{plot}{ set to \code{FALSE} to keep \code{putKey} or \code{putKeyEmpty} from actually drawing the key. Instead, the size of the key will be return by \code{putKey}, or the coordinates of the key by \code{putKeyEmpty}. } \item{ticks}{ tells \code{drawPlot} which axes to draw tick marks and tick labels. Default is \code{"none"}. } \item{key}{ for \code{drawPlot} and \code{plot.drawPlot}. Default is \code{FALSE} so that \code{labcurve} is used to label points or curves. Set to \code{TRUE} to use \code{putKeyEmpty}.} } \value{ \code{labcurve} returns an invisible list with components \code{x, y, offset, adj, cex, col}, and if \code{tilt=TRUE}, \code{angle}. \code{offset} is the amount to add to \code{y} to draw a label. \code{offset} is negative if the label is drawn below the line. \code{adj} is a vector containing the values 0, .5, 1. \code{largest.empty} returns a list with elements \code{x} and \code{y} specifying the coordinates of the center of the rectangle which was found, and element \code{rect} containing the 4 \code{x} and \code{y} coordinates of the corners of the found empty rectangle. The \code{area} of the rectangle is also returned. } \details{ The internal functions \code{Points}, \code{Curve}, \code{Abline} have unique arguments as follows. \describe{ \item{\code{label}:}{for \code{Points} and \code{Curve} is a single character string to label that group of points} \item{\code{n}:}{number of points to accept from the mouse. Default is to input points until a right mouse click.} \item{\code{rug}:}{for \code{Points}. Default is \code{"none"} to not show the marginal x or y distributions as rug plots, for the points entered. 
Other possibilities are used to execute \code{scat1d} to show the marginal distribution of x, y, or both as rug plots.} \item{\code{ymean}:}{for \code{Points}, subtracts a constant from each y-coordinate entered to make the overall mean \code{ymean}} \item{\code{degree}:}{degree of polynomial to fit to points by \code{Curve}} \item{\code{evaluation}:}{number of points at which to evaluate Bezier curves, polynomials, and other functions in \code{Curve}} \item{\code{ask}:}{set \code{ask=TRUE} to give the user the opportunity to try again at specifying points for Bezier curves, step functions, and polynomials} } The \code{labcurve} function used some code from the function \code{plot.multicurve} written by Rod Tjoelker of The Boeing Company (\email{tjoelker@espresso.rt.cs.boeing.com}). If there is only one curve, a label is placed at the middle x-value, and no fancy features such as \code{angle} or positive/negative offsets are used. \code{key} is called once (with the argument \code{plot=FALSE}) to find the key dimensions. Then an empty rectangle with at least these dimensions is searched for using \code{largest.empty}. Then \code{key} is called again to draw the key there, using the argument \code{corner=c(.5,.5)} so that the center of the rectangle can be specified to \code{key}. If you want to plot the data, an easier way to use \code{labcurve} is through \code{xYplot} as shown in some of its examples. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{approx}}, \code{\link{text}}, \code{\link{legend}}, \code{\link{scat1d}}, \code{\link{xYplot}}, \code{\link{abline}} } \examples{ n <- 2:8 m <- length(n) type <- c('l','l','l','l','s','l','l') # s=step function l=ordinary line (polygon) curves <- vector('list', m) plot(0,1,xlim=c(0,1),ylim=c(-2.5,4),type='n') set.seed(39) for(i in 1:m) { x <- sort(runif(n[i])) y <- rnorm(n[i]) lines(x, y, lty=i, type=type[i], col=i) curves[[i]] <- list(x=x,y=y) } labels <- paste('Label for',letters[1:m]) labcurve(curves, labels, tilt=TRUE, type=type, col=1:m) # Put only single letters on curves at points of # maximum space, and use key() to define the letters, # with automatic positioning of the key in the most empty # part of the plot # Have labcurve do the plotting, leaving extra space for key names(curves) <- labels labcurve(curves, keys=letters[1:m], type=type, col=1:m, pl=TRUE, ylim=c(-2.5,4)) # Put plotting symbols at equally-spaced points, # with a key for the symbols, ignoring line types labcurve(curves, keys=1:m, lty=1, type=type, col=1:m, pl=TRUE, ylim=c(-2.5,4)) # Plot and label two curves, with line parameters specified with data set.seed(191) ages.f <- sort(rnorm(50,20,7)) ages.m <- sort(rnorm(40,19,7)) height.f <- pmin(ages.f,21)*.2+60 height.m <- pmin(ages.m,21)*.16+63 labcurve(list(Female=list(ages.f,height.f,col=2), Male =list(ages.m,height.m,col=3,lty='dashed')), xlab='Age', ylab='Height', pl=TRUE) # add ,keys=c('f','m') to label curves with single letters # For S-Plus use lty=2 # Plot power for testing two proportions vs. n for various odds ratios, # using 0.1 as the probability of the event in the control group. 
# A separate curve is plotted for each odds ratio, and the curves are # labeled at points of maximum separation n <- seq(10, 1000, by=10) OR <- seq(.2,.9,by=.1) pow <- lapply(OR, function(or,n)list(x=n,y=bpower(p1=.1,odds.ratio=or,n=n)), n=n) names(pow) <- format(OR) labcurve(pow, pl=TRUE, xlab='n', ylab='Power') # Plot some random data and find the largest empty rectangle # that is at least .1 wide and .1 tall x <- runif(50) y <- runif(50) plot(x, y) z <- largest.empty(x, y, .1, .1) z points(z,pch=3) # mark center of rectangle, or polygon(z$rect, col='blue') # to draw the rectangle, or #key(z$x, z$y, \dots stuff for legend) # Use the mouse to draw a series of points using one symbol, and # two smooth curves or straight lines (if two points are clicked), # none of these being labeled # d <- drawPlot(Points(), Curve(), Curve()) # plot(d) \dontrun{ # Use the mouse to draw a Gaussian density, two series of points # using 2 symbols, one Bezier curve, a step function, and raw data # along the x-axis as a 1-d scatter plot (rug plot). Draw a key. # The density function is fit to 3 mouse clicks # Abline draws a dotted horizontal reference line d <- drawPlot(Curve('Normal',type='gauss'), Points('female'), Points('male'), Curve('smooth',ask=TRUE,lty=2), Curve('step',type='s',lty=3), Points(type='r'), Abline(h=.5, lty=2), xlab='X', ylab='y', xlim=c(0,100), key=TRUE) plot(d, ylab='Y') plot(d, key=FALSE) # label groups using labcurve } } \keyword{hplot} \keyword{aplot} \keyword{dplot} \keyword{iplot} Hmisc/man/biVar.Rd0000644000176200001440000001636213714234051013432 0ustar liggesusers\name{biVar} \alias{biVar} \alias{print.biVar} \alias{plot.biVar} \alias{spearman2} \alias{spearman2.default} \alias{spearman2.formula} \alias{spearman} \alias{spearman.test} \alias{chiSquare} \title{Bivariate Summaries Computed Separately by a Series of Predictors} \description{ \code{biVar} is a generic function that accepts a formula and usual \code{data}, \code{subset}, and \code{na.action} parameters plus a list \code{statinfo} that specifies a function of two variables to compute along with information about labeling results for printing and plotting. The function is called separately with each right hand side variable and the same left hand variable. The result is a matrix of bivariate statistics and the \code{statinfo} list that drives printing and plotting. The plot method draws a dot plot with x-axis values by default sorted in order of one of the statistics computed by the function. \code{spearman2} computes the square of Spearman's rho rank correlation and a generalization of it in which \code{x} can relate non-monotonically to \code{y}. This is done by computing the Spearman multiple rho-squared between \code{(rank(x), rank(x)^2)} and \code{y}. When \code{x} is categorical, a different kind of Spearman correlation used in the Kruskal-Wallis test is computed (and \code{spearman2} can do the Kruskal-Wallis test). This is done by computing the ordinary multiple \code{R^2} between \code{k-1} dummy variables and \code{rank(y)}, where \code{x} has \code{k} categories. \code{x} can also be a formula, in which case each predictor is correlated separately with \code{y}, using non-missing observations for that predictor. \code{biVar} is used to do the looping and bookkeeping. By default the plot shows the adjusted \code{rho^2}, using the same formula used for the ordinary adjusted \code{R^2}. The \code{F} test uses the unadjusted R2. \code{spearman} computes Spearman's rho on non-missing values of two variables. 
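As a small sketch of the quadratic (\code{p=2}) generalization on simulated data (the variable names here are arbitrary):
\preformatted{
set.seed(1)
x <- rnorm(200)
y <- x^2 + rnorm(200)   # non-monotonic relationship
spearman2(x, y)         # ordinary rho^2 is near zero here
spearman2(x, y, p=2)    # quadratic-in-ranks version detects the pattern
}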
\code{spearman.test} is a simple version of \code{spearman2.default}.

\code{chiSquare} is set up like \code{spearman2} except it is intended for a categorical response variable.  Separate Pearson chi-square tests are done for each predictor, with optional collapsing of infrequent categories.  Numeric predictors having more than \code{g} levels are categorized into \code{g} quantile groups.  \code{chiSquare} uses \code{biVar}.
}
\usage{
biVar(formula, statinfo, data=NULL, subset=NULL,
      na.action=na.retain, exclude.imputed=TRUE, ...)

\method{print}{biVar}(x, ...)

\method{plot}{biVar}(x, what=info$defaultwhat,
                     sort.=TRUE, main, xlab,
                     vnames=c('names','labels'), ...)

spearman2(x, ...)

\method{spearman2}{default}(x, y, p=1, minlev=0, na.rm=TRUE,
                            exclude.imputed=na.rm, ...)

\method{spearman2}{formula}(formula, data=NULL,
                            subset, na.action=na.retain,
                            exclude.imputed=TRUE, ...)

spearman(x, y)

spearman.test(x, y, p=1)

chiSquare(formula, data=NULL, subset=NULL, na.action=na.retain,
          exclude.imputed=TRUE, ...)
}
\arguments{
\item{formula}{a formula with a single left side variable}
\item{statinfo}{see \code{spearman2.formula} or \code{chiSquare} code}
\item{data, subset, na.action}{ the usual options for models.  Default for \code{na.action} is to retain all values, NA or not, so that NAs can be deleted in only a pairwise fashion. }
\item{exclude.imputed}{ set to \code{FALSE} to include imputed values (created by \code{impute}) in the calculations. }
\item{...}{other arguments that are passed to the function used to compute the bivariate statistics or to \code{dotchart3} for \code{plot}. }
\item{na.rm}{logical; delete NA values?}
\item{x}{ a numeric matrix with at least 5 rows and at least 2 columns (if \code{y} is absent).  For \code{spearman2}, the first argument may be a vector of any type, including character or factor.  The first argument may also be a formula, in which case all predictors are correlated individually with the response variable.  \code{x} may be a formula for \code{spearman2} in which case \code{spearman2.formula} is invoked.  Each predictor in the right hand side of the formula is separately correlated with the response variable.  For \code{print} or \code{plot}, \code{x} is an object produced by \code{biVar}.  For \code{spearman} and \code{spearman.test} \code{x} is a numeric vector, as is \code{y}.  For \code{chiSquare}, \code{x} is a formula. }
% \item{type}{
%  specifies the type of correlations to compute.  Spearman correlations
%  are the Pearson linear correlations computed on the ranks of non-missing
%  elements, using midranks for ties.
% }
\item{y}{ a numeric vector }
\item{p}{ for numeric variables, specifies the order of the Spearman \code{rho^2} to use.  The default is \code{p=1} to compute the ordinary \code{rho^2}.  Use \code{p=2} to compute the quadratic rank generalization to allow non-monotonicity.  \code{p} is ignored for categorical predictors. }
\item{minlev}{ minimum relative frequency that a level of a categorical predictor should have before it is pooled with other categories (see \code{combine.levels}) in \code{spearman2} and \code{chiSquare} (in which case it also applies to the response).  The default, \code{minlev=0}, causes no pooling. }
\item{what}{ specifies which statistic to plot.  Possibilities include the column names that appear when the print method is used. }
\item{sort.}{ set \code{sort.=FALSE} to suppress sorting variables by the statistic being plotted }
\item{main}{ main title for plot.  Default title shows the name of the response variable.
} \item{xlab}{ x-axis label. Default constructed from \code{what}. } \item{vnames}{ set to \code{"labels"} to use variable labels in place of names for plotting. If a variable does not have a label the name is always used.} % \item{g}{number of quantile groups into which to categorize continuous % predictors having more than \code{g} unique values, for \code{chiSquare}} } \value{ \code{spearman2.default} (the function that is called for a single \code{x}, i.e., when there is no formula) returns a vector of statistics for the variable. \code{biVar}, \code{spearman2.formula}, and \code{chiSquare} return a matrix with rows corresponding to predictors. } \details{ Uses midranks in case of ties, as described by Hollander and Wolfe. P-values for Spearman, Wilcoxon, or Kruskal-Wallis tests are approximated by using the \code{t} or \code{F} distributions. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \references{ Hollander M. and Wolfe D.A. (1973). Nonparametric Statistical Methods. New York: Wiley. Press WH, Flannery BP, Teukolsky SA, Vetterling, WT (1988): Numerical Recipes in C. Cambridge: Cambridge University Press. } \seealso{ \code{\link{combine.levels}}, \code{\link{varclus}}, \code{\link{dotchart3}}, \code{\link{impute}}, \code{\link{chisq.test}}, \code{\link{cut2}}. } \examples{ x <- c(-2, -1, 0, 1, 2) y <- c(4, 1, 0, 1, 4) z <- c(1, 2, 3, 4, NA) v <- c(1, 2, 3, 4, 5) spearman2(x, y) plot(spearman2(z ~ x + y + v, p=2)) f <- chiSquare(z ~ x + y + v) f } \keyword{nonparametric} \keyword{htest} \keyword{category} Hmisc/man/rcspline.eval.Rd0000644000176200001440000001074112306624473015136 0ustar liggesusers\name{rcspline.eval} \alias{rcspline.eval} \title{ Restricted Cubic Spline Design Matrix } \description{ Computes matrix that expands a single variable into the terms needed to fit a restricted cubic spline (natural spline) function using the truncated power basis. Two normalization options are given for somewhat reducing problems of ill-conditioning. The antiderivative function can be optionally created. If knot locations are not given, they will be estimated from the marginal distribution of \code{x}. } \usage{ rcspline.eval(x, knots, nk=5, inclx=FALSE, knots.only=FALSE, type="ordinary", norm=2, rpm=NULL, pc=FALSE, fractied=0.05) } \arguments{ \item{x}{ a vector representing a predictor variable } \item{knots}{ knot locations. If not given, knots will be estimated using default quantiles of \code{x}. For 3 knots, the outer quantiles used are 0.10 and 0.90. For 4-6 knots, the outer quantiles used are 0.05 and 0.95. For \eqn{\code{nk}>6}, the outer quantiles are 0.025 and 0.975. The knots are equally spaced between these on the quantile scale. For fewer than 100 non-missing values of \code{x}, the outer knots are the 5th smallest and largest \code{x}. } \item{nk}{ number of knots. Default is 5. The minimum value is 3. } \item{inclx}{ set to \code{TRUE} to add \code{x} as the first column of the returned matrix } \item{knots.only}{ return the estimated knot locations but not the expanded matrix } \item{type}{ \samp{"ordinary"} to fit the function, \samp{"integral"} to fit its anti-derivative. } \item{norm}{ \samp{0} to use the terms as originally given by \cite{Devlin and Weeks (1986)}, \samp{1} to normalize non-linear terms by the cube of the spacing between the last two knots, \samp{2} to normalize by the square of the spacing between the first and last knots (the default). 
\code{norm=2} has the advantage of making all nonlinear terms be on the x-scale.
}
\item{rpm}{ If given, any \code{NA}s in \code{x} will be replaced with the value \code{rpm} after estimating any knot locations. }
\item{pc}{ Set to \code{TRUE} to replace the design matrix with orthogonal (uncorrelated) principal components computed on the scaled, centered design matrix }
\item{fractied}{ If the fraction of observations tied at the lowest and/or highest values of \code{x} is greater than or equal to \code{fractied}, the algorithm attempts to use a different algorithm for knot finding based on quantiles of \code{x} after excluding the one or two values with excessive ties.  And if the number of unique \code{x} values excluding these values is small, the unique values will be used as the knots.  If the number of knots to use other than these exterior values is only one, that knot will be at the median of the non-extreme \code{x}.  This algorithm is not used if any interior values of \code{x} also have a proportion of ties equal to or exceeding \code{fractied}.}
}
\value{
If \code{knots.only=TRUE}, returns a vector of knot locations.  Otherwise returns a matrix with \code{x} (if \code{inclx=TRUE}) followed by \eqn{\code{nk}-2} nonlinear terms.  The matrix has an attribute \code{knots} which is the vector of knots used.  When \code{pc} is \code{TRUE}, an additional attribute is stored: \code{pcparms}, which contains the \code{center} and \code{scale} vectors and the \code{rotation} matrix.
}
\references{
Devlin TF and Weeks BJ (1986): Spline functions for logistic regression modeling.  Proc 11th Annual SAS Users Group Intnl Conf, p. 646--651.  Cary NC: SAS Institute, Inc.
}
\seealso{
\code{\link[splines]{ns}}, \code{\link{rcspline.restate}}, \code{\link[rms]{rcs}}
}
\examples{
x <- 1:100
rcspline.eval(x, nk=4, inclx=TRUE)
#lrm.fit(rcspline.eval(age,nk=4,inclx=TRUE), death)
x <- 1:1000
attributes(rcspline.eval(x))
x <- c(rep(0, 744),rep(1,6), rep(2,4), rep(3,10),rep(4,2),rep(6,6),
rep(7,3),rep(8,2),rep(9,4),rep(10,2),rep(11,9),rep(12,10),rep(13,13),
rep(14,5),rep(15,5),rep(16,10),rep(17,6),rep(18,3),rep(19,11),rep(20,16),
rep(21,6),rep(22,16),rep(23,17), 24, rep(25,8), rep(26,6),rep(27,3),
rep(28,7),rep(29,9),rep(30,10),rep(31,4),rep(32,4),rep(33,6),rep(34,6),
rep(35,4), rep(36,5), rep(38,6), 39, 39, 40, 40, 40, 41, 43, 44, 45)
attributes(rcspline.eval(x, nk=3))
attributes(rcspline.eval(x, nk=5))
u <- c(rep(0,30), 1:4, rep(5,30))
attributes(rcspline.eval(u))
}
\keyword{regression}
\keyword{smooth}
Hmisc/man/event.history.Rd0000644000176200001440000003544612243661443015211 0ustar liggesusers\name{event.history}
\alias{event.history}
\title{Produces event.history graph for survival data}
\description{
Produces an event history graph for right-censored survival data, including time-dependent covariate status, as described in Dubin, Muller, and Wang (2001).  Effectively, a Kaplan-Meier curve is produced with supplementary information regarding individual survival information, censoring information, and status over time of an individual time-dependent covariate or time-dependent covariate function for both uncensored and censored individuals.
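A minimal call is sketched below; here \code{d} stands for a hypothetical data frame whose second and third columns hold the survival times and event indicators, in the spirit of the examples below.
\preformatted{
# event.history(d, survtime.col=d[,2], surv.col=d[,3])
}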
}
\usage{
event.history(data, survtime.col, surv.col, surv.ind = c(1, 0),
              subset.rows = NULL, covtime.cols = NULL, cov.cols = NULL,
              num.colors = 1, cut.cov = NULL, colors = 1,
              cens.density = 10, mult.end.cens = 1.05,
              cens.mark.right = FALSE, cens.mark = "-",
              cens.mark.ahead = 0.5, cens.mark.cutoff = -1e-08,
              cens.mark.cex = 1,
              x.lab = "time under observation",
              y.lab = "estimated survival probability",
              title = "event history graph", ...)
}
\arguments{
\item{data}{ A matrix or data frame with rows corresponding to units (often individuals) and columns corresponding to survival time and event/censoring indicator.  Also, multiple columns may be devoted to time-dependent covariate level and time change. }
\item{survtime.col}{ Column (in data) representing minimum of time-to-event or right-censoring time for individual. }
\item{surv.col}{ Column (in data) representing event indicator for an individual.  Though, traditionally, such an indicator will be 1 for an event and 0 for a censored observation, this indicator can be represented by any two numbers, made explicit by the surv.ind argument. }
\item{surv.ind}{ Two-element vector representing, respectively, the number for an event, as listed in \code{surv.col}, followed by the number for a censored observation.  Default is the traditional survival data representation, i.e., \code{c(1,0)}. }
\item{subset.rows}{ Subset of rows of original matrix or data frame (data) to place in event history graph.  Logical arguments may be used here (e.g., \code{treatment.arm == "a"}, if the data frame \code{data} has been attached to the search directory). }
\item{covtime.cols}{ Column(s) (in data) representing the time when change of time-dependent covariate (or time-dependent covariate function) occurs.  There should be a unique non-\code{NA} entry in the column for each such change (along with corresponding \code{cov.cols} column entry representing the value of the covariate or function at that change time).  Default is \code{NULL}, meaning no time-dependent covariate information will be presented in the graph. }
\item{cov.cols}{ Column(s) (in data) representing the level of the time-dependent covariate (or time-dependent covariate function).  There should be a unique non-\code{NA} column entry representing each change in the level (along with a corresponding covtime.cols column entry representing the time of the change).  Default is \code{NULL}, meaning no time-dependent covariate information will be presented in the graph. }
\item{num.colors}{ Colors are utilized for the time-dependent covariate level for an individual.  This argument provides the number of unique covariate levels which will be displayed by mapping the number of colors (via \code{num.colors}) to the number of desired covariate levels.  This will divide the covariate span into roughly equally-sized intervals, via the S-Plus cut function.  Default is one color, meaning no time-dependent information will be presented in the graph.  Note that this argument will be ignored/superseded if a non-NULL argument is provided for the \code{cut.cov} parameter. }
\item{cut.cov}{ This argument allows the user to explicitly state how to define the intervals for the time-dependent covariate, such that different colors will be allocated to the user-defined covariate levels.  For example, for plotting five colors, six ordered points within the span of the data's covariate levels should be provided.
Default is \code{NULL}, meaning that the \code{num.colors} argument value will dictate the number of breakpoints, with the covariate span defined into roughly equally-sized intervals via the S-Plus cut function.  However, if \code{is.null(cut.cov) == FALSE}, then this argument supersedes any entry for the \code{num.colors} argument. }
\item{colors}{ This is a vector argument defining the actual colors used for the time-dependent covariate levels in the plot, with the index of this vector corresponding to the ordered levels of the covariate.  The number of colors (i.e., the length of the colors vector) should correspond to the value provided to the \code{num.colors} argument or the number of ordered points - 1 as defined in the \code{cut.cov} argument (with \code{cut.cov} superseding \code{num.colors} if \code{is.null(cut.cov) == FALSE}).  The function, as currently written, allows for as many as twenty distinct colors.  This argument effectively feeds into the col argument for the S-Plus polygon function.  Default is \code{colors = 1}.  See the col argument for both the S-Plus par function and the polygon function for more information. }
\item{cens.density}{ This will provide the shading density at the end of the individual bars for those who are censored.  For more information on shading density, see the density argument in the S-Plus polygon function.  Default is \code{cens.density=10}. }
\item{mult.end.cens}{ This is a multiplier that extends the length of the longest surviving individual bar (or bars, if a tie exists) if right-censored, presuming that no event times eventually follow this final censored time.  Default extends the length 5 percent beyond the length of the observed right-censored survival time. }
\item{cens.mark.right}{ A logical argument that states whether an explicit mark should be placed to the right of the individual right-censored survival bars.  This argument is most useful for large sample sizes, where it may be hard to detect the special shading via cens.density, particularly for the short-term survivors. }
\item{cens.mark}{ Character argument which describes the censored mark that should be used if \code{cens.mark.right = TRUE}.  Default is \code{"-"}. }
\item{cens.mark.ahead}{ A numeric argument, which specifies the absolute distance to be placed between the individual right-censored survival bars and the mark as defined in the above cens.mark argument.  Default is 0.5 (that is, half a day, if survival time is measured in days), but may very well need adjusting depending on the maximum survival time observed in the dataset. }
\item{cens.mark.cutoff}{ A negative number very close to 0 (by default \code{cens.mark.cutoff = -1e-8}) to ensure that the censoring marks get plotted correctly.  See \code{event.history} code in order to see its usage.  This argument typically will not need adjustment. }
\item{cens.mark.cex}{ Numeric argument defining the size of the mark defined in the \code{cens.mark} argument above.  See more information by viewing the \code{cex} argument for the S-Plus \code{\link{par}} function.  Default is \code{cens.mark.cex = 1.0}. }
\item{x.lab}{Single label to be used for entire x-axis.  Default is \code{"time under observation"}. }
\item{y.lab}{Single label to be used for entire y-axis.  Default is \code{"estimated survival probability"}. }
\item{title}{Title for the event history graph.  Default is \code{"event history graph"}. }
\item{\dots}{ This allows arguments to the plot function call within the \code{event.history} function.
So, for example, the axes representations can be manipulated with appropriate arguments, or particular areas of the \code{event.history} graph can be \dQuote{zoomed}. See the details section for more comments about zooming. } } \details{ In order to focus on a particular area of the event history graph, zooming can be performed. This is best done by specifying appropriate \code{xlim} and \code{ylim} arguments at the end of the \code{event.history} function call, taking advantage of the \code{\dots} argument link to the plot function. An example of zooming can be seen in Plate 4 of the paper referenced below. Please read the reference below to understand how the individual covariate and survival information is provided in the plot, how ties are handled, how right-censoring is handled, etc. } \references{ Dubin, J.A., Muller, H.-G., and Wang, J.-L. (2001). Event history graphs for censored survival data. \emph{Statistics in Medicine}, \bold{20}, 2951-2964. } \author{ Joel Dubin\cr \email{jdubin@uwaterloo.ca} } \note{ The authors have found better control of the use of color by producing the graphs via the postscript plotting device in S-Plus. In fact, the provided examples utilize the postscript function. However, your past experiences may be different, and you may prefer to control color directly (to the graphsheet in Windows environment, for example). The event.history function will work with either approach. } \section{WARNING}{ This function has been tested thoroughly, but only within a restricted version and environment, i.e., only within S-Plus 2000, Version 3, and within S-Plus 6.0, version 2, both on a Windows 2000 machine. Hence, we cannot currently vouch for the function's effectiveness in other versions of S-Plus (e.g., S-Plus 3.4) nor in other operating environments (e.g., Windows 95, Linux or Unix). The function has also been verified to work on R under Linux. 
} \seealso{ \code{\link{plot}},\code{\link{polygon}}, \code{\link{event.chart}}, \code{\link{par}} } \examples{ # Code to produce event history graphs for SIM paper # # before generating plots, some pre-processing needs to be performed, # in order to get dataset in proper form for event.history function; # need to create one line per subject and sort by time under observation, # with those experiencing event coming before those tied with censoring time; require('survival') data(heart) # creation of event.history version of heart dataset (call heart.one): heart.one <- matrix(nrow=length(unique(heart$id)), ncol=8) for(i in 1:length(unique(heart$id))) { if(length(heart$id[heart$id==i]) == 1) heart.one[i,] <- as.numeric(unlist(heart[heart$id==i, ])) else if(length(heart$id[heart$id==i]) == 2) heart.one[i,] <- as.numeric(unlist(heart[heart$id==i,][2,])) } heart.one[,3][heart.one[,3] == 0] <- 2 ## converting censored events to 2, from 0 if(is.factor(heart$transplant)) heart.one[,7] <- heart.one[,7] - 1 ## getting back to correct transplantation coding heart.one <- as.data.frame(heart.one[order(unlist(heart.one[,2]), unlist(heart.one[,3])),]) names(heart.one) <- names(heart) # back to usual censoring indicator: heart.one[,3][heart.one[,3] == 2] <- 0 # note: transplant says 0 (for no transplants) or 1 (for one transplant) # and event = 1 is death, while event = 0 is censored # plot single Kaplan-Meier curve from heart data, first creating survival object heart.surv <- survfit(Surv(stop, event) ~ 1, data=heart.one, conf.int = FALSE) # figure 3: traditional Kaplan-Meier curve # postscript('ehgfig3.ps', horiz=TRUE) # omi <- par(omi=c(0,1.25,0.5,1.25)) plot(heart.surv, ylab='estimated survival probability', xlab='observation time (in days)') title('Figure 3: Kaplan-Meier curve for Stanford data', cex=0.8) # dev.off() ## now, draw event history graph for Stanford heart data; use as Figure 4 # postscript('ehgfig4.ps', horiz=TRUE, colors = seq(0, 1, len=20)) # par(omi=c(0,1.25,0.5,1.25)) event.history(heart.one, survtime.col=heart.one[,2], surv.col=heart.one[,3], covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]), cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]), num.colors=2, colors=c(6,10), x.lab = 'time under observation (in days)', title='Figure 4: Event history graph for\nStanford data', cens.mark.right =TRUE, cens.mark = '-', cens.mark.ahead = 30.0, cens.mark.cex = 0.85) # dev.off() # now, draw age-stratified event history graph for Stanford heart data; # use as Figure 5 # two plots, stratified by age status # postscript('c:\\temp\\ehgfig5.ps', horiz=TRUE, colors = seq(0, 1, len=20)) # par(omi=c(0,1.25,0.5,1.25)) par(mfrow=c(1,2)) event.history(data=heart.one, subset.rows = (heart.one[,4] < 0), survtime.col=heart.one[,2], surv.col=heart.one[,3], covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]), cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]), num.colors=2, colors=c(6,10), x.lab = 'time under observation\n(in days)', title = 'Figure 5a:\nStanford data\n(age < 48)', cens.mark.right =TRUE, cens.mark = '-', cens.mark.ahead = 40.0, cens.mark.cex = 0.85, xlim=c(0,1900)) event.history(data=heart.one, subset.rows = (heart.one[,4] >= 0), survtime.col=heart.one[,2], surv.col=heart.one[,3], covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]), cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]), num.colors=2, colors=c(6,10), x.lab = 'time under observation\n(in days)', title = 'Figure 5b:\nStanford data\n(age >= 48)', cens.mark.right =TRUE, 
cens.mark = '-', cens.mark.ahead = 40.0, cens.mark.cex = 0.85, xlim=c(0,1900)) # dev.off() # par(omi=omi) # we will not show liver cirrhosis data manipulation, as it was # a bit detailed; however, here is the # event.history code to produce Figure 7 / Plate 1 # Figure 7 / Plate 1 : prothrombin ehg with color \dontrun{ second.arg <- 1 ### second.arg is for shading third.arg <- c(rep(1,18),0,1) ### third.arg is for intensity # postscript('c:\\temp\\ehgfig7.ps', horiz=TRUE, # colors = cbind(seq(0, 1, len = 20), second.arg, third.arg)) # par(omi=c(0,1.25,0.5,1.25), col=19) event.history(cirrhos2.eh, subset.rows = NULL, survtime.col=cirrhos2.eh$time, surv.col=cirrhos2.eh$event, covtime.cols = as.matrix(cirrhos2.eh[, ((2:18)*2)]), cov.cols = as.matrix(cirrhos2.eh[, ((2:18)*2) + 1]), cut.cov = as.numeric(quantile(as.matrix(cirrhos2.eh[, ((2:18)*2) + 1]), c(0,.2,.4,.6,.8,1), na.rm=TRUE) + c(-1,0,0,0,0,1)), colors=c(20,4,8,11,14), x.lab = 'time under observation (in days)', title='Figure 7: Event history graph for liver cirrhosis data (color)', cens.mark.right =TRUE, cens.mark = '-', cens.mark.ahead = 100.0, cens.mark.cex = 0.85) # dev.off() } } \keyword{survival} Hmisc/man/impute.Rd0000644000176200001440000000651013714234051013664 0ustar liggesusers\name{impute} \alias{impute} \alias{impute.default} \alias{print.impute} \alias{summary.impute} \alias{[.impute} \alias{is.imputed} \title{ Generic Functions and Methods for Imputation } \description{ These functions do simple and \code{transcan} imputation and print, summarize, and subscript variables that have NAs filled-in with imputed values. The simple imputation method involves filling in NAs with constants, with a specified single-valued function of the non-NAs, or from a sample (with replacement) from the non-NA values (this is useful in multiple imputation). More complex imputations can be done with the \code{transcan} function, which also works with the generic methods shown here, i.e., \code{impute} can take a \code{transcan} object and use the imputed values created by \code{transcan} (with \code{imputed=TRUE}) to fill-in NAs. The \code{print} method places * after variable values that were imputed. The \code{summary} method summarizes all imputed values and then uses the next \code{summary} method available for the variable. The subscript method preserves attributes of the variable and subsets the list of imputed values corresponding with how the variable was subsetted. The \code{is.imputed} function is for checking if observations are imputed. } \usage{ impute(x, ...) \method{impute}{default}(x, fun=median, ...) \method{print}{impute}(x, ...) \method{summary}{impute}(object, ...) is.imputed(x) } \arguments{ \item{x}{ a vector or an object created by \code{transcan}, or a vector needing basic unconditional imputation. If there are no \code{NA}s and \code{x} is a vector, it is returned unchanged. } \item{fun}{ the name of a function to use in computing the (single) imputed value from the non-NAs. The default is \code{median}. If instead of specifying a function as \code{fun}, a single value or vector (numeric, or character if \code{object} is a factor) is specified, those values are used for insertion. \code{fun} can also be the character string \code{"random"} to draw random values for imputation, with the random values not forced to be the same if there are multiple NAs. 
For a vector of constants, the vector must be of length one (indicating the same value replaces all NAs) or must be as long as the number of NAs, in which case the values correspond to consecutive NAs to replace. For a factor \code{object}, constants for imputation may include character values not in the current levels of \code{object}. In that case new levels are added. If \code{object} is of class \code{"factor"}, \code{fun} is ignored and the most frequent category is used for imputation. } \item{object}{an object of class \code{"impute"}} \item{...}{ignored} } \value{ a vector with class \code{"impute"} placed in front of existing classes. For \code{is.imputed}, a vector of logical values is returned (all \code{TRUE} if \code{object} is not of class \code{impute}). } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{transcan}}, \code{\link{impute.transcan}}, \code{\link{describe}}, \code{\link{na.include}}, \code{\link{sample}} } \examples{ age <- c(1,2,NA,4) age.i <- impute(age) # Could have used impute(age,2.5), impute(age,mean), impute(age,"random") age.i summary(age.i) is.imputed(age.i) } \keyword{methods} \keyword{math} \keyword{htest} \keyword{models} Hmisc/man/reShape.Rd0000644000176200001440000002151113714234045013751 0ustar liggesusers\name{reShape} \alias{reShape} \title{Reshape Matrices and Serial Data} \description{ If the first argument is a matrix, \code{reShape} strings out its values and creates row and column vectors specifying the row and column each element came from. This is useful for sending matrices to Trellis functions, for analyzing or plotting results of \code{table} or \code{crosstabs}, or for reformatting serial data stored in a matrix (with rows representing multiple time points) into vectors. The number of observations in the new variables will be the product of the number of rows and number of columns in the input matrix. If the first argument is a vector, the \code{id} and \code{colvar} variables are used to restructure it into a matrix, with \code{NA}s for elements that corresponded to combinations of \code{id} and \code{colvar} values that did not exist in the data. When more than one vector is given, multiple matrices are created. This is useful for restructuring irregular serial data into regular matrices. It is also useful for converting data produced by \code{expand.grid} into a matrix (see the last example). The number of rows of the new matrices equals the number of unique values of \code{id}, and the number of columns equals the number of unique values of \code{colvar}. When the first argument is a vector and the \code{id} is a data frame (even with only one variable), \code{reShape} will produce a data frame, and the unique groups are identified by combinations of the values of all variables in \code{id}. If a data frame \code{constant} is specified, the variables in this data frame are assumed to be constant within combinations of \code{id} variables (if not, an arbitrary observation in \code{constant} will be selected for each group). A row of \code{constant} corresponding to the target \code{id} combination is then carried along when creating the data frame result. A different behavior of \code{reShape} is achieved when \code{base} and \code{reps} are specified. 
In that case \code{x} must be a list or data frame, and those data are assumed to contain one or more non-repeating measurements (e.g., baseline measurements) and one or more repeated measurements represented by variables named by pasting together the character strings in the vector \code{base} with the integers 1, 2, \dots, \code{reps}.  The input data are rearranged by repeating each value of the baseline variables \code{reps} times and by transposing each observation's values of one of the set of repeated measurements as \code{reps} observations under the variable whose name does not have an integer pasted to the end.  If \code{x} has a \code{row.names} attribute, those observation identifiers are each repeated \code{reps} times in the output object.  See the last example.
}
\usage{
reShape(x, \dots, id, colvar, base, reps, times=1:reps,
        timevar='seqno', constant=NULL)
}
\arguments{
\item{x}{ a matrix or vector, or, when \code{base} is specified, a list or data frame }
\item{\dots}{ other optional vectors, if \code{x} is a vector }
\item{id}{ A numeric, character, category, or factor variable containing subject identifiers, or a data frame of such variables that in combination form groups of interest.  Required if \code{x} is a vector, ignored otherwise. }
\item{colvar}{ A numeric, character, category, or factor variable containing column identifiers.  \code{colvar} is typically a \dQuote{time of data collection} variable.  Required if \code{x} is a vector, ignored otherwise. }
\item{base}{ vector of character strings containing base names of repeated measurements }
\item{reps}{ number of times variables named in \code{base} are repeated.  This must be a constant. }
\item{times}{ when \code{base} is given, \code{times} is the vector of times to create if you do not want to use consecutive integers beginning with 1. }
\item{timevar}{ specifies the name of the time variable to create if \code{times} is given, if you do not want to use \code{seqno} }
\item{constant}{ a data frame with the same number of rows in \code{id} and \code{x}, containing auxiliary information to be merged into the resulting data frame.  Logically, the rows of \code{constant} within each group should have the same value of all of its variables. }
}
\value{
If \code{x} is a matrix, returns a list containing the row variable, the column variable, and the \code{as.vector(x)} vector, named the same as the calling argument was called for \code{x}.  If \code{x} is a vector and no other vectors were specified as \code{\dots}, the result is a matrix.  If at least one vector was given to \code{\dots}, the result is a list containing \code{k} matrices, where \code{k} is one plus the number of vectors in \code{\dots}.  If \code{x} is a list or data frame, the same type of object is returned.  If \code{x} is a vector and \code{id} is a data frame, a data frame will be the result.
}
\details{
In converting \code{dimnames} to vectors, the resulting variables are numeric if all elements of the matrix dimnames can be converted to numeric, otherwise the corresponding row or column variable remains character.  When the \code{dimnames} of \code{x} have a \code{names} attribute, those two names become the new variable names.  If \code{x} is a vector and another vector is also given (in \code{\dots}), the matrices in the resulting list are named the same as the input vector calling arguments.  You can specify customized names for these on-the-fly by using e.g. \code{reShape(X=x, Y=y, id= , colvar= )}.
The new names will then be \code{X} and \code{Y} instead of \code{x} and \code{y}. A new variable named \code{seqno} is also added to the resulting object. \code{seqno} indicates the sequential repeated measurement number. When \code{base} and \code{times} are specified, this new variable is named the character value of \code{timevar} and the values are given by a table lookup into the vector \code{times}. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University School of Medicine\cr \email{fh@fharrell.com} } \seealso{ \code{\link[stats]{reshape}}, \code{\link[base:vector]{as.vector}}, \code{\link[base]{matrix}}, \code{\link[base]{dimnames}}, \code{\link[base]{outer}}, \code{\link[base]{table}} } \examples{ set.seed(1) Solder <- factor(sample(c('Thin','Thick'),200,TRUE),c('Thin','Thick')) Opening <- factor(sample(c('S','M','L'), 200,TRUE),c('S','M','L')) tab <- table(Opening, Solder) tab reShape(tab) # attach(tab) # do further processing # An example where a matrix is created from irregular vectors follow <- data.frame(id=c('a','a','b','b','b','d'), month=c(1, 2, 1, 2, 3, 2), cholesterol=c(225,226, 320,319,318, 270)) follow attach(follow) reShape(cholesterol, id=id, colvar=month) detach('follow') # Could have done : # reShape(cholesterol, triglyceride=trig, id=id, colvar=month) # Create a data frame, reshaping a long dataset in which groups are # formed not just by subject id but by combinations of subject id and # visit number. Also carry forward a variable that is supposed to be # constant within subject-visit number combinations. In this example, # it is not constant, so an arbitrary visit number will be selected. w <- data.frame(id=c('a','a','a','a','b','b','b','d','d','d'), visit=c( 1, 1, 2, 2, 1, 1, 2, 2, 2, 2), k=c('A','A','B','B','C','C','D','E','F','G'), var=c('x','y','x','y','x','y','y','x','y','z'), val=1:10) with(w, reShape(val, id=data.frame(id,visit), constant=data.frame(k), colvar=var)) # Get predictions from a regression model for 2 systematically # varying predictors. Convert the predictions into a matrix, with # rows corresponding to the predictor having the most values, and # columns corresponding to the other predictor # d <- expand.grid(x2=0:1, x1=1:100) # pred <- predict(fit, d) # reShape(pred, id=d$x1, colvar=d$x2) # makes 100 x 2 matrix # Reshape a wide data frame containing multiple variables representing # repeated measurements (3 repeats on 2 variables; 4 subjects) set.seed(33) n <- 4 w <- data.frame(age=rnorm(n, 40, 10), sex=sample(c('female','male'), n,TRUE), sbp1=rnorm(n, 120, 15), sbp2=rnorm(n, 120, 15), sbp3=rnorm(n, 120, 15), dbp1=rnorm(n, 80, 15), dbp2=rnorm(n, 80, 15), dbp3=rnorm(n, 80, 15), row.names=letters[1:n]) options(digits=3) w u <- reShape(w, base=c('sbp','dbp'), reps=3) u reShape(w, base=c('sbp','dbp'), reps=3, timevar='week', times=c(0,3,12)) } \keyword{manip} \keyword{array} \concept{trellis} \concept{lattice} \concept{repeated measures} \concept{longitudinal data} Hmisc/man/consolidate.Rd0000644000176200001440000000251512243661443014673 0ustar liggesusers\name{consolidate} \alias{consolidate} \alias{consolidate<-} \alias{consolidate.default} \title{ Element Merging } \description{ Merges an object by the names of its elements. Elements in \code{value} that do not exist in \code{x} are inserted into \code{x}, and elements in \code{x} that also exist in \code{value} are replaced by the corresponding \code{value} elements, unless \code{protect} is \code{TRUE}. 
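For example, if \code{x} is \code{c(a=1, b=2)} and \code{value} is \code{c(b=9, c=3)}, then \code{consolidate(x, value)} yields \code{c(a=1, b=9, c=3)}, while \code{consolidate(x, value, protect=TRUE)} retains \code{b=2} and yields \code{c(a=1, b=2, c=3)}. 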
} \usage{ consolidate(x, value, protect, \dots) \method{consolidate}{default}(x, value, protect=FALSE, \dots) consolidate(x, protect, \dots) <- value } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ named list or vector } \item{value}{ named list or vector } \item{protect}{ logical; should elements in \code{x} be kept instead of elements in \code{value}? } \item{\dots}{ currently does nothing; included in case future methods ever need it. } } \author{ Charles Dupont } \seealso{ \code{\link{names}} } \examples{ x <- 1:5 names(x) <- LETTERS[x] y <- 6:10 names(y) <- LETTERS[y-2] x # c(A=1,B=2,C=3,D=4,E=5) y # c(D=6,E=7,F=8,G=9,H=10) consolidate(x, y) # c(A=1,B=2,C=3,D=6,E=7,F=8,G=9,H=10) consolidate(x, y, protect=TRUE) # c(A=1,B=2,C=3,D=4,E=5,F=8,G=9,H=10) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ utilities } Hmisc/man/panel.bpplot.Rd0000644000176200001440000003040313714234046014761 0ustar liggesusers\name{panel.bpplot} \alias{panel.bpplot} \alias{bpplotM} \alias{bpplt} \alias{bppltp} \title{ Box-Percentile Panel Function for Trellis } \description{ For all their good points, box plots have a high ink/information ratio in that they mainly display 3 quartiles. Many practitioners have found that the "outer values" are difficult to explain to non-statisticians and many feel that the notion of "outliers" is too dependent on (false) expectations that data distributions should be Gaussian. \code{panel.bpplot} is a \code{panel} function for use with \code{trellis}, especially for \code{bwplot}. It draws box plots (without the whiskers) with any number of user-specified "corners" (corresponding to different quantiles), but it also draws box-percentile plots similar to those drawn by Jeffrey Banfield's (\email{umsfjban@bill.oscs.montana.edu}) \code{bpplot} function. To quote from Banfield, "box-percentile plots supply more information about the univariate distributions. At any height the width of the irregular 'box' is proportional to the percentile of that height, up to the 50th percentile, and above the 50th percentile the width is proportional to 100 minus the percentile. Thus, the width at any given height is proportional to the percent of observations that are more extreme in that direction. As in boxplots, the median, 25th and 75th percentiles are marked with line segments across the box." \code{panel.bpplot} can also be used with base graphics to add extended box plots to an existing plot, by specifying \code{nogrid=TRUE, height=...}. \code{panel.bpplot} is a generalization of \code{bpplot} and \code{\link[lattice]{panel.bwplot}} in that it works with \code{trellis} (making the plots horizontal so that category labels are more visible), it allows the user to specify the quantiles to connect and those for which to draw reference lines, and it displays means (by default using dots). \code{bpplt} draws horizontal box-percentile plots much like those drawn by \code{panel.bpplot} but taking as the starting point a matrix containing quantiles summarizing the data. \code{bpplt} is primarily intended to be used internally by \code{plot.summary.formula.reverse} or \code{plot.summaryM} but when used with no arguments has a general purpose: to draw an annotated example box-percentile plot with the default quantiles used and with the mean drawn with a solid dot. This schematic plot is rendered nicely in postscript with an image height of 3.5 inches. 
\code{bppltp} is like \code{bpplt} but for \code{plotly} graphics, and it does not draw an annotated extended box plot example. \code{bpplotM} uses the \code{lattice} \code{bwplot} function to depict multiple numeric continuous variables with varying scales in a single \code{lattice} graph, after reshaping the dataset into a tall and thin format. } \usage{ panel.bpplot(x, y, box.ratio=1, means=TRUE, qref=c(.5,.25,.75), probs=c(.05,.125,.25,.375), nout=0, nloc=c('right lower', 'right', 'left', 'none'), cex.n=.7, datadensity=FALSE, scat1d.opts=NULL, violin=FALSE, violin.opts=NULL, font=box.dot$font, pch=box.dot$pch, cex.means =box.dot$cex, col=box.dot$col, nogrid=NULL, height=NULL, \dots) # E.g. bwplot(formula, panel=panel.bpplot, panel.bpplot.parameters) bpplt(stats, xlim, xlab='', box.ratio = 1, means=TRUE, qref=c(.5,.25,.75), qomit=c(.025,.975), pch=16, cex.labels=par('cex'), cex.points=if(prototype)1 else 0.5, grid=FALSE) bppltp(p=plotly::plot_ly(), stats, xlim, xlab='', box.ratio = 1, means=TRUE, qref=c(.5,.25,.75), qomit=c(.025,.975), teststat=NULL, showlegend=TRUE) bpplotM(formula=NULL, groups=NULL, data=NULL, subset=NULL, na.action=NULL, qlim=0.01, xlim=NULL, nloc=c('right lower','right','left','none'), vnames=c('labels', 'names'), cex.n=.7, cex.strip=1, outerlabels=TRUE, \dots) } \arguments{ \item{x}{ continuous variable whose distribution is to be examined } \item{y}{ grouping variable } \item{box.ratio}{ see \code{\link[lattice]{panel.bwplot}} } \item{means}{ set to \code{FALSE} to suppress drawing a character at the mean value } \item{qref}{ vector of quantiles for which to draw reference lines. These do not need to be included in \code{probs}. } \item{probs}{ vector of quantiles to display in the box plot. These should all be less than 0.5; the mirror-image quantiles are added automatically. By default, \code{probs} is set to \code{c(.05,.125,.25,.375)} so that intervals contain 0.9, 0.75, 0.5, and 0.25 of the data. To draw all 99 percentiles, i.e., to draw a box-percentile plot, set \code{probs=seq(.01,.49,by=.01)}. To make a more traditional box plot, use \code{probs=.25}. } \item{nout}{ tells the function to use \code{scat1d} to draw tick marks showing the \code{nout} smallest and \code{nout} largest values if \code{nout >= 1}, or to show all values less than the \code{nout} quantile or greater than the \code{1-nout} quantile if \code{0 < nout <= 0.5}. If \code{nout} is a whole number, only the first \code{n/2} observations are shown on either side of the median, where \code{n} is the total number of observations. } \item{nloc}{location to plot number of non-\code{NA} observations next to each box. Specify \code{nloc='none'} to suppress. For \code{panel.bpplot}, the default \code{nloc} is \code{'none'} if \code{nogrid=TRUE}.} \item{cex.n}{character size for \code{nloc}} \item{datadensity}{ set to \code{TRUE} to invoke \code{scat1d} to draw a data density (one-dimensional scatter diagram or rug plot) inside each box plot. 
} \item{scat1d.opts}{ a list containing named arguments (without abbreviations) to pass to \code{scat1d} when \code{datadensity=TRUE} or \code{nout > 0} } \item{violin}{set to \code{TRUE} to invoke \code{panel.violin} in addition to drawing box-percentile plots} \item{violin.opts}{a list of options to pass to \code{panel.violin}} \item{cex.means}{character size for dots representing means} \item{font,pch,col}{see \code{\link[lattice]{panel.bwplot}}} \item{nogrid}{set to \code{TRUE} to use in base graphics} \item{height}{if \code{nogrid=TRUE}, specifies the height of the box in user \code{y} units} \item{\dots}{arguments passed to \code{points} or \code{panel.bpplot} or \code{bwplot}} \item{stats,xlim,xlab,qomit,cex.labels,cex.points,grid}{ undocumented arguments to \code{bpplt}. For \code{bpplotM}, \code{xlim} is a list with elements named as the \code{x}-axis variables, to override the \code{qlim} calculations with user-specified \code{x}-axis limits for selected variables. Example: \code{xlim=list(age=c(20,60))}. } \item{p}{an already-started \code{plotly} object} \item{teststat}{an html expression containing a test statistic} \item{showlegend}{set to \code{TRUE} to have \code{plotly} include a legend. Not recommended when plotting more than one variable.} \item{formula}{a formula with continuous numeric analysis variables on the left hand side and stratification variables on the right. The first variable on the right is the one that will vary the fastest, forming the \code{y}-axis. \code{formula} may be omitted, in which case all numeric variables with more than 5 unique values in \code{data} will be analyzed. Or \code{formula} may be a vector of variable names in \code{data} to analyze. In the latter two cases (and only those cases), \code{groups} must be given, representing a character vector with names of stratification variables.} \item{groups}{see above} \item{data}{an optional data frame} \item{subset}{an optional subsetting expression or logical vector} \item{na.action}{specifies a function to possibly subset the data according to \code{NA}s (default is no such subsetting).} \item{qlim}{the outer quantiles to use for scaling each panel in \code{bpplotM}} \item{vnames}{default is to use variable \code{label} attributes when they exist, or use variable names otherwise. Specify \code{vnames='names'} to always use variable names for panel labels in \code{bpplotM}} \item{cex.strip}{character size for panel strip labels} \item{outerlabels}{if \code{TRUE}, pass the \code{lattice} graphics through the \code{latticeExtra} package's \code{useOuterStrips} function if there are two conditioning (paneling) variables, to put panel labels in outer margins.} } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \references{ Esty WW, Banfield J: The box-percentile plot. J Statistical Software 8 No. 17, 2003. 
} \seealso{ \code{\link{bpplot}}, \code{\link[lattice]{panel.bwplot}}, \code{\link{scat1d}}, \code{\link{quantile}}, \code{\link{Ecdf}}, \code{\link{summaryP}}, \code{\link[latticeExtra]{useOuterStrips}} } \examples{ set.seed(13) x <- rnorm(1000) g <- sample(1:6, 1000, replace=TRUE) x[g==1][1:20] <- rnorm(20)+3 # contaminate 20 x's for group 1 # default trellis box plot require(lattice) bwplot(g ~ x) # box-percentile plot with data density (rug plot) bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.49,by=.01), datadensity=TRUE) # add ,scat1d.opts=list(tfrac=1) to make all tick marks the same size # when a group has > 125 observations # small dot for means, show only .05,.125,.25,.375,.625,.75,.875,.95 quantiles bwplot(g ~ x, panel=panel.bpplot, cex.means=.3) # suppress means and reference lines for lower and upper quartiles bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,.1,.25), means=FALSE, qref=FALSE) # continuous plot up until quartiles ("Tootsie Roll plot") bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.25,by=.01)) # start at quartiles then make it continuous ("coffin plot") bwplot(g ~ x, panel=panel.bpplot, probs=seq(.25,.49,by=.01)) # same as previous but add a spike to give 0.95 interval bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,seq(.25,.49,by=.01))) # decile plot with reference lines at outer quintiles and median bwplot(g ~ x, panel=panel.bpplot, probs=c(.1,.2,.3,.4), qref=c(.5,.2,.8)) # default plot with tick marks showing all observations outside the outer # box (.05 and .95 quantiles), with very small ticks bwplot(g ~ x, panel=panel.bpplot, nout=.05, scat1d.opts=list(frac=.01)) # show 5 smallest and 5 largest observations bwplot(g ~ x, panel=panel.bpplot, nout=5) # Use a scat1d option (preserve=TRUE) to ensure that the right peak extends # to the same position as the extreme scat1d bwplot(~x , panel=panel.bpplot, probs=seq(.00,.5,by=.001), datadensity=TRUE, scat1d.opts=list(preserve=TRUE)) # Add an extended box plot to an existing base graphics plot plot(x, 1:length(x)) panel.bpplot(x, 1070, nogrid=TRUE, pch=19, height=15, cex.means=.5) # Draw a prototype showing how to interpret the plots bpplt() # Example for bpplotM set.seed(1) n <- 800 d <- data.frame(treatment=sample(c('a','b'), n, TRUE), sex=sample(c('female','male'), n, TRUE), age=rnorm(n, 40, 10), bp =rnorm(n, 120, 12), wt =rnorm(n, 190, 30)) label(d$bp) <- 'Systolic Blood Pressure' units(d$bp) <- 'mmHg' bpplotM(age + bp + wt ~ treatment, data=d) bpplotM(age + bp + wt ~ treatment * sex, data=d, cex.strip=.8) bpplotM(age + bp + wt ~ treatment*sex, data=d, violin=TRUE, violin.opts=list(col=adjustcolor('blue', alpha.f=.15), border=FALSE)) bpplotM(c('age', 'bp', 'wt'), groups='treatment', data=d) # Can use Hmisc Cs function, e.g. Cs(age, bp, wt) bpplotM(age + bp + wt ~ treatment, data=d, nloc='left') # Without treatment: bpplotM(age + bp + wt ~ 1, data=d) \dontrun{ # Automatically find all variables that appear to be continuous getHdata(support) bpplotM(data=support, groups='dzgroup', cex.strip=.4, cex.means=.3, cex.n=.45) # Separate displays for categorical vs. 
continuous baseline variables getHdata(pbc) pbc <- upData(pbc, moveUnits=TRUE) s <- summaryM(stage + sex + spiders ~ drug, data=pbc) plot(s) Key(0, .5) s <- summaryP(stage + sex + spiders ~ drug, data=pbc) plot(s, val ~ freq | var, groups='drug', pch=1:3, col=1:3, key=list(x=.6, y=.8)) bpplotM(bili + albumin + protime + age ~ drug, data=pbc) } } \keyword{nonparametric} \keyword{hplot} \keyword{distribution} \concept{trellis} \concept{lattice} Hmisc/man/plsmo.Rd0000644000176200001440000002507313076431722013525 0ustar liggesusers\name{plsmo} \alias{plsmo} \alias{panel.plsmo} \title{ Plot smoothed estimates } \description{ Plot smoothed estimates of x vs. y, handling missing data for lowess or supsmu, and adding axis labels. Optionally suppresses plotting extrapolated estimates. An optional \code{group} variable can be specified to compute and plot the smooth curves by levels of \code{group}. When \code{group} is present, the \code{datadensity} option will draw tick marks showing the location of the raw \code{x}-values, separately for each curve. \code{plsmo} has an option to plot connected points for raw data, with no smoothing. The non-panel version of \code{plsmo} allows \code{y} to be a matrix, for which smoothing is done separately over its columns. If both \code{group} and multi-column \code{y} are used, the number of curves plotted is the product of the number of groups and the number of \code{y} columns. \code{method='intervals'} is often used when y is binary, as it may be tricky to specify a reasonable smoothing parameter to \code{lowess} or \code{supsmu} in this case. The \code{'intervals'} method uses the \code{cut2} function to form intervals of x containing a target of \code{mobs} observations. For each interval the \code{ifun} function summarizes y, with the default being the mean (proportions for binary y). The results are plotted as step functions, with vertical discontinuities drawn with a saturation of 0.15 of the original color. A plus sign is drawn at the mean x within each interval. For this approach, the default x-range is the entire raw data range, and \code{trim} and \code{evaluate} are ignored. For \code{panel.plsmo} it is best to specify \code{type='l'} when using \code{'intervals'}. \code{panel.plsmo} is a \code{panel} function for \code{trellis} for the \code{xyplot} function that uses \code{plsmo} and its options to draw one or more nonparametric function estimates on each panel. This has advantages over using \code{xyplot} with \code{panel.xyplot} and \code{panel.loess}: (1) by default it will invoke \code{labcurve} to label the curves where they are most separated, (2) the \code{datadensity} option will put rug plots on each curve (instead of a single rug plot at the bottom of the graph), and (3) when \code{panel.plsmo} invokes \code{plsmo} it can use the "super smoother" (\code{supsmu} function) instead of \code{lowess}, or pass \code{method='intervals'}. \code{panel.plsmo} senses when a \code{group} variable is specified to \code{xyplot} so that it can invoke \code{\link[lattice]{panel.superpose}} instead of \code{panel.xyplot}. Using \code{panel.plsmo} through \code{trellis} has some advantages over calling \code{plsmo} directly in that conditioning variables are allowed and \code{trellis} uses nicer fonts etc. When a \code{group} variable was used, \code{panel.plsmo} creates a function \code{Key} in the session frame that the user can invoke to draw a key for individual data point symbols used for the \code{group}s. 
By default, the key is positioned at the upper right corner of the graph. If \code{Key(locator(1))} is specified, the key will appear so that its upper left corner is at the coordinates of the mouse click. For \code{ggplot2} graphics the counterparts are \code{\link{stat_plsmo}} and \code{\link{histSpikeg}}. } \usage{ plsmo(x, y, method=c("lowess","supsmu","raw","intervals"), xlab, ylab, add=FALSE, lty=1 : lc, col=par("col"), lwd=par("lwd"), iter=if(length(unique(y))>2) 3 else 0, bass=0, f=2/3, mobs=30, trim, fun, ifun=mean, group, prefix, xlim, ylim, label.curves=TRUE, datadensity=FALSE, scat1d.opts=NULL, lines.=TRUE, subset=TRUE, grid=FALSE, evaluate=NULL, \dots) #To use panel function: #xyplot(formula=y ~ x | conditioningvars, groups, # panel=panel.plsmo, type='b', # label.curves=TRUE, # lwd = superpose.line$lwd, # lty = superpose.line$lty, # pch = superpose.symbol$pch, # cex = superpose.symbol$cex, # font = superpose.symbol$font, # col = NULL, scat1d.opts=NULL, \dots) } \arguments{ \item{x}{ vector of x-values, NAs allowed } \item{y}{ vector or matrix of y-values, NAs allowed } \item{method}{ \code{"lowess"} (the default), \code{"supsmu"}, \code{"raw"} to not smooth at all, or \code{"intervals"} to use intervals (see above) } \item{xlab}{ x-axis label iff \code{add=F}. Default is \code{label(x)} or the argument name. } \item{ylab}{ y-axis label, like xlab. } \item{add}{ Set to T to call lines instead of plot. Assumes axes already labeled. } \item{lty}{ line type, default=1,2,3,\dots, corresponding to columns of \code{y} and \code{group} combinations } \item{col}{ color for each curve, corresponding to \code{group}. Default is current \code{par("col")}. } \item{lwd}{ vector of line widths for the curves, corresponding to \code{group}. Default is current \code{par("lwd")}. \code{lwd} can also be specified as an element of \code{label.curves} if \code{label.curves} is a list. } \item{iter}{ iter parameter if \code{method="lowess"}, default=0 if \code{y} is binary, and 3 otherwise. } \item{bass}{ bass parameter if \code{method="supsmu"}, default=0. } \item{f}{passed to the \code{lowess} function, for \code{method="lowess"}} \item{mobs}{for \code{method='intervals'}, the target number of observations per interval} \item{trim}{ only plots smoothed estimates between trim and 1-trim quantiles of x. Default is to use 10th smallest to 10th largest x in the group if the number of observations in the group exceeds 200 (0 otherwise). Specify trim=0 to plot over entire range. } \item{fun}{ after computing the smoothed estimates, if \code{fun} is given the y-values are transformed by \code{fun()} } \item{ifun}{a summary statistic function to apply to the \code{y}-variable for \code{method='intervals'}. Default is \code{mean}.} \item{group}{ a variable, either a \code{factor} vector or one that will be converted to \code{factor} by \code{plsmo}, that is used to stratify the data so that separate smooths may be computed } \item{prefix}{ a character string to appear in front of group labels. The presence of \code{prefix} ensures that \code{labcurve} will be called even when \code{add=TRUE}. } \item{xlim}{ a vector of 2 x-axis limits. Default is observed range. } \item{ylim}{ a vector of 2 y-axis limits. Default is observed range. } \item{label.curves}{ set to \code{FALSE} to prevent \code{labcurve} from being called to label multiple curves corresponding to \code{group}s. Set to a list to pass options to \code{labcurve}. \code{lty} and \code{col} are passed to \code{labcurve} automatically. 
} \item{datadensity}{ set to \code{TRUE} to draw tick marks on each curve, using x-coordinates of the raw data \code{x} values. This is done using \code{scat1d}. } \item{scat1d.opts}{a list of options to hand to \code{scat1d}} \item{lines.}{ set to \code{FALSE} to suppress smoothed curves from being drawn. This can make sense if \code{datadensity=TRUE}. } \item{subset}{ a logical or integer vector specifying a subset to use for processing, with respect to all variables being analyzed } \item{grid}{ set to \code{TRUE} if the \R \code{grid} package drew the current plot} \item{evaluate}{ number of points to keep from smoother. If specified, an equally-spaced grid of \code{evaluate} \code{x} values will be obtained from the smoother using linear interpolation. This will keep from plotting an enormous number of points if the dataset contains a very large number of unique \code{x} values.} \item{\dots}{ optional arguments that are passed to \code{scat1d}, or optional parameters to pass to \code{plsmo} from \code{panel.plsmo}. See optional arguments for \code{plsmo} above. } \item{type}{ set to \code{p} to have \code{panel.plsmo} plot points (and not call \code{plsmo}), \code{l} to call \code{plsmo} and not plot points, or use the default \code{b} to plot both. } \item{pch,cex,font}{ vectors of graphical parameters corresponding to the \code{group}s (scalars if \code{group} is absent). By default, the parameters set up by \code{trellis} will be used. } } \value{ \code{plsmo} returns a list of curves (x and y coordinates) that was passed to \code{labcurve} } \section{Side Effects}{ plots, and \code{panel.plsmo} creates the \code{Key} function in the session frame. } \seealso{ \code{\link{lowess}}, \code{\link{supsmu}}, \code{\link{label}}, \code{\link{quantile}}, \code{\link{labcurve}}, \code{\link{scat1d}}, \code{\link[lattice]{xyplot}}, \code{\link[lattice]{panel.superpose}}, \code{\link[lattice]{panel.xyplot}}, \code{\link{stat_plsmo}}, \code{\link{histSpikeg}} } \examples{ set.seed(1) x <- 1:100 y <- x + runif(100, -10, 10) plsmo(x, y, "supsmu", xlab="Time of Entry") #Use label(y) or "y" for ylab plsmo(x, y, add=TRUE, lty=2) #Add lowess smooth to existing plot, with different line type age <- rnorm(500, 50, 15) survival.time <- rexp(500) sex <- sample(c('female','male'), 500, TRUE) race <- sample(c('black','non-black'), 500, TRUE) plsmo(age, survival.time < 1, fun=qlogis, group=sex) # plot logit by sex #Bivariate Y sbp <- 120 + (age - 50)/10 + rnorm(500, 0, 8) + 5 * (sex == 'male') dbp <- 80 + (age - 50)/10 + rnorm(500, 0, 8) - 5 * (sex == 'male') Y <- cbind(sbp, dbp) plsmo(age, Y) plsmo(age, Y, group=sex) #Plot points and smooth trend line using trellis # (add type='l' to suppress points or type='p' to suppress trend lines) require(lattice) xyplot(survival.time ~ age, panel=panel.plsmo) #Do this for multiple panels xyplot(survival.time ~ age | sex, panel=panel.plsmo) #Repeat this using equal sample size intervals (n=25 each) summarized by #the median, then a proportion (mean of binary y) xyplot(survival.time ~ age | sex, panel=panel.plsmo, type='l', method='intervals', mobs=25, ifun=median) ybinary <- ifelse(runif(length(sex)) < 0.5, 1, 0) xyplot(ybinary ~ age, groups=sex, panel=panel.plsmo, type='l', method='intervals', mobs=75, ifun=mean, xlim=c(0, 120)) #Do this for subgroups of points on each panel, show the data #density on each curve, and draw a key at the default location xyplot(survival.time ~ age | sex, groups=race, panel=panel.plsmo, datadensity=TRUE) Key() #Use wtd.loess.noiter to 
do a fast weighted smooth plot(x, y) lines(wtd.loess.noiter(x, y)) lines(wtd.loess.noiter(x, y, weights=c(rep(1,50), 100, rep(1,49))), col=2) points(51, y[51], pch=18) # show overly weighted point #Try to duplicate this smooth by replicating 51st observation 100 times lines(wtd.loess.noiter(c(x,rep(x[51],99)),c(y,rep(y[51],99)), type='ordered all'), col=3) #Note: These two don't agree exactly } \keyword{smooth} \keyword{nonparametric} \keyword{hplot} \concept{trellis} \concept{lattice} Hmisc/man/latex.Rd0000644000176200001440000007076614300717750013520 0ustar liggesusers\encoding{latin1} \name{latex} \alias{latex} \alias{latex.default} \alias{latex.function} \alias{latex.list} \alias{latexTranslate} \alias{htmlTranslate} \alias{latexSN} \alias{htmlSN} \alias{latexVerbatim} \alias{dvi} \alias{print.dvi} \alias{dvi.latex} \alias{dvips} \alias{dvips.latex} \alias{dvips.dvi} \alias{dvigv} \alias{dvigv.latex} \alias{dvigv.dvi} \alias{print.latex} \alias{show.latex} \alias{show.dvi} \title{ Convert an S object to LaTeX, and Related Utilities } \description{ \code{latex} converts its argument to a \file{.tex} file appropriate for inclusion in a LaTeX2e document. \code{latex} is a generic function that calls one of \code{latex.default}, \code{latex.function}, \code{latex.list}. \code{latex.default} does appropriate rounding and decimal alignment and produces a file containing a LaTeX tabular environment to print the matrix or data.frame \code{x} as a table. \code{latex.function} prepares an S function for printing by issuing \code{sed} commands that are similar to those in the \code{S.to.latex} procedure in the \code{s.to.latex} package (Chambers and Hastie, 1993). \code{latex.function} can also produce \code{verbatim} output or output that works with the \code{Sweavel} LaTeX style at \url{https://biostat.app.vumc.org/wiki/Main/SweaveTemplate}. \code{latex.list} calls \code{latex} recursively for each element in the argument. \code{latexTranslate} translates particular items in character strings to LaTeX format, e.g., makes \samp{a^2 = a\$^2\$} for superscript within variable labels. LaTeX names of greek letters (e.g., \code{"alpha"}) will have backslashes added if \code{greek==TRUE}. Math mode is inserted as needed. \code{latexTranslate} assumes that input text always has matches, e.g. \code{[) [] (] ()}, and that surrounding by \samp{\$\$} is OK. \code{htmlTranslate} is similar to \code{latexTranslate} but for html translation. It doesn't need math mode and assumes dollar signs are just that. \code{latexSN} converts a vector floating point numbers to character strings using LaTeX exponents. Dollar signs to enter math mode are not added. Similarly, \code{htmlSN} converts to scientific notation in html. \code{latexVerbatim} on an object executes the object's \code{print} method, capturing the output for a file inside a LaTeX verbatim environment. \code{dvi} uses the system \code{latex} command to compile LaTeX code produced by \code{latex}, including any needed styles. \code{dvi} will put a \samp{\\documentclass\{report\}} and \samp{\\end\{document\}} wrapper around a file produced by \code{latex}. By default, the \samp{geometry} LaTeX package is used to omit all margins and to set the paper size to a default of 5.5in wide by 7in tall. The result of \code{dvi} is a .dvi file. To both format and screen display a non-default size, use for example \code{print(dvi(latex(x), width=3, height=4),width=3,height=4)}. 
Note that you can use something like \samp{xdvi -geometry 460x650 -margins 2.25in file} without changing LaTeX defaults to emulate this. \code{dvips} will use the system \code{dvips} command to print the .dvi file to the default system printer, or create a postscript file if \code{file} is specified. \code{dvigv} uses the system \code{latex} command to convert the input object to a .dvi file, and uses the system \code{dvips} command to convert it to postscript. Then the postscript file is displayed using Ghostview (assumed to be the system command \command{gv}). There are \code{show} methods for displaying typeset LaTeX on the screen using the system \command{xdvi} command. If you \code{show} a LaTeX file created by \code{latex} without running it through \code{dvi} using \code{show.dvi(object)}, the \code{show} method will run it through \code{dvi} automatically. These \code{show} methods are not S Version 4 methods so you have to use full names such as \code{show.dvi} and \code{show.latex}. Use the \code{print} methods for more automatic display of typesetting, e.g. typing \code{latex(x)} will invoke xdvi to view the typeset document. } \usage{ latex(object, \dots) \method{latex}{default}(object, title=first.word(deparse(substitute(object))), file=paste(title, ".tex", sep=""), append=FALSE, label=title, rowlabel=title, rowlabel.just="l", cgroup=NULL, n.cgroup=NULL, rgroup=NULL, n.rgroup=NULL, cgroupTexCmd="bfseries", rgroupTexCmd="bfseries", rownamesTexCmd=NULL, colnamesTexCmd=NULL, cellTexCmds=NULL, rowname, cgroup.just=rep("c",length(n.cgroup)), colheads=NULL, extracolheads=NULL, extracolsize='scriptsize', dcolumn=FALSE, numeric.dollar=!dcolumn, cdot=FALSE, longtable=FALSE, draft.longtable=TRUE, ctable=FALSE, booktabs=FALSE, table.env=TRUE, here=FALSE, lines.page=40, caption=NULL, caption.lot=NULL, caption.loc=c('top','bottom'), star=FALSE, double.slash=FALSE, vbar=FALSE, collabel.just=rep("c",nc), na.blank=TRUE, insert.bottom=NULL, insert.bottom.width=NULL, insert.top=NULL, first.hline.double=!(booktabs | ctable), where='!tbp', size=NULL, center=c('center','centering','centerline','none'), landscape=FALSE, multicol=TRUE, math.row.names=FALSE, already.math.row.names=FALSE, math.col.names=FALSE, already.math.col.names=FALSE, hyperref=NULL, continued='continued', \dots) # x is a matrix or data.frame \method{latex}{function}( object, title=first.word(deparse(substitute(object))), file=paste(title, ".tex", sep=""), append=FALSE, assignment=TRUE, type=c('example','verbatim','Sinput'), width.cutoff=70, size='', \dots) \method{latex}{list}( object, title=first.word(deparse(substitute(object))), file=paste(title, ".tex", sep=""), append=FALSE, label, caption, caption.lot, caption.loc=c('top','bottom'), \dots) \method{print}{latex}(x, \dots) latexTranslate(object, inn=NULL, out=NULL, pb=FALSE, greek=FALSE, na='', \dots) htmlTranslate(object, inn=NULL, out=NULL, greek=FALSE, na='', code=htmlSpecialType(), \dots) latexSN(x) htmlSN(x, pretty=TRUE, \dots) latexVerbatim(x, title=first.word(deparse(substitute(x))), file=paste(title, ".tex", sep=""), append=FALSE, size=NULL, hspace=NULL, width=.Options$width, length=.Options$length, \dots) dvi(object, \dots) \method{dvi}{latex}(object, prlog=FALSE, nomargins=TRUE, width=5.5, height=7, \dots) \method{print}{dvi}(x, \dots) dvips(object, \dots) \method{dvips}{latex}(object, \dots) \method{dvips}{dvi}(object, file, \dots) \method{show}{latex}(object) # or show.dvi(object) or just object dvigv(object, \dots) \method{dvigv}{latex}(object, \dots) # or 
dvigv(dvi(object)) \method{dvigv}{dvi}(object, \dots) } \arguments{ \item{object}{ For \code{latex}, any S object. For \code{dvi} or \code{dvigv}, an object created by \code{latex}. For \code{latexTranslate}, \code{object} is a vector of character strings to translate. Any \code{NA}s are set to blank strings before conversion. } \item{x}{ any object to be \code{print}ed verbatim for \code{latexVerbatim}. For \code{latexSN} or \code{htmlSN}, \code{x} is a numeric vector. } \item{title}{ name of file to create without the \samp{.tex} extension. If this option is not set, value/string of \code{x} (see above) is printed in the top left corner of the table. Set \code{title=''} to suppress this output. } \item{file}{ name of the file to create. The default file name is \file{x.tex} where \code{x} is the first word in the name of the argument for \code{x}. Set \code{file=""} to have the generated LaTeX code just printed to standard output. This is especially useful when running under Sweave in R using its \samp{results=tex} tag, to save having to manage many small external files. When \code{file=""}, \code{latex} keeps track of LaTeX styles that are called for by creating or modifying an object \code{latexStyles} (in \code{.GlobalTemp} in R or in frame 0 in S-Plus). \code{latexStyles} is a vector containing the base names of all the unique LaTeX styles called for so far in the current session. See the end of the examples section for a way to use this object to good effect. For \code{dvips}, \code{file} is the name of an output postscript file. } \item{append}{ defaults to \code{FALSE}. Set to \code{TRUE} to append output to an existing file. } \item{label}{ a text string representing a symbolic label for the table for referencing in the LaTeX \samp{\\label} and \samp{\\ref} commands. \code{label} is only used if \code{caption} is given. } \item{rowlabel}{ If \code{x} has row dimnames, \code{rowlabel} is a character string containing the column heading for the row dimnames. The default is the name of the argument for \code{x}. } \item{rowlabel.just}{ If \code{x} has row dimnames, specifies the justification for printing them. Possible values are \code{"l"}, \code{"r"}, \code{"c"}. The heading (\code{rowlabel}) itself is left justified if \code{rowlabel.just="l"}, otherwise it is centered. } \item{cgroup}{ a vector of character strings defining major column headings. The default is to have none. } \item{n.cgroup}{ a vector containing the number of columns for which each element in cgroup is a heading. For example, specify \code{cgroup=c("Major 1","Major 2")}, \code{n.cgroup=c(3,3)} if \code{"Major 1"} is to span columns 1-3 and \code{"Major 2"} is to span columns 4-6. \code{rowlabel} does not count in the column numbers. You can omit \code{n.cgroup} if all groups have the same number of columns. } \item{rgroup}{ a vector of character strings containing headings for row groups. \code{n.rgroup} must be present when \code{rgroup} is given. The first \code{n.rgroup[1]} rows are sectioned off and \code{rgroup[1]} is used as a bold heading for them. The usual row dimnames (which must be present if \code{rgroup} is) are indented. The next \code{n.rgroup[2]} rows are treated likewise, etc. } \item{n.rgroup}{ integer vector giving the number of rows in each grouping. If \code{rgroup} is not specified, \code{n.rgroup} is just used to divide off blocks of rows by horizontal lines. If \code{rgroup} is given but \code{n.rgroup} is omitted, \code{n.rgroup} will default so that each row group contains the same number of rows. 
} \item{cgroupTexCmd}{A character string specifying a LaTeX command to be used to format column group labels. The default, \code{"bfseries"}, sets the current font to \sQuote{bold}. It is possible to supply a vector of strings so that each column group label is formatted differently. Please note that the first item of the vector is used to format the title (even if a title is not used). Currently the user needs to handle this issue. Multiple effects can be achieved by creating custom LaTeX commands; for example, \code{"\providecommand{\redscshape}{\color{red}\scshape}"} creates a LaTeX command called \samp{\\redscshape} that formats the text in red small-caps. } \item{rgroupTexCmd}{A character string specifying a LaTeX command to be used to format row group labels. The default, \code{"bfseries"}, sets the current font to \sQuote{bold}. A vector of strings can be supplied to format each row group label differently. Normal recycling applies if the vector is shorter than \code{n.rgroups}. See also \code{cgroupTexCmd} above regarding multiple effects. } \item{rownamesTexCmd}{A character string specifying a LaTeX command to be used to format rownames. The default, \code{NULL}, applies no command. A vector of different commands can also be supplied. See also \code{cgroupTexCmd} above regarding multiple effects. } \item{colnamesTexCmd}{A character string specifying a LaTeX command to be used to format column labels. The default, \code{NULL}, applies no command. It is possible to supply a vector of strings to format each column label differently. If column groups are not used, the first item in the vector will be used to format the title. Please note that if column groups are used the first item of \code{cgroupTexCmd} and not \code{colnamesTexCmd} is used to format the title. The user needs to allow for these issues when supplying a vector of commands. See also \code{cgroupTexCmd} above regarding multiple effects. } \item{cellTexCmds}{A matrix of character strings which are LaTeX commands to be used to format each element, or cell, of the object. The matrix must have the same \code{NROW()} and \code{NCOL()} as the object. The default, NULL, applies no formats. Empty strings also apply no formats, and one way to start might be to create a matrix of empty strings with \code{matrix(rep("", NROW(x) * NCOL(x)), nrow=NROW(x))} and then selectively change appropriate elements of the matrix. Note that you might need to set \code{numeric.dollar=FALSE} (to disable math mode) for some effects to work. See also \code{cgroupTexCmd} above regarding multiple effects. } \item{na.blank}{ Set to \code{TRUE} to use blanks rather than \code{NA} for missing values. This usually looks better in \code{latex}. } \item{insert.bottom}{ an optional character string to typeset at the bottom of the table. For \code{"ctable"} style tables, this is placed in an unmarked footnote. } \item{insert.bottom.width}{ character string; a tex width controlling the width of the insert.bottom text. Currently this only has an effect when using \code{longtable=TRUE}. } \item{insert.top}{a character string to insert as a heading right before beginning \code{tabular} environment. Useful for multiple sub-tables.} \item{first.hline.double}{ set to \code{FALSE} to use single horizontal rules for styles other than \code{"booktabs"} or \code{"ctable"} } \item{rowname}{ rownames for \code{tabular} environment. Default is rownames of matrix or data.frame. Specify \code{rowname=NULL} to suppress the use of row names. 
} \item{cgroup.just}{ justification for labels for column groups. Defaults to \code{"c"}. } \item{colheads}{a character vector of column headings if you don't want to use \code{dimnames(object)[[2]]}. Specify \code{colheads=FALSE} to suppress column headings.} \item{extracolheads}{ an optional vector of extra column headings that will appear under the main headings (e.g., sample sizes). This character vector does not need to include an empty space for any \code{rowname} in effect, as this will be added automatically. You can also form subheadings by splitting character strings defining the column headings using the usual backslash \code{n} newline character.} \item{extracolsize}{ size for \code{extracolheads} or for any second lines in column names; default is \code{"scriptsize"} } \item{dcolumn}{see \code{\link{format.df}}} \item{numeric.dollar}{ logical, default \code{!dcolumn}. Set to \code{TRUE} to place dollar signs around numeric values when \code{dcolumn=FALSE}. This assures that \code{latex} will use minus signs rather than hyphens to indicate negative numbers. Set to \code{FALSE} when \code{dcolumn=TRUE}, as \code{dcolumn.sty} automatically uses minus signs. } \item{math.row.names}{ logical; set to \code{TRUE} to place dollar signs around the row names. } \item{already.math.row.names}{set to \code{TRUE} to prevent any math mode changes to row names} \item{math.col.names}{ logical; set to \code{TRUE} to place dollar signs around the column names. } \item{already.math.col.names}{set to \code{TRUE} to prevent any math mode changes to column names} \item{hyperref}{if \code{table.env=TRUE} is a character string used to generate a LaTeX \code{hyperref} enclosure} \item{continued}{a character string used to indicate pages after the first when making a long table} \item{cdot}{see \code{\link{format.df}}} \item{longtable}{ Set to \code{TRUE} to use David Carlisle's LaTeX \code{longtable} style, allowing long tables to be split over multiple pages with headers repeated on each page. The \code{"style"} element is set to \code{"longtable"}. The \code{latex} \samp{\\usepackage} must reference \samp{[longtable]}. The file \file{longtable.sty} will need to be in a directory in your \env{TEXINPUTS} path. } \item{draft.longtable}{ I forgot what this does. } \item{ctable}{ set to \code{TRUE} to use Wybo Dekker's \samp{ctable} style from \acronym{CTAN}. Even though for historical reasons it is not the default, it is generally the preferred method. Thicker but not doubled \samp{\\hline}s are used to start a table when \code{ctable} is in effect. } \item{booktabs}{ set \code{booktabs=TRUE} to use the \samp{booktabs} style of horizontal rules for better tables. In this case, double \samp{\\hline}s are not used to start a table. } \item{table.env}{ Set \code{table.env=FALSE} to suppress enclosing the table in a LaTeX \samp{table} environment. \code{table.env} only applies when \code{longtable=FALSE}. You may not specify a \code{caption} if \code{table.env=FALSE}. } \item{here}{ Set to \code{TRUE} if you are using \code{table.env=TRUE} with \code{longtable=FALSE} and you have installed David Carlisle's \file{here.sty} LaTeX style. This will cause the LaTeX \samp{table} environment to be set up with option \samp{H} to guarantee that the table will appear exactly where you think it will in the text. The \code{"style"} element is set to \code{"here"}. The \code{latex} \samp{\\usepackage} must reference \samp{[here]}. The file \file{here.sty} will need to be in a directory in your \env{TEXINPUTS} path. 
\samp{here} is largely obsolete with LaTeX2e. } \item{lines.page}{ Applies if \code{longtable=TRUE}. No more than \code{lines.page} lines in the body of a table will be placed on a single page. Page breaks will only occur at \code{rgroup} boundaries. } \item{caption}{ a text string to use as a caption to print at the top of the first page of the table. Default is no caption. } \item{caption.lot}{ a text string representing a short caption to be used in the \dQuote{List of Tables}. By default, LaTeX will use \code{caption}. If you get inexplicable \samp{latex} errors, you may need to supply \code{caption.lot} to make the errors go away. } \item{caption.loc}{ set to \code{"bottom"} to position a caption below the table instead of the default of \code{"top"}. } \item{star}{ apply the star option for ctables to allow a table to spread over two columns when in twocolumn mode. } \item{double.slash}{ set to \code{TRUE} to output \samp{"\\"} as \samp{"\\\\"} in LaTeX commands. Useful when you are reading the output file back into an S vector for later output. } \item{vbar}{ logical. When \code{vbar==TRUE}, columns in the tabular environment are separated with vertical bar characters. When \code{vbar==FALSE}, columns are separated with white space. The default, \code{vbar==FALSE}, produces tables consistent with the style sheet for the Journal of the American Statistical Association. } \item{collabel.just}{ justification for column labels. } \item{assignment}{ logical. When \code{TRUE}, the default, the name of the function and the assignment arrow are printed to the file. } \item{where}{ specifies placement of floats if a table environment is used. Default is \code{"!tbp"}. To allow tables to appear in the middle of a page of text you might specify \code{where="!htbp"} to \code{latex.default}. } \item{size}{ size of table text if a size change is needed (default is no change). For example you might specify \code{size="small"} to use LaTeX font size \dQuote{small}. For \code{latex.function} it is a character string that will be appended to \code{"Sinput"}, such as \code{"small"}. } \item{center}{ default is \code{"center"} to enclose the table in a \samp{center} environment. Use \code{center="centering"} or \code{"centerline"} to instead use LaTeX \samp{centering} or \code{centerline} directives, or \code{center="none"} to use no centering. \code{centerline} can be useful when objects besides a \code{tabular} are enclosed in a single \code{table} environment. This option was implemented by Markus Jäntti \email{markus.jantti@iki.fi} of Åbo Akademi University. } \item{landscape}{ set to \code{TRUE} to enclose the table in a \samp{landscape} environment. When \code{ctable} is \code{TRUE}, will use the \code{rotate} argument to \code{ctable}. } \item{type}{ The default uses the S \code{alltt} environment for \code{latex.function}. Set \code{type="verbatim"} to instead use the LaTeX \samp{verbatim} environment. Use \code{type="Sinput"} if using \code{Sweave}, especially if you have customized the \code{Sinput} environment, for example using the \code{Sweavel} style which uses the \code{listings} LaTeX package. } \item{width.cutoff}{width of function text output in columns; see \code{deparse}} \item{\dots}{ other arguments are accepted and ignored except that \code{latex} passes arguments to \code{format.df} (e.g., \code{col.just} and other formatting options like \code{dec}, \code{rdec}, and \code{cdec}). For \code{latexVerbatim} these arguments are passed to the \code{print} function. 
Ignored for \code{latexTranslate} and \code{htmlTranslate}. For \code{htmlSN}, these arguments are passed to \code{prettyNum} or \code{format}. } \item{inn, out}{ specify additional input and translated strings over the usual defaults } \item{pb}{ If \code{pb=TRUE}, \code{latexTranslate} also translates \samp{[()]} to math mode using \samp{\\left, \\right}. } \item{greek}{set to \code{TRUE} to have \code{latexTranslate} put names for greek letters in math mode and add backslashes. For \code{htmlTranslate}, translates greek letters to corresponding html characters, ignoring "modes".} \item{na}{single character string to translate \code{NA} values to for \code{latexTranslate} and \code{htmlTranslate}} \item{code}{set to \code{'unicode'} to use HTML unicode characters or \code{'&'} to use the ampersand pound number format} \item{pretty}{set to \code{FALSE} to have \code{htmlSN} use \code{format} instead of \code{prettyNum}} \item{hspace}{ horizontal space, e.g., extra left margin for verbatim text. Default is none. Use e.g. \code{hspace="10ex"} to add 10 extra spaces to the left of the text. } \item{length}{for S-Plus only; is the length of the output page for printing and capturing verbatim text} \item{width,height}{ are the \code{options( )} to have in effect only for when \code{print} is executed. Defaults are current \code{options}. For \code{dvi} these specify the paper width and height in inches if \code{nomargins=TRUE}, with defaults of 5.5 and 7, respectively. } \item{prlog}{ set to \code{TRUE} to have \code{dvi} print, to the S-Plus session, the LaTeX .log file. } \item{multicol}{ set to \code{FALSE} to not use \samp{\\multicolumn} in header of table } \item{nomargins}{ set to \code{FALSE} to use default LaTeX margins when making the .dvi file } } \value{ \code{latex} and \code{dvi} return a list of class \code{latex} or \code{dvi} containing character string elements \code{file} and \code{style}. \code{file} contains the name of the generated file, and \code{style} is a vector (possibly empty) of styles to be included using the LaTeX2e \samp{\\usepackage} command. \code{latexTranslate} returns a vector of character strings } \section{Side Effects}{ creates various system files and runs various Linux/UNIX system commands which are assumed to be in the system path. } \details{ \command{latex.default} optionally outputs a LaTeX comment containing the calling statement. To output this comment, run \command{options(omitlatexcom=FALSE)} before running. The default behavior of suppressing the comment is helpful when running RMarkdown to produce pdf output using LaTeX, as this uses \command{pandoc} which is fooled into trying to escape the percent comment symbol. If running under Windows and using MikTeX, \command{latex} and \command{yap} must be in your system path, and \command{yap} is used to browse \file{.dvi} files created by \command{latex}. You should install the \file{geometry.sty} and \file{ctable.sty} styles in MikTeX to make optimum use of \code{latex()}. On Mac OS X, you may have to append the \file{/usr/texbin} directory to the system path. Thanks to Kevin Thorpe (\email{kevin.thorpe@utoronto.ca}) one way to set up Mac OS X is to install \samp{X11} and \samp{X11SDK} if not already installed, start \samp{X11} within the R GUI, and issue the command \code{Sys.setenv( PATH=paste(Sys.getenv("PATH"),"/usr/texbin",sep=":") )}. 
To avoid any complications of using \samp{X11} under MacOS, users can install the \samp{TeXShop} package, which will associate \file{.dvi} files with a viewer that displays a \file{pdf} version of the file after a hidden conversion from \file{dvi} to \file{pdf}. System options can be used to specify external commands to be used. Defaults are given by \code{options(xdvicmd='xdvi')} or \code{options(xdvicmd='yap')}, \code{options(dvipscmd='dvips')}, \code{options(latexcmd='latex')}. For MacOS specify \code{options(xdvicmd='MacdviX')} or if TeXShop is installed, \code{options(xdvicmd='open')}. To use \samp{pdflatex} rather than \samp{latex}, set \code{options(latexcmd='pdflatex')}, \code{options(dviExtension='pdf')}, and set \code{options('xdvicmd')} to your chosen PDF previewer. If running S-Plus and your directory for temporary files is not \file{/tmp} (Unix/Linux) or \file{\\windows\\temp} (Windows), add your own \code{tempdir} function such as \code{ tempdir <- function() "/yourmaindirectory/yoursubdirectory"} To prevent the latex file from being displayed store the result of \code{latex} in an object, e.g. \code{w <- latex(object, file='foo.tex')}. } \author{ Frank E. Harrell, Jr.,\cr Department of Biostatistics,\cr Vanderbilt University,\cr \email{fh@fharrell.com} Richard M. Heiberger,\cr Department of Statistics,\cr Temple University, Philadelphia, PA.\cr \email{rmh@temple.edu} David R. Whiting,\cr School of Clinical Medical Sciences (Diabetes),\cr University of Newcastle upon Tyne, UK.\cr \email{david.whiting@ncl.ac.uk} } \seealso{ \code{\link{html}}, \code{\link{format.df}}, \code{\link[tools]{texi2dvi}} } \examples{ x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'),c('c','d','this that'))) \dontrun{ latex(x) # creates x.tex in working directory # The result of the above command is an object of class "latex" # which here is automatically printed by the latex print method. # The latex print method prepends and appends latex headers and # calls the latex program in the PATH. If the latex program is # not in the PATH, you will get error messages from the operating # system. w <- latex(x, file='/tmp/my.tex') # Does not call the latex program as the print method was not invoked print.default(w) # Shows the contents of the w variable without attempting to latex it. d <- dvi(w) # compile LaTeX document, make .dvi # latex assumed to be in path d # or show(d) : run xdvi (assumed in path) to display w # or show(w) : run dvi then xdvi dvips(d) # run dvips to print document dvips(w) # run dvi then dvips library(tools) texi2dvi('/tmp/my.tex') # compile and produce pdf file in working dir. } latex(x, file="") # just write out LaTeX code to screen \dontrun{ # Use paragraph formatting to wrap text to 3 in. wide in a column d <- data.frame(x=1:2, y=c(paste("a", paste(rep("very",30),collapse=" "),"long string"), "a short string")) latex(d, file="", col.just=c("l", "p{3in}"), table.env=FALSE) } \dontrun{ # After running latex( ) multiple times with different special styles in # effect, make a file that will call for the needed LaTeX packages when # latex is run (especially when using Sweave with R) if(exists('latexStyles')) cat(paste('\\usepackage{',latexStyles,'}',sep=''), file='stylesused.tex', sep='\n') # Then in the latex job have something like: # \documentclass{article} # \input{stylesused} # \begin{document} # ... 
} } \keyword{utilities} \keyword{interface} \keyword{methods} \keyword{file} \keyword{character} \keyword{manip} Hmisc/man/gbayesSeqSim.Rd0000644000176200001440000001007014003044247014747 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/gbayesSeqSim.r \name{gbayesSeqSim} \alias{gbayesSeqSim} \title{gbayesSeqSim} \usage{ gbayesSeqSim(est, asserts) } \arguments{ \item{est}{data frame created by \code{estSeqSim()}} \item{asserts}{list of lists. The first element of each list is the user-specified name for each assertion/prior combination, e.g., \code{"efficacy"}. The other elements are, in order, a character string equal to "<", ">", or "in", a parameter value \code{cutoff} (for "<" and ">") or a 2-vector specifying an interval for "in", and either a prior distribution mean and standard deviation named \code{mu} and \code{sigma} respectively, or a parameter value (\code{"cutprior"}) and tail area \code{"tailprob"}. If the latter is used, \code{mu} is assumed to be zero and \code{sigma} is solved for such that P(parameter > 'cutprior') = P(parameter < - 'cutprior') = \code{tailprob}.} } \value{ a data frame with number of rows equal to that of \code{est} with a number of new columns equal to the number of assertions added. The new columns are named \code{p1}, \code{p2}, \code{p3}, ... (posterior probabilities), \code{mean1}, \code{mean2}, ... (posterior means), and \code{sd1}, \code{sd2}, ... (posterior standard deviations). The returned data frame also has an attribute \code{asserts} added which is the original \code{asserts} augmented with any derived \code{mu} and \code{sigma} and converted to a data frame, and another attribute \code{alabels} which is a named vector used to map \code{p1}, \code{p2}, ... to the user-provided labels in \code{asserts}. } \description{ Simulate Bayesian Sequential Treatment Comparisons Using a Gaussian Model } \details{ Simulate a sequential trial under a Gaussian model for parameter estimates, and Gaussian priors using simulated estimates and variances returned by \code{estSeqSim}. For each row of the data frame \code{est} and for each prior/assertion combination, computes the posterior probability of the assertion. 
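As a reminder of the conjugate calculation assumed by this Gaussian setup (see \code{gbayes} for the general case): if the prior is Gaussian with mean \eqn{\mu} and variance \eqn{\sigma^2}, and a look yields an estimate \eqn{\hat{\beta}} with variance \eqn{v}, the posterior is Gaussian with variance \eqn{1/(1/\sigma^2 + 1/v)} and mean \eqn{(\mu/\sigma^2 + \hat{\beta}/v)/(1/\sigma^2 + 1/v)}, so each posterior probability is a Gaussian tail area or interval probability computed from these quantities. 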
} \examples{ \dontrun{ # Simulate Bayesian operating characteristics for an unadjusted # proportional odds comparison (Wilcoxon test) # For 100 simulations, 5 looks, 2 true parameter values, and # 2 assertion/prior combinations, compute the posterior probability # Use a low-level logistic regression call to speed up simulations # Use data.table to compute various summary measures # Total simulation time: 2s lfit <- function(x, y) { f <- rms::lrm.fit(x, y) k <- length(coef(f)) c(coef(f)[k], vcov(f)[k, k]) } gdat <- function(beta, n1, n2) { # Cell probabilities for a 7-category ordinal outcome for the control group p <- c(2, 1, 2, 7, 8, 38, 42) / 100 # Compute cell probabilities for the treated group p2 <- pomodm(p=p, odds.ratio=exp(beta)) y1 <- sample(1 : 7, n1, p, replace=TRUE) y2 <- sample(1 : 7, n2, p2, replace=TRUE) list(y1=y1, y2=y2) } # Assertion 1: log(OR) < 0 under prior with prior mean 0.1 and sigma 1 on log OR scale # Assertion 2: OR between 0.9 and 1/0.9 with prior mean 0 and sigma computed so that # P(OR > 2) = 0.05 asserts <- list(list('Efficacy', '<', 0, mu=0.1, sigma=1), list('Similarity', 'in', log(c(0.9, 1/0.9)), cutprior=log(2), tailprob=0.05)) set.seed(1) est <- estSeqSim(c(0, log(0.7)), looks=c(50, 75, 95, 100, 200), gendat=gdat, fitter=lfit, nsim=100) z <- gbayesSeqSim(est, asserts) head(z) attr(z, 'asserts') # Compute the proportion of simulations that hit targets (different target posterior # probabilities for efficacy vs. similarity) # For the efficacy assessment compute the first look at which the target # was hit (set to infinity if never hit) require(data.table) z <- data.table(z) u <- z[, .(first=min(look[p1 > 0.95])), by=.(parameter, sim)] # Compute the proportion of simulations that ever hit the target and # that hit it by the 100th subject u[, .(ever=mean(first < Inf)), by=.(parameter)] u[, .(by100=mean(first <= 100)), by=.(parameter)] } } \seealso{ \code{gbayes()}, \code{estSeqSim()}, \code{simMarkovOrd()}, \code{estSeqMarkovOrd()} } \author{ Frank Harrell } Hmisc/man/ciapower.Rd0000644000176200001440000000433313714234051014173 0ustar liggesusers\name{ciapower} \alias{ciapower} \title{ Power of Interaction Test for Exponential Survival } \description{ Uses the method of Peterson and George to compute the power of an interaction test in a 2 x 2 setup in which all 4 distributions are exponential. This will be the same as the power of the Cox model test if assumptions hold. The test is 2-tailed. The duration of accrual is specified (constant accrual is assumed), as is the minimum follow-up time. The maximum follow-up time is then \code{accrual + tmin}. Treatment allocation is assumed to be 1:1. 
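Recall that for an exponential survival distribution, a mortality of \eqn{m} by time \eqn{t} corresponds to a constant hazard rate of \eqn{-\log(1 - m)/t}; this standard conversion is how the mortality and reduction arguments below determine the four exponential distributions. 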
} \usage{ ciapower(tref, n1, n2, m1c, m2c, r1, r2, accrual, tmin, alpha=0.05, pr=TRUE) } \arguments{ \item{tref}{ time at which mortalities estimated } \item{n1}{ total sample size, stratum 1 } \item{n2}{ total sample size, stratum 2 } \item{m1c}{ tref-year mortality, stratum 1 control } \item{m2c}{ tref-year mortality, stratum 2 control } \item{r1}{ \% reduction in \code{m1c} by intervention, stratum 1 } \item{r2}{ \% reduction in \code{m2c} by intervention, stratum 2 } \item{accrual}{ duration of accrual period } \item{tmin}{ minimum follow-up time } \item{alpha}{ type I error probability } \item{pr}{ set to \code{FALSE} to suppress printing of details }} \value{ power } \section{Side Effects}{ prints } \section{AUTHOR}{ Frank Harrell Department of Biostatistics Vanderbilt University \email{fh@fharrell.com} } \references{ Peterson B, George SL: Controlled Clinical Trials 14:511--522; 1993. } \seealso{ \code{\link{cpower}}, \code{\link{spower}} } \examples{ # Find the power of a race x treatment test.  25\% of patients will # be non-white and the total sample size is 14000. # Accrual is for 1.5 years and minimum follow-up is 5y. # Reduction in 5-year mortality is 15\% for whites, 0\% or -5\% for # non-whites.  5-year mortality for control subjects is assumed to # be 0.18 for whites, 0.23 for non-whites. n <- 14000 for(nonwhite.reduction in c(0,-5)) { cat("\n\n\n\% Reduction in 5-year mortality for non-whites:", nonwhite.reduction, "\n\n") pow <- ciapower(5, .75*n, .25*n, .18, .23, 15, nonwhite.reduction, 1.5, 5) cat("\n\nPower:",format(pow),"\n") } } \keyword{survival} \keyword{htest} \concept{power} \concept{study design} Hmisc/man/errbar.Rd0000644000176200001440000001005413166554716013652 0ustar liggesusers\name{errbar} \alias{errbar} \title{Plot Error Bars} \description{ Adds vertical error bars to an existing plot or makes a new plot with error bars. } \usage{ errbar(x, y, yplus, yminus, cap=0.015, main = NULL, sub=NULL, xlab=as.character(substitute(x)), ylab=if(is.factor(x) || is.character(x)) "" else as.character(substitute(y)), add=FALSE, lty=1, type='p', ylim=NULL, lwd=1, pch=16, errbar.col, Type=rep(1, length(y)), \dots) } \arguments{ \item{x}{ vector of numeric x-axis values (for vertical error bars) or a factor or character variable (for horizontal error bars, \code{x} representing the group labels) } \item{y}{ vector of y-axis values. } \item{yplus}{ vector of y-axis values: the tops of the error bars. } \item{yminus}{ vector of y-axis values: the bottoms of the error bars. } \item{cap}{ the width of the little lines at the tops and bottoms of the error bars in units of the width of the plot.  Defaults to \code{0.015}. } \item{main}{ a main title for the plot, passed to \code{plot}, see also \code{\link{title}}. } \item{sub}{ a sub title for the plot, passed to \code{plot} } \item{xlab}{ optional x-axis labels if \code{add=FALSE}. } \item{ylab}{ optional y-axis labels if \code{add=FALSE}.  Defaults to blank for horizontal charts. } \item{add}{ set to \code{TRUE} to add bars to an existing plot (available only for vertical error bars) } \item{lty}{ type of line for error bars } \item{type}{ type of point.  Use \code{type="b"} to connect dots. } \item{ylim}{ y-axis limits.  Default is to use range of \code{y}, \code{yminus}, and \code{yplus}.  For horizontal charts, \code{ylim} is really the \code{x}-axis range, excluding differences. } \item{lwd}{ line width for line segments (not main line) } \item{pch}{ character to use as the point.
} \item{errbar.col}{ color to use for drawing error bars. } \item{Type}{ used for horizontal bars only.  Is an integer vector with values \code{1} if corresponding values represent simple estimates, \code{2} if they represent differences. } \item{...}{ other parameters passed to all graphics functions. } } \details{ \code{errbar} adds vertical error bars to an existing plot or makes a new plot with error bars. It can also make a horizontal error bar plot that shows error bars for group differences as well as bars for groups.  For the latter type of plot, the lower x-axis scale corresponds to group estimates and the upper scale corresponds to differences.  The spacings of the two scales are identical but the scale for differences has its origin shifted so that zero may be included.  If at least one of the confidence intervals includes zero, a vertical dotted reference line at zero is drawn. } \author{ Charles Geyer, University of Chicago.  Modified by Frank Harrell, Vanderbilt University, to handle missing data, to add the parameters \code{add} and \code{lty}, and to implement horizontal charts with differences. } \examples{ set.seed(1) x <- 1:10 y <- x + rnorm(10) delta <- runif(10) errbar( x, y, y + delta, y - delta ) # Show bootstrap nonparametric CLs for 3 group means and for # pairwise differences on same graph group <- sample(c('a','b','d'), 200, TRUE) y <- runif(200) + .25*(group=='b') + .5*(group=='d') cla <- smean.cl.boot(y[group=='a'],B=100,reps=TRUE)  # usually B=1000 a <- attr(cla,'reps') clb <- smean.cl.boot(y[group=='b'],B=100,reps=TRUE) b <- attr(clb,'reps') cld <- smean.cl.boot(y[group=='d'],B=100,reps=TRUE) d <- attr(cld,'reps') a.b <- quantile(a-b,c(.025,.975)) a.d <- quantile(a-d,c(.025,.975)) b.d <- quantile(b-d,c(.025,.975)) errbar(c('a','b','d','a - b','a - d','b - d'), c(cla[1],clb[1],cld[1],cla[1]-clb[1],cla[1]-cld[1],clb[1]-cld[1]), c(cla[3],clb[3],cld[3],a.b[2],a.d[2],b.d[2]), c(cla[2],clb[2],cld[2],a.b[1],a.d[1],b.d[1]), Type=c(1,1,1,2,2,2), xlab='', ylab='') } \keyword{hplot} % Converted by Sd2Rd version 1.21. Hmisc/man/deff.Rd0000644000176200001440000000202013714234051013265 0ustar liggesusers\name{deff} \alias{deff} \title{ Design Effect and Intra-cluster Correlation } \description{ Computes the Kish design effect and corresponding intra-cluster correlation for a single cluster-sampled variable } \usage{ deff(y, cluster) } \arguments{ \item{y}{ variable to analyze } \item{cluster}{ a variable whose unique values indicate cluster membership.  Any type of variable is allowed. } } \value{ a vector with named elements \code{n} (total number of non-missing observations), \code{clusters} (number of clusters after deleting missing data), \code{rho} (intra-cluster correlation), and \code{deff} (design effect).
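For reference, the design effect and the intra-cluster correlation are linked by the Kish approximation \code{deff = 1 + (m - 1)*rho} for a common cluster size \code{m}, so \code{deff} near 1 indicates negligible clustering.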
} \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link[rms]{bootcov}}, \code{\link[rms]{robcov}} } \examples{ set.seed(1) blood.pressure <- rnorm(1000, 120, 15) clinic <- sample(letters, 1000, replace=TRUE) deff(blood.pressure, clinic) } \keyword{htest} \concept{study design} \concept{cluster sampling} Hmisc/man/transace.Rd0000644000176200001440000006104414275454254014200 0ustar liggesusers\name{transace} \alias{transace} \alias{areg.boot} \alias{print.areg.boot} \alias{plot.areg.boot} \alias{predict.areg.boot} \alias{summary.areg.boot} \alias{print.summary.areg.boot} \alias{Function.areg.boot} \alias{Mean} \alias{Mean.areg.boot} \alias{Quantile} \alias{Quantile.areg.boot} \alias{monotone} \alias{smearingEst} \title{ Additive Regression and Transformations using ace or avas } \description{ \code{transace} is \code{\link[acepack]{ace}} packaged for easily automatically transforming all variables in a matrix.  \code{transace} is a fast one-iteration version of \code{\link{transcan}} without imputation of \code{NA}s. \code{areg.boot} uses \code{\link{areg}} or \code{\link[acepack]{avas}} to fit additive regression models allowing all variables in the model (including the left-hand-side) to be transformed, with transformations chosen so as to optimize certain criteria.  The default method uses \code{\link{areg}} whose goal it is to maximize \eqn{R^2}. \code{method="avas"} explicitly tries to transform the response variable so as to stabilize the variance of the residuals. All-variables-transformed models tend to inflate \code{R^2} and it can be difficult to get confidence limits for each transformation. \code{areg.boot} solves both of these problems using the bootstrap. As with the \code{\link[rms]{validate}} function in the \pkg{rms} library, the Efron bootstrap is used to estimate the optimism in the apparent \eqn{R^2}, and this optimism is subtracted from the apparent \eqn{R^2} to obtain a bias-corrected \eqn{R^2}.  This is done however on the transformed response variable scale. Tests with 3 predictors show that the \code{\link[acepack]{avas}} and \code{\link[acepack]{ace}} estimates are unstable unless the sample size exceeds 350.  Apparent \eqn{R^2} with low sample sizes can be very inflated, and bootstrap estimates of \eqn{R^2} can be even more unstable in such cases, resulting in optimism-corrected \eqn{R^2} that are much lower even than the actual \eqn{R^2}.  The situation can be improved a little by restricting predictor transformations to be monotonic.  On the other hand, the \code{areg} approach allows one to control overfitting by specifying the number of knots to use for each continuous variable in a restricted cubic spline function. For \code{method="avas"} the response transformation is restricted to be monotonic.  You can specify restrictions for transformations of predictors (and linearity for the response).  When the first argument is a formula, the function automatically determines which variables are categorical (i.e., \code{factor}, \code{category}, or character vectors). Specify linear transformations by enclosing variables by the identity function (\code{I()}), and specify monotonicity by using \code{monotone(variable)}.  Monotonicity restrictions are not allowed with \code{method="areg"}.
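For example (a sketch using hypothetical variable names), a call such as
\preformatted{
f <- areg.boot(y ~ monotone(age) + I(pressure) + race, method='avas')
}
restricts \code{age} to a monotonic transformation and forces \code{pressure} to be linear.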
The \code{\link{summary}} method for \code{areg.boot} computes bootstrap estimates of standard errors of differences in predicted responses (usually on the original scale) for selected levels of each predictor against the lowest level of the predictor. The smearing estimator (see below) can be used here to estimate differences in predicted means, medians, or many other statistics. By default, quartiles are used for continuous predictors and all levels are used for categorical ones. See \cite{Details} below. There is also a \code{\link{plot}} method for plotting transformation estimates, transformations for individual bootstrap re-samples, and pointwise confidence limits for transformations. Unless you already have a \code{par(mfrow=)} in effect with more than one row or column, \code{plot} will try to fit the plots on one page. A \code{\link{predict}} method computes predicted values on the original or transformed response scale, or a matrix of transformed predictors. There is a \code{\link{Function}} method for producing a list of \R functions that perform the final fitted transformations. There is also a \code{\link{print}} method for \code{areg.boot} objects. When estimated means (or medians or other statistical parameters) are requested for models fitted with \code{areg.boot} (by \code{summary.areg.boot} or \code{predict.areg.boot}), the \dQuote{smearing} estimator of \cite{Duan (1983)} is used. Here we estimate the mean of the untransformed response by computing the arithmetic mean of \eqn{ginverse(lp + residuals)}, where ginverse is the inverse of the nonparametric transformation of the response (obtained by reverse linear interpolation), lp is the linear predictor for an individual observation on the transformed scale, and residuals is the entire vector of residuals estimated from the fitted model, on the transformed scales (n residuals for n original observations). The \code{smearingEst} function computes the general smearing estimate. For efficiency \code{smearingEst} recognizes that quantiles are transformation-preserving, i.e., when one wishes to estimate a quantile of the untransformed distribution one just needs to compute the inverse transformation of the transformed estimate after the chosen quantile of the vector of residuals is added to it. When the median is desired, the estimate is \eqn{ginverse(lp + \mbox{median}(residuals))}{ ginverse(lp + median(residuals))}. See the last example for how \code{smearingEst} can be used outside of \code{areg.boot}. \code{Mean} is a generic function that returns an \R function to compute the estimate of the mean of a variable. Its input is typically some kind of model fit object. Likewise, \code{Quantile} is a generic quantile function-producing function. \code{Mean.areg.boot} and \code{Quantile.areg.boot} create functions of a vector of linear predictors that transform them into the smearing estimates of the mean or quantile of the response variable, respectively. \code{Quantile.areg.boot} produces exactly the same value as \code{predict.areg.boot} or \code{smearingEst}. \code{Mean} approximates the mapping of linear predictors to means over an evenly spaced grid of by default 200 points. Linear interpolation is used between these points. This approximate method is much faster than the full smearing estimator once \code{Mean} creates the function. These functions are especially useful in \code{\link[rms]{nomogram}} (see the example on hypothetical data). 
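A minimal sketch of their use, assuming \code{f} is an existing \code{areg.boot} fit:
\preformatted{
meany <- Mean(f)      # function mapping linear predictors to smearing means
medy  <- Quantile(f)  # default q=.5 gives estimated medians
}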
} \usage{ transace(x, monotonic=NULL, categorical=NULL, binary=NULL, pl=TRUE) areg.boot(x, data, weights, subset, na.action=na.delete, B=100, method=c("areg","avas"), nk=4, evaluation=100, valrsq=TRUE, probs=c(.25,.5,.75), tolerance=NULL) \method{print}{areg.boot}(x, \dots) \method{plot}{areg.boot}(x, ylim, boot=TRUE, col.boot=2, lwd.boot=.15, conf.int=.95, \dots) smearingEst(transEst, inverseTrans, res, statistic=c('median','quantile','mean','fitted','lp'), q) \method{summary}{areg.boot}(object, conf.int=.95, values, adj.to, statistic='median', q, \dots) \method{print}{summary.areg.boot}(x, \dots) \method{predict}{areg.boot}(object, newdata, statistic=c("lp", "median", "quantile", "mean", "fitted", "terms"), q=NULL, \dots) \method{Function}{areg.boot}(object, type=c('list','individual'), ytype=c('transformed','inverse'), prefix='.', suffix='', pos=-1, \dots) Mean(object, \dots) Quantile(object, \dots) \method{Mean}{areg.boot}(object, evaluation=200, \dots) \method{Quantile}{areg.boot}(object, q=.5, \dots) } \arguments{ \item{x}{ for \code{transace} a numeric matrix.  For \code{areg.boot} \code{x} is a formula.  For \code{print} or \code{plot}, an object created by \code{areg.boot}.  For \code{print.summary.areg.boot}, an object created by \code{summary.areg.boot}. } \item{object}{ an object created by \code{areg.boot}, or a model fit object suitable for \code{Mean} or \code{Quantile}. } \item{transEst}{ a vector of transformed values.  In log-normal regression these could be predicted log(Y) for example. } \item{inverseTrans}{ a function specifying the inverse transformation needed to change \code{transEst} to the original untransformed scale. \code{inverseTrans} may also be a 2-element list defining a mapping from the transformed values to untransformed values.  Linear interpolation is used in this case to obtain untransformed values. } \item{binary, categorical, monotonic}{ These are vectors of variable names specifying what to assume about each column of \code{x} for \code{transace}.  Binary variables are not transformed, of course. } \item{pl}{ set \code{pl=FALSE} to prevent \code{transace} from plotting each fitted transformation } \item{data}{ data frame to use if \code{x} is a formula and variables are not already in the search list } \item{weights}{ a numeric vector of observation weights. By default, all observations are weighted equally. } \item{subset}{ an expression to subset data if \code{x} is a formula } \item{na.action}{ a function specifying how to handle \code{NA}s.  Default is \code{\link{na.delete}}. } \item{B}{ number of bootstrap samples (default=100) } \item{method}{ \code{"areg"} (the default) or \code{"avas"} } \item{nk}{ number of knots for continuous variables not restricted to be linear.  Default is 4.  One or two is not allowed.  \code{nk=0} forces linearity for all continuous variables. } \item{evaluation}{ number of equally-spaced points at which to evaluate (and save) the nonparametric transformations derived by \code{\link[acepack]{avas}} or \code{\link[acepack]{ace}}.  Default is 100.  For \code{Mean.areg.boot}, \code{evaluation} is the number of points at which to evaluate exact smearing estimates, to approximate them using linear interpolation (default is 200).
} \item{valrsq}{ set to \code{FALSE} to more quickly do bootstrapping without validating \eqn{R^2} } \item{probs}{ vector of probabilities denoting the quantiles of continuous predictors to use in estimating effects of those predictors } \item{tolerance}{ singularity criterion; list the source code for the \code{\link{lm.fit.qr.bare}} function. } \item{res}{ a vector of residuals from the transformed model.  Not required when \code{statistic="lp"} or \code{statistic="fitted"}. } \item{statistic}{ statistic to estimate with the smearing estimator.  For \code{smearingEst}, the default results in computation of the sample median of the model residuals, then \code{smearingEst} adds the median residual and back-transforms to get estimated median responses on the original scale.  \code{statistic="lp"} causes predicted transformed responses to be computed.  For \code{smearingEst}, the result (for \code{statistic="lp"}) is the input argument \code{transEst}. \code{statistic="fitted"} gives predicted untransformed responses, i.e., \eqn{ginverse(lp)}, where ginverse is the inverse of the estimated response transformation, estimated by reverse linear interpolation on the tabulated nonparametric response transformation or by using an explicit analytic function. \code{statistic="quantile"} generalizes \code{"median"} to any single quantile \code{q} which must be specified.  \code{"mean"} causes the population mean response to be estimated.  For \code{predict.areg.boot}, \code{statistic="terms"} returns a matrix of transformed predictors. \code{statistic} can also be any \R function that computes a single value on a vector of values, such as \code{statistic=var}.  Note that in this case the function name is not quoted. } \item{q}{ a single quantile of the original response scale to estimate, when \code{statistic="quantile"}, or for \code{Quantile.areg.boot}. } \item{ylim}{ 2-vector of y-axis limits } \item{boot}{ set to \code{FALSE} to not plot any bootstrapped transformations.  Set it to an integer k to plot the first k bootstrap estimates. } \item{col.boot}{ color for bootstrapped transformations } \item{lwd.boot}{ line width for bootstrapped transformations } \item{conf.int}{ confidence level (0-1) for pointwise bootstrap confidence limits and for estimated effects of predictors in \code{summary.areg.boot}.  The latter assumes normality of the estimated effects. } \item{values}{ a list of vectors of settings of the predictors, for predictors for which you want to override settings determined from \code{probs}.  The list must have named components, with names corresponding to the predictors.  Example: \code{values=list(x1=c(2,4,6,8), x2=c(-1,0,1))} specifies that \code{summary} is to estimate the effect on \code{y} of changing \code{x1} from 2 to 4, 2 to 6, 2 to 8, and separately, of changing \code{x2} from -1 to 0 and -1 to 1. } \item{adj.to}{ a named vector of adjustment constants, for setting all other predictors when examining the effect of a single predictor in \code{summary}.  The more nonlinear is the transformation of \code{y} the more the adjustment settings will matter.  Default values are the medians of the values defined by \code{values} or \code{probs}.  You only need to name the predictors for which you are overriding the default settings. Example: \code{adj.to=c(x2=0,x5=10)} will set \code{x2} to 0 and \code{x5} to 10 when assessing the impact of variation in the other predictors. } \item{newdata}{ a data frame or list containing the same number of values of all of the predictors used in the fit.
For \code{\link{factor}} predictors the \samp{levels} attribute does not need to be in the same order as those used in the original fit, and not all levels need to be represented.  If \code{newdata} is omitted, you can still obtain linear predictors (on the transformed response scale) and fitted values (on the original response scale), but not \code{"terms"}. } \item{type}{ specifies how \code{\link{Function}} is to return the series of functions that define the transformations of all variables.  By default a list is created, with the names of the list elements being the names of the variables.  Specify \code{type="individual"} to have separate functions created in the current environment (\code{pos=-1}, the default) or in the location defined by \code{pos} if \code{pos} is specified.  For the latter method, the names of the objects created are the names of the corresponding variables, prefixed by \code{prefix} and with \code{suffix} appended to the end.  If any of \code{pos}, \code{prefix}, or \code{suffix} is specified, \code{type} is automatically set to \code{"individual"}. } \item{ytype}{ By default the first function created by \code{Function} is the y-transformation.  Specify \code{ytype="inverse"} to instead create the inverse of the transformation, to be able to obtain originally scaled y-values. } \item{prefix}{ character string defining the prefix for function names created when \code{type="individual"}.  By default, the function specifying the transformation for variable \code{x} will be named \code{.x}. } \item{suffix}{ character string defining the suffix for the function names } \item{pos}{ See \code{\link{assign}}. } \item{\dots}{ arguments passed to other functions } } \value{ \code{transace} returns a matrix like \code{x} but containing transformed values.  This matrix has attributes \code{rsq} (vector of \eqn{R^2} with which each variable can be predicted from the others) and \code{omitted} (row numbers of \code{x} that were deleted due to \code{NA}s). \code{areg.boot} returns a list of class \samp{areg.boot} containing many elements, including (if \code{valrsq} is \code{TRUE}) \code{rsquare.app} and \code{rsquare.val}.  \code{summary.areg.boot} returns a list of class \samp{summary.areg.boot} containing a matrix of results for each predictor and a vector of adjust-to settings.  It also contains the call and a \samp{label} for the statistic that was computed.  A \code{print} method for these objects handles the printing.  \code{predict.areg.boot} returns a vector unless \code{statistic="terms"}, in which case it returns a matrix.  \code{Function.areg.boot} returns by default a list of functions whose argument is one of the variables (on the original scale) and whose returned values are the corresponding transformed values.  The names of the list of functions correspond to the names of the original variables.  When \code{type="individual"}, \code{Function.areg.boot} invisibly returns the vector of names of the created function objects. \code{Mean.areg.boot} and \code{Quantile.areg.boot} also return functions. \code{smearingEst} returns a vector of estimates of distribution parameters of class \samp{labelled} so that \code{print.labelled} will print a label documenting the estimate that was used (see \code{\link{label}}).  This label can be retrieved for other purposes by using e.g. \code{label(obj)}, where \code{obj} is the vector returned by \code{smearingEst}.
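As a minimal sketch of using the created transformation functions (assuming an existing fit \code{f} with a predictor \code{x1}):
\preformatted{
g <- Function(f)   # list of transformation functions, one per variable
g$x1(c(1, 2, 3))   # transformed values of x1
}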
} \details{ As \code{transace} only does one iteration over the predictors, it may not find optimal transformations and it will be dependent on the order of the predictors in \code{x}. \code{\link[acepack]{ace}} and \code{\link[acepack]{avas}} standardize transformed variables to have mean zero and variance one for each bootstrap sample, so if a predictor is not important it will still consistently have a positive regression coefficient.  Therefore using the bootstrap to estimate standard errors of the additive least squares regression coefficients would not help in drawing inferences about the importance of the predictors.  To do this, \code{summary.areg.boot} computes estimates of, e.g., the inter-quartile range effects of predictors in predicting the response variable (after untransforming it).  As an example, at each bootstrap repetition the estimated transformed value of one of the predictors is computed at the lower quartile, median, and upper quartile of the raw value of the predictor.  These transformed x values are then multiplied by the least squares estimate of the partial regression coefficient for that transformed predictor in predicting transformed y.  Then these weighted transformed x values have the weighted transformed x value corresponding to the lower quartile subtracted from them, to estimate an x effect accounting for nonlinearity.  The last difference computed is then the standardized effect of raising x from its lowest to its highest quartile.  Before computing differences, predicted values are back-transformed to be on the original y scale in a way depending on \code{statistic} and \code{q}. The sample standard deviation of these effects (differences) is taken over the bootstrap samples, and this is used to compute approximate confidence intervals for effects and approximate P-values, both assuming normality. \code{predict} does not re-insert \code{NA}s corresponding to observations that were dropped before the fit, when \code{newdata} is omitted. \code{statistic="fitted"} estimates the same quantity as \code{statistic="median"} if the residuals on the transformed response have a symmetric distribution.  The two provide identical estimates when the sample median of the residuals is exactly zero.  The sample mean of the residuals is constrained to be exactly zero although this does not simplify anything. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University School of Medicine \cr \email{fh@fharrell.com} } \seealso{ \code{\link[acepack]{avas}}, \code{\link[acepack]{ace}}, \code{\link[rms]{ols}}, \code{\link[rms]{validate}}, \code{\link[rms]{predab.resample}}, \code{\link{label}}, \code{\link[rms]{nomogram}} } \references{ Harrell FE, Lee KL, Mark DB (1996): Stat in Med 15:361--387. Duan N (1983): Smearing estimate: A nonparametric retransformation method.  JASA 78:605--610. Wang N, Ruppert D (1995): Nonparametric estimation of the transformation in the transform-both-sides regression model.  JASA 90:522--534. See \code{\link[acepack]{avas}}, \code{\link[acepack]{ace}} for primary references.
} \examples{ # xtrans <- transace(cbind(age,sex,blood.pressure,race.code), #                    binary='sex', monotonic='age', #                    categorical='race.code') # Generate random data from the model y = exp(x1 + epsilon/3) where # x1 and epsilon are Gaussian(0,1) set.seed(171)  # to be able to reproduce example x1 <- rnorm(200) x2 <- runif(200)  # a variable that is really unrelated to y x3 <- factor(sample(c('cat','dog','cow'), 200,TRUE))  # also unrelated to y y  <- exp(x1 + rnorm(200)/3) f  <- areg.boot(y ~ x1 + x2 + x3, B=40) f plot(f) # Note that the fitted transformation of y is very nearly log(y) # (the appropriate one), the transformation of x1 is nearly linear, # and the transformations of x2 and x3 are essentially flat # (specifying monotone(x2) if method='avas' would have resulted # in a smaller confidence band for x2) summary(f) # use summary(f, values=list(x2=c(.2,.5,.8))) for example if you # want to use nice round values for judging effects # Plot Y hat vs. Y (this doesn't work if there were NAs) plot(fitted(f), y)  # or: plot(predict(f,statistic='fitted'), y) # Show fit of model by varying x1 on the x-axis and creating separate # panels for x2 and x3.  For x2 using only a few discrete values newdat <- expand.grid(x1=seq(-2,2,length=100),x2=c(.25,.75), x3=c('cat','dog','cow')) yhat <- predict(f, newdat, statistic='fitted') # statistic='mean' to get estimated mean rather than simple inverse trans. xYplot(yhat ~ x1 | x2, groups=x3, type='l', data=newdat) \dontrun{ # Another example, on hypothetical data f <- areg.boot(response ~ I(age) + monotone(blood.pressure) + race) # use I(response) to not transform the response variable plot(f, conf.int=.9) # Check distribution of residuals plot(fitted(f), resid(f)) qqnorm(resid(f)) # Refit this model using ols so that we can draw a nomogram of it. # The nomogram will show the linear predictor, median, mean. # The last two are smearing estimators. Function(f, type='individual')  # create transformation functions f.ols <- ols(.response(response) ~ age + .blood.pressure(blood.pressure) + .race(race)) # Note: This model is almost exactly the same as f but there # will be very small differences due to interpolation of # transformations meanr <- Mean(f)      # create function of lp computing mean response medr  <- Quantile(f)  # default quantile is .5 nomogram(f.ols, fun=list(Mean=meanr,Median=medr)) # Create S functions that will do the transformations # This is a table look-up with linear interpolation g <- Function(f) plot(blood.pressure, g$blood.pressure(blood.pressure)) # produces the central curve in the last plot done by plot(f) } # Another simulated example, where y has a log-normal distribution # with mean x and variance 1.  Untransformed y thus has median # exp(x) and mean exp(x + .5sigma^2) = exp(x + .5) # First generate data from the model y = exp(x + epsilon), # epsilon ~ Gaussian(0, 1) set.seed(139) n <- 1000 x <- rnorm(n) y <- exp(x + rnorm(n)) f <- areg.boot(y ~ x, B=20) plot(f)   # note log shape for y, linear for x.  Good!
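# Since valrsq=TRUE by default, the fit also carries apparent and
# optimism-corrected R^2 (components rsquare.app and rsquare.val,
# documented in the Value section above)
f$rsquare.app
f$rsquare.val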
xs <- c(-2, 0, 2) d <- data.frame(x=xs) predict(f, d, 'fitted') predict(f, d, 'median') # almost same; median residual=-.001 exp(xs) # population medians predict(f, d, 'mean') exp(xs + .5) # population means # Show how smearingEst works res <- c(-1,0,1) # define residuals y <- 1:5 ytrans <- log(y) ys <- seq(.1,15,length=50) trans.approx <- list(x=log(ys), y=ys) options(digits=4) smearingEst(ytrans, exp, res, 'fitted') # ignores res smearingEst(ytrans, trans.approx, res, 'fitted') # ignores res smearingEst(ytrans, exp, res, 'median') # median res=0 smearingEst(ytrans, exp, res+.1, 'median') # median res=.1 smearingEst(ytrans, trans.approx, res, 'median') smearingEst(ytrans, exp, res, 'mean') mean(exp(ytrans[2] + res)) # should equal 2nd # above smearingEst(ytrans, trans.approx, res, 'mean') smearingEst(ytrans, trans.approx, res, mean) # Last argument can be any statistical function operating # on a vector that returns a single value } \keyword{nonparametric} \keyword{smooth} \keyword{multivariate} \keyword{nonlinear} \keyword{regression} \concept{bootstrap} \concept{model validation} Hmisc/man/score.binary.Rd0000644000176200001440000000533612243661443014771 0ustar liggesusers\name{score.binary} \alias{score.binary} \title{ Score a Series of Binary Variables } \description{ Creates a new variable from a series of logical conditions. The new variable can be a hierarchical category or score derived from considering the rightmost \code{TRUE} value among the input variables, an additive point score, a union, or any of several others by specifying a function using the \code{fun} argument. } \usage{ score.binary(\dots, fun=max, points=1:p, na.rm=funtext == "max", retfactor=TRUE) } \arguments{ \item{...}{ a list of variables or expressions which are considered to be binary or logical } \item{fun}{ a function to compute on each row of the matrix represented by a specific observation of all the variables in \code{\dots} } \item{points}{ points to assign to successive elements of \code{\dots} . The default is \code{1, 2, \dots, p}, where \code{p} is the number of elements. If you specify one number for \code{points}, that number will be duplicated (i.e., equal weights are assumed). } \item{na.rm}{ set to \code{TRUE} to remove \code{NA}s from consideration when processing each row of the matrix of variables in \code{\dots} . For \code{fun=max}, \code{na.rm=TRUE} is the default since \code{score.binary} assumes that a hierarchical scale is based on available information. Otherwise, \code{na.rm=FALSE} is assumed. For \code{fun=mean} you may want to specify \code{na.rm=TRUE}. } \item{retfactor}{ applies if \code{fun=max}, in which case \code{retfactor=TRUE} makes \code{score.binary} return a \code{factor} object since a hierarchical scale implies a unique choice. }} \value{ a \code{factor} object if \code{retfactor=TRUE} and \code{fun=max} or a numeric vector otherwise. Will not contain NAs if \code{na.rm=TRUE} unless every variable in a row is \code{NA}. If a \code{factor} object is returned, it has levels \code{"none"} followed by character string versions of the arguments given in \code{\dots} . 
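For instance, with two conditions, \code{fun=sum}, and the default \code{points=1:2}, an observation with both conditions \code{TRUE} receives the value \code{1 + 2 = 3} (as in the additive scale example below).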
} \seealso{ \code{\link{any}}, \code{\link{sum}}, \code{\link{max}}, \code{\link{factor}} } \examples{ set.seed(1) age <- rnorm(25, 70, 15) previous.disease <- sample(0:1, 25, TRUE) #Hierarchical scale, highest of 1:age>70 2:previous.disease score.binary(age>70, previous.disease, retfactor=FALSE) #Same as above but return factor variable with levels "none" "age>70" # "previous.disease" score.binary(age>70, previous.disease) #Additive scale with weights 1:age>70 2:previous.disease score.binary(age>70, previous.disease, fun=sum) #Additive scale, equal weights score.binary(age>70, previous.disease, fun=sum, points=c(1,1)) #Same as saying points=1 #Union of variables, to create a new binary variable score.binary(age>70, previous.disease, fun=any) } \keyword{manip} % Converted by Sd2Rd version 1.21. Hmisc/man/aregImpute.Rd0000644000176200001440000006517613714234051014500 0ustar liggesusers\name{aregImpute} \alias{aregImpute} \alias{print.aregImpute} \alias{plot.aregImpute} \alias{reformM} \title{ Multiple Imputation using Additive Regression, Bootstrapping, and Predictive Mean Matching } \description{ The \code{transcan} function creates flexible additive imputation models but provides only an approximation to true multiple imputation as the imputation models are fixed before all multiple imputations are drawn. This ignores variability caused by having to fit the imputation models. \code{aregImpute} takes all aspects of uncertainty in the imputations into account by using the bootstrap to approximate the process of drawing predicted values from a full Bayesian predictive distribution. Different bootstrap resamples are used for each of the multiple imputations, i.e., for the \code{i}th imputation of a sometimes missing variable, \code{i=1,2,\dots n.impute}, a flexible additive model is fitted on a sample with replacement from the original data and this model is used to predict all of the original missing and non-missing values for the target variable. \code{areg} is used to fit the imputation models. By default, linearity is assumed for target variables (variables being imputed) and \code{nk=3} knots are assumed for continuous predictors transformed using restricted cubic splines. If \code{nk} is three or greater and \code{tlinear} is set to \code{FALSE}, \code{areg} simultaneously finds transformations of the target variable and of all of the predictors, to get a good fit assuming additivity, maximizing \eqn{R^2}, using the same canonical correlation method as \code{transcan}. Flexible transformations may be overridden for specific variables by specifying the identity transformation for them. When a categorical variable is being predicted, the flexible transformation is Fisher's optimum scoring method. Nonlinear transformations for continuous variables may be nonmonotonic. If \code{nk} is a vector, \code{areg}'s bootstrap and \code{crossval=10} options will be used to help find the optimum validating value of \code{nk} over values of that vector, at the last imputation iteration. For the imputations, the minimum value of \code{nk} is used. Instead of defaulting to taking random draws from fitted imputation models using random residuals as is done by \code{transcan}, \code{aregImpute} by default uses predictive mean matching with optional weighted probability sampling of donors rather than using only the closest match. 
Predictive mean matching works for binary, categorical, and continuous variables without the need for iterative maximum likelihood fitting for binary and categorical variables, and without the need for computing residuals or for curtailing imputed values to be in the range of actual data.  Predictive mean matching is especially attractive when the variable being imputed is also being transformed automatically.  See Details below for more information about the algorithm. A \code{"regression"} method is also available that is similar to that used in \code{transcan}.  This option should be used when mechanistic missingness requires the use of extrapolation during imputation. A \code{print} method summarizes the results, and a \code{plot} method plots distributions of imputed values.  Typically, \code{fit.mult.impute} will be called after \code{aregImpute}. If a target variable is transformed nonlinearly (i.e., if \code{nk} is greater than zero and \code{tlinear} is set to \code{FALSE}) and the estimated target variable transformation is non-monotonic, imputed values are not unique.  When \code{type='regression'}, a random choice of possible inverse values is made. The \code{reformM} function provides two ways of recreating a formula to give to \code{aregImpute} by reordering the variables in the formula. This is a modified version of a function written by Yong Hao Pua.  One can specify \code{nperm} to obtain a list of \code{nperm} randomly permuted variables.  The list is converted to a single ordinary formula if \code{nperm=1}.  If \code{nperm} is omitted, variables are sorted in descending order of the number of \code{NA}s.  \code{reformM} also prints a recommended number of multiple imputations to use, which is the larger of 5 and the percentage of incomplete observations. } \usage{ aregImpute(formula, data, subset, n.impute=5, group=NULL, nk=3, tlinear=TRUE, type=c('pmm','regression','normpmm'), pmmtype=1, match=c('weighted','closest','kclosest'), kclosest=3, fweighted=0.2, curtail=TRUE, boot.method=c('simple', 'approximate bayesian'), burnin=3, x=FALSE, pr=TRUE, plotTrans=FALSE, tolerance=NULL, B=75) \method{print}{aregImpute}(x, digits=3, \dots) \method{plot}{aregImpute}(x, nclass=NULL, type=c('ecdf','hist'), datadensity=c("hist", "none", "rug", "density"), diagnostics=FALSE, maxn=10, \dots) reformM(formula, data, nperm) } \arguments{ \item{formula}{ an S model formula.  You can specify restrictions for transformations of variables.  The function automatically determines which variables are categorical (i.e., \code{factor}, \code{category}, or character vectors).  Binary variables are automatically restricted to be linear.  Force linear transformations of continuous variables by enclosing variables by the identity function (\code{I()}).  It is recommended that \code{factor()} or \code{as.factor()} do not appear in the formula but instead variables be converted to factors as needed and stored in the data frame.  That way imputations for factor variables (done using \code{\link{impute.transcan}} for example) will be correct.  Currently \code{reformM} does not handle variables that are enclosed in functions such as \code{I()}. } \item{x}{ an object created by \code{aregImpute}.  For \code{aregImpute}, set \code{x} to \code{TRUE} to save the data matrix containing the final (number \code{n.impute}) imputations in the result.  This is needed if you want to later do out-of-sample imputation.  Categorical variables are coded as integers in this matrix.
} \item{data}{input raw data} \item{subset}{ These may also be specified.  You may not specify \code{na.action} as \code{na.retain} is always used. } \item{n.impute}{ number of multiple imputations.  \code{n.impute=5} is frequently recommended but 10 or more doesn't hurt. } \item{group}{a character or factor variable the same length as the number of observations in \code{data} and containing no \code{NA}s.  When \code{group} is present, causes a bootstrap sample of the observations corresponding to non-\code{NA}s of a target variable to have the same frequency distribution of \code{group} as that in the non-\code{NA}s of the original sample.  This can handle k-sample problems as well as lower the chance that a bootstrap sample will have a missing cell when the original cell frequency was low. } \item{nk}{number of knots to use for continuous variables.  When both the target variable and the predictors are having optimum transformations estimated, there is more instability than with normal regression so the complexity of the model should decrease more sharply as the sample size decreases.  Hence set \code{nk} to 0 (to force linearity for non-categorical variables) or 3 (minimum number of knots possible with a linear tail-restricted cubic spline) for small sample sizes.  Simulated problems as in the examples section can assist in choosing \code{nk}.  Set \code{nk} to a vector to get bootstrap-validated and 10-fold cross-validated \eqn{R^2} and mean and median absolute prediction errors for imputing each sometimes-missing variable, with \code{nk} ranging over the given vector.  The errors are on the original untransformed scale.  The mean absolute error is the recommended basis for choosing the number of knots (or linearity). } \item{tlinear}{set to \code{FALSE} to allow a target variable (variable being imputed) to have a nonlinear left-hand-side transformation when \code{nk} is 3 or greater} \item{type}{ The default is \code{"pmm"} for predictive mean matching, which is a more nonparametric approach that will work for categorical as well as continuous predictors.  Alternatively, use \code{"regression"} when all variables that are sometimes missing are continuous and the missingness mechanism is such that entire intervals of population values are unobserved.  See the Details section for more information.  Another method, \code{type="normpmm"}, only works when variables containing \code{NA}s are continuous and \code{tlinear} is \code{TRUE} (the default), meaning that the variable being imputed is not transformed when it is on the left hand model side.  \code{normpmm} assumes that the imputation regression parameter estimates are multivariately normally distributed and that the residual variance has a scaled chi-squared distribution.  For each imputation a random draw of the estimates is taken and a random draw from sigma is combined with those to get a random draw from the posterior predicted value distribution.  Predictive mean matching is then done matching these predicted values from incomplete observations with predicted values from complete potential donor observations, where the latter predictions are based on the imputation model least squares parameter estimates and not on random draws from the posterior.  For the \code{plot} method, specify \code{type="hist"} to draw histograms of imputed values with rug plots at the top, or \code{type="ecdf"} (the default) to draw empirical CDFs with spike histograms at the bottom.
} \item{pmmtype}{type of matching to be used for predictive mean matching when \code{type="pmm"}.  \code{pmmtype=2} means that predicted values for both target incomplete and complete observations come from a fit from the same bootstrap sample.  \code{pmmtype=1}, the default, means that predicted values for complete observations are based on additive regression fits on original complete observations (using last imputations for non-target variables as with the other methods), and using fits on a bootstrap sample to get predicted values for missing target variables.  See van Buuren (2012) section 3.4.2 where \code{pmmtype=1} is said to work much better when the number of variables is small.  \code{pmmtype=3} means that complete observation predicted values come from a bootstrap sample fit whereas target incomplete observation predicted values come from a sample with replacement from the bootstrap fit (approximate Bayesian bootstrap).} \item{match}{ Defaults to \code{match="weighted"} to do weighted multinomial probability sampling using the tricube function (similar to lowess) as the weights.  The argument of the tricube function is the absolute difference in transformed predicted values of all the donors and of the target predicted value, divided by a scaling factor.  The scaling factor in the tricube function is \code{fweighted} times the mean absolute difference between the target predicted value and all the possible donor predicted values.  Set \code{match="closest"} to find as the donor the observation having the closest predicted transformed value, even if that same donor is found repeatedly.  Set \code{match="kclosest"} to use a slower implementation that finds, after jittering the complete case predicted values, the \code{kclosest} complete cases on the target variable being imputed, then takes a random sample of one of these \code{kclosest} cases.} \item{kclosest}{see \code{match}} \item{fweighted}{ Smoothing parameter (multiple of mean absolute difference) used when \code{match="weighted"}, with a default value of 0.2.  Set \code{fweighted} to a number between 0.02 and 0.2 to force the donor to have a predicted value closer to the target, and set \code{fweighted} to larger values (but seldom larger than 1.0) to allow donor values to be less tightly matched.  See the examples below to learn how to study the relationship between \code{fweighted} and the standard deviation of multiple imputations within individuals.} \item{curtail}{applies if \code{type='regression'}, causing imputed values to be curtailed at the observed range of the target variable.  Set to \code{FALSE} to allow extrapolation outside the data range.} \item{boot.method}{By default, simple bootstrapping is used in which the target variable is predicted using a sample with replacement from the observations with non-missing target variable.  Specify \code{boot.method='approximate bayesian'} to build the imputation models from a sample with replacement from a sample with replacement of the observations with non-missing targets.  Preliminary simulations have shown this results in good confidence coverage of the final model parameters when \code{type='regression'} is used.  Not implemented when \code{group} is used.} \item{burnin}{ \code{aregImpute} does \code{burnin + n.impute} iterations of the entire modeling process.  The first \code{burnin} imputations are discarded.  More burn-in iterations may be required when multiple variables are missing on the same observations.
When only one variable is missing, no burn-ins are needed and \code{burnin} is set to zero if unspecified.} \item{pr}{ set to \code{FALSE} to suppress printing of iteration messages } \item{plotTrans}{ set to \code{TRUE} to plot \code{ace} or \code{avas} transformations for each variable for each of the multiple imputations. This is useful for determining whether transformations are reasonable. If transformations are too noisy or have long flat sections (resulting in "lumps" in the distribution of imputed values), it may be advisable to place restrictions on the transformations (monotonicity or linearity). } \item{tolerance}{singularity criterion; list the source code in the \code{lm.fit.qr.bare} function for details} \item{B}{number of bootstrap resamples to use if \code{nk} is a vector} \item{digits}{number of digits for printing} \item{nclass}{number of bins to use in drawing histogram} \item{datadensity}{see \code{\link{Ecdf}}} \item{diagnostics}{ Specify \code{diagnostics=TRUE} to draw plots of imputed values against sequential imputation numbers, separately for each missing observations and variable. } \item{maxn}{ Maximum number of observations shown for diagnostics. Default is \code{maxn=10}, which limits the number of observations plotted to at most the first 10. } \item{nperm}{number of random formula permutations for \code{reformM}; omit to sort variables by descending missing count.} \item{...}{other arguments that are ignored} } \value{ a list of class \code{"aregImpute"} containing the following elements: \item{call}{ the function call expression } \item{formula}{ the formula specified to \code{aregImpute} } \item{match}{ the \code{match} argument } \item{fweighted}{ the \code{fweighted} argument } \item{n}{ total number of observations in input dataset } \item{p}{ number of variables } \item{na}{ list of subscripts of observations for which values were originally missing } \item{nna}{ named vector containing the numbers of missing values in the data } \item{type}{ vector of types of transformations used for each variable (\code{"s","l","c"} for smooth spline, linear, or categorical with dummy variables) } \item{tlinear}{value of \code{tlinear} parameter} \item{nk}{number of knots used for smooth transformations} \item{cat.levels}{ list containing character vectors specifying the \code{levels} of categorical variables } \item{df}{degrees of freedom (number of parameters estimated) for each variable} \item{n.impute}{ number of multiple imputations per missing value } \item{imputed}{ a list containing matrices of imputed values in the same format as those created by \code{transcan}. Categorical variables are coded using their integer codes. Variables having no missing values will have \code{NULL} matrices in the list. } \item{x}{if \code{x} is \code{TRUE}, the original data matrix with integer codes for categorical variables} \item{rsq}{ for the last round of imputations, a vector containing the R-squares with which each sometimes-missing variable could be predicted from the others by \code{ace} or \code{avas}.} } \details{ The sequence of steps used by the \code{aregImpute} algorithm is the following. \cr (1) For each variable containing m \code{NA}s where m > 0, initialize the \code{NA}s to values from a random sample (without replacement if a sufficient number of non-missing values exist) of size m from the non-missing values. \cr (2) For \code{burnin+n.impute} iterations do the following steps. 
The first \code{burnin} iterations provide a burn-in, and imputations are saved only from the last \code{n.impute} iterations. \cr (3) For each variable containing any \code{NA}s, draw a sample with replacement from the observations in the entire dataset in which the current variable being imputed is non-missing.  Fit a flexible additive model to predict this target variable while finding the optimum transformation of it (unless the identity transformation is forced).  Use this fitted flexible model to predict the target variable in all of the original observations.  Impute each missing value of the target variable with the observed value whose predicted transformed value is closest to the predicted transformed value of the missing value (if \code{match="closest"} and \code{type="pmm"}), or use a draw from a multinomial distribution with probabilities derived from distance weights, if \code{match="weighted"} (the default). \cr (4) After these imputations are computed, use these random draw imputations the next time the current target variable is used as a predictor of other sometimes-missing variables. When \code{match="closest"}, predictive mean matching does not work well when fewer than 3 variables are used to predict the target variable, because many of the multiple imputations for an observation will be identical.  In the extreme case of one right-hand-side variable and assuming that only monotonic transformations of left and right-side variables are allowed, every bootstrap resample will give predicted values of the target variable that are monotonically related to predicted values from every other bootstrap resample.  The same is true for Bayesian predicted values.  This causes predictive mean matching to always match on the same donor observation. When the missingness mechanism for a variable is so systematic that the distribution of observed values is truncated, predictive mean matching does not work.  It will only yield imputed values that are near observed values, so intervals in which no values are observed will not be populated by imputed values.  For this case, the only hope is to make regression assumptions and use extrapolation.  With \code{type="regression"}, \code{aregImpute} will use linear extrapolation to obtain a (hopefully) reasonable distribution of imputed values.  The \code{"regression"} option causes \code{aregImpute} to impute missing values by adding a random sample of residuals (with replacement if there are more \code{NA}s than measured values) on the transformed scale of the target variable.  After random residuals are added, predicted random draws are obtained on the original untransformed scale using reverse linear interpolation on the table of original and transformed target values (linear extrapolation when a random residual is large enough to put the random draw prediction outside the range of observed values).  The bootstrap is used as with \code{type="pmm"} to factor in the uncertainty of the imputation model.  As model uncertainty is high when the transformation of a target variable is unknown, \code{tlinear} defaults to \code{TRUE} to limit the variance in predicted values when \code{nk} is positive.
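To make step (3) concrete, here is a minimal sketch (not the package's internal code) of closest-match donor selection on the transformed scale:
\preformatted{
# yhat.mis:   predicted transformed target for rows with NA
# yhat.donor: predicted transformed target for complete rows
# y.donor:    observed target values for the complete rows
closestDonor <- function(yhat.mis, yhat.donor, y.donor)
  y.donor[sapply(yhat.mis, function(z) which.min(abs(z - yhat.donor)))]
}
With \code{match="weighted"} (the default) the donor is instead drawn with tricube distance weights as described under \code{fweighted}.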
van Buuren S, Brand JPL, Groothuis-Oudshoorn CGM, Rubin DB. Fully conditional specifications in multivariate imputation. J Stat Comp Sim 72:1049-1064, 2006. de Groot JAH, Janssen KJM, Zwinderman AH, Moons KGM, Reitsma JB. Multiple imputation to correct for partial verification bias revisited. Stat Med 27:5880-5889, 2008. Siddique J. Multiple imputation using an iterative hot-deck with distance-based donor selection. Stat Med 27:83-102, 2008. White IR, Royston P, Wood AM. Multiple imputation using chained equations: Issues and guidance for practice. Stat Med 30:377-399, 2011. } \seealso{ \code{\link{fit.mult.impute}}, \code{\link{transcan}}, \code{\link{areg}}, \code{\link{naclus}}, \code{\link{naplot}}, \code{\link[mice]{mice}}, \code{\link{dotchart3}}, \code{\link{Ecdf}} } \examples{ # Check that aregImpute can almost exactly estimate missing values when # there is a perfect nonlinear relationship between two variables # Fit restricted cubic splines with 4 knots for x1 and x2, linear for x3 set.seed(3) x1 <- rnorm(200) x2 <- x1^2 x3 <- runif(200) m <- 30 x2[1:m] <- NA a <- aregImpute(~x1+x2+I(x3), n.impute=5, nk=4, match='closest') a matplot(x1[1:m]^2, a$imputed$x2) abline(a=0, b=1, lty=2) x1[1:m]^2 a$imputed$x2 # Multiple imputation and estimation of variances and covariances of # regression coefficient estimates accounting for imputation # Example 1: large sample size, much missing data, no overlap in # NAs across variables x1 <- factor(sample(c('a','b','c'),1000,TRUE)) x2 <- (x1=='b') + 3*(x1=='c') + rnorm(1000,0,2) x3 <- rnorm(1000) y <- x2 + 1*(x1=='c') + .2*x3 + rnorm(1000,0,2) orig.x1 <- x1[1:250] orig.x2 <- x2[251:350] x1[1:250] <- NA x2[251:350] <- NA d <- data.frame(x1,x2,x3,y, stringsAsFactors=TRUE) # Find value of nk that yields best validating imputation models # tlinear=FALSE means to not force the target variable to be linear f <- aregImpute(~y + x1 + x2 + x3, nk=c(0,3:5), tlinear=FALSE, data=d, B=10) # normally B=75 f # Try forcing target variable (x1, then x2) to be linear while allowing # predictors to be nonlinear (could also say tlinear=TRUE) f <- aregImpute(~y + x1 + x2 + x3, nk=c(0,3:5), data=d, B=10) f \dontrun{ # Use 100 imputations to better check against individual true values f <- aregImpute(~y + x1 + x2 + x3, n.impute=100, data=d) f par(mfrow=c(2,1)) plot(f) modecat <- function(u) { tab <- table(u) as.numeric(names(tab)[tab==max(tab)][1]) } table(orig.x1,apply(f$imputed$x1, 1, modecat)) par(mfrow=c(1,1)) plot(orig.x2, apply(f$imputed$x2, 1, mean)) fmi <- fit.mult.impute(y ~ x1 + x2 + x3, lm, f, data=d) sqrt(diag(vcov(fmi))) fcc <- lm(y ~ x1 + x2 + x3) summary(fcc) # SEs are larger than from mult. 
imputation } \dontrun{ # Example 2: Very discriminating imputation models, # x1 and x2 have some NAs on the same rows, smaller n set.seed(5) x1 <- factor(sample(c('a','b','c'),100,TRUE)) x2 <- (x1=='b') + 3*(x1=='c') + rnorm(100,0,.4) x3 <- rnorm(100) y  <- x2 + 1*(x1=='c') + .2*x3 + rnorm(100,0,.4) orig.x1 <- x1[1:20] orig.x2 <- x2[18:23] x1[1:20] <- NA x2[18:23] <- NA #x2[21:25] <- NA d <- data.frame(x1,x2,x3,y, stringsAsFactors=TRUE) n <- naclus(d) plot(n); naplot(n)  # Show patterns of NAs # 100 imputations to study them; normally use 5 or 10 f  <- aregImpute(~y + x1 + x2 + x3, n.impute=100, nk=0, data=d) par(mfrow=c(2,3)) plot(f, diagnostics=TRUE, maxn=2) # Note: diagnostics=TRUE makes graphs similar to those made by: # r <- range(f$imputed$x2, orig.x2) # for(i in 1:6) {  # use 1:2 to mimic maxn=2 #   plot(1:100, f$imputed$x2[i,], ylim=r, #        ylab=paste("Imputations for Obs.",i)) #   abline(h=orig.x2[i],lty=2) # } table(orig.x1,apply(f$imputed$x1, 1, modecat)) par(mfrow=c(1,1)) plot(orig.x2, apply(f$imputed$x2, 1, mean)) fmi <- fit.mult.impute(y ~ x1 + x2, lm, f, data=d) sqrt(diag(vcov(fmi))) fcc <- lm(y ~ x1 + x2) summary(fcc)   # SEs are larger than from mult. imputation } \dontrun{ # Study relationship between smoothing parameter for weighting function # (multiplier of mean absolute distance of transformed predicted # values, used in tricube weighting function) and standard deviation # of multiple imputations.  SDs are computed from average variances # across subjects.  match="closest" same as match="weighted" with # small value of fweighted. # This example also shows problems with predictive mean # matching almost always giving the same imputed values when there is # only one predictor (regression coefficients change over multiple # imputations but predicted values are virtually 1-1 functions of each # other) set.seed(23) x <- runif(200) y <- x + runif(200, -.05, .05) r <- resid(lsfit(x,y)) rmse <- sqrt(sum(r^2)/(200-2))   # sqrt of residual MSE y[1:20] <- NA d <- data.frame(x,y) f <- aregImpute(~ x + y, n.impute=10, match='closest', data=d) # As an aside here is how to create a completed dataset for imputation # number 3 as fit.mult.impute would do automatically.  In this degenerate # case changing 3 to 1-2,4-10 will not alter the results.
imputed <- impute.transcan(f, imputation=3, data=d, list.out=TRUE, pr=FALSE, check=FALSE) sd <- sqrt(mean(apply(f$imputed$y, 1, var))) ss <- c(0, .01, .02, seq(.05, 1, length=20)) sds <- ss; sds[1] <- sd for(i in 2:length(ss)) { f <- aregImpute(~ x + y, n.impute=10, fweighted=ss[i]) sds[i] <- sqrt(mean(apply(f$imputed$y, 1, var))) } plot(ss, sds, xlab='Smoothing Parameter', ylab='SD of Imputed Values', type='b') abline(v=.2, lty=2) # default value of fweighted abline(h=rmse, lty=2) # root MSE of residuals from linear regression } \dontrun{ # Do a similar experiment for the Titanic dataset getHdata(titanic3) h <- lm(age ~ sex + pclass + survived, data=titanic3) rmse <- summary(h)$sigma set.seed(21) f <- aregImpute(~ age + sex + pclass + survived, n.impute=10, data=titanic3, match='closest') sd <- sqrt(mean(apply(f$imputed$age, 1, var))) ss <- c(0, .01, .02, seq(.05, 1, length=20)) sds <- ss; sds[1] <- sd for(i in 2:length(ss)) { f <- aregImpute(~ age + sex + pclass + survived, data=titanic3, n.impute=10, fweighted=ss[i]) sds[i] <- sqrt(mean(apply(f$imputed$age, 1, var))) } plot(ss, sds, xlab='Smoothing Parameter', ylab='SD of Imputed Values', type='b') abline(v=.2, lty=2) # default value of fweighted abline(h=rmse, lty=2) # root MSE of residuals from linear regression } d <- data.frame(x1=rnorm(50), x2=c(rep(NA, 10), runif(40)), x3=c(runif(4), rep(NA, 11), runif(35))) reformM(~ x1 + x2 + x3, data=d) reformM(~ x1 + x2 + x3, data=d, nperm=2) # Give result or one of the results as the first argument to aregImpute } \keyword{smooth} \keyword{regression} \keyword{multivariate} \keyword{methods} \keyword{models} \concept{bootstrap} \concept{predictive mean matching} \concept{imputation} \concept{NA} \concept{missing data} Hmisc/man/na.delete.Rd0000644000176200001440000000156113714234047014226 0ustar liggesusers\name{na.delete} \alias{na.delete} \title{ Row-wise Deletion na.action } \description{ Does row-wise deletion as \code{na.omit}, but adds frequency of missing values for each predictor to the \code{"na.action"} attribute of the returned model frame. Optionally stores further details if \code{options(na.detail.response=TRUE)}. } \usage{ na.delete(frame) } \arguments{ \item{frame}{ a model frame }} \value{ a model frame with rows deleted and the \code{"na.action"} attribute added. } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{na.omit}}, \code{\link{na.keep}}, \code{\link{na.detail.response}}, \code{\link{model.frame.default}}, \code{\link{naresid}}, \code{\link{naprint}} } \examples{ # options(na.action="na.delete") # ols(y ~ x) } \keyword{models} % Converted by Sd2Rd version 1.21. Hmisc/man/contents.Rd0000644000176200001440000001247714370566550014232 0ustar liggesusers\name{contents} \alias{contents} \alias{contents.data.frame} \alias{print.contents.data.frame} \alias{html.contents.data.frame} \alias{contents.list} \alias{print.contents.list} \title{Metadata for a Data Frame} \description{ \code{contents} is a generic function whose methods include \code{contents.data.frame} and \code{contents.list}. \code{contents.data.frame} creates an object containing the following attributes of the variables from a data frame: names, labels (if any), units (if any), number of factor levels (if any), factor levels, class, storage mode, and number of NAs. \code{print.contents.data.frame} will print the results, with options for sorting the variables. \code{html.contents.data.frame} creates HTML code for displaying the results.
This code has hyperlinks so that if the user clicks on the number of levels the browser jumps to the correct part of a table of factor levels for all the \code{factor} variables. If long labels are present (\code{"longlabel"} attributes on variables), these are printed at the bottom and the \code{html} method links to them through the regular labels. Variables having the same \code{levels} in the same order have the levels factored out for brevity. \code{contents.list} prints a directory of datasets when \code{\link{sasxport.get}} imported more than one SAS dataset. If \code{options(prType='html')} is in effect, calling \code{print} on an object that is the contents of a data frame will result in rendering the HTML version. If run from the console a browser window will open. } \usage{ contents(object, \dots) \method{contents}{data.frame}(object, sortlevels=FALSE, id=NULL, range=NULL, values=NULL, \dots) \method{print}{contents.data.frame}(x, sort=c('none','names','labels','NAs'), prlevels=TRUE, maxlevels=Inf, number=FALSE, \dots) \method{html}{contents.data.frame}(object, sort=c('none','names','labels','NAs'), prlevels=TRUE, maxlevels=Inf, levelType=c('list','table'), number=FALSE, nshow=TRUE, \dots) \method{contents}{list}(object, dslabels, \dots) \method{print}{contents.list}(x, sort=c('none','names','labels','NAs','vars'), \dots) } \arguments{ \item{object}{ a data frame. For \code{html}, \code{object} is an object created by \code{contents}. For \code{contents.list}, \code{object} is a list of data frames. } \item{sortlevels}{set to \code{TRUE} to sort levels of all factor variables into alphabetic order. This is especially useful when two variables use the same levels but in different orders. They will still be recognized by the \code{html} method as having identical levels if sorted.} \item{id}{an optional subject ID variable name that if present in \code{object} will cause the number of unique IDs to be printed in the contents header} \item{range}{an optional variable name that if present in \code{object} will cause its range to be printed in the contents header} \item{values}{an optional variable name that if present in \code{object} will cause its unique values to be printed in the contents header} \item{x}{ an object created by \code{contents} } \item{sort}{ Default is to print the variables in their original order in the data frame. Specify one of \code{"names"}, \code{"labels"}, or \code{"NAs"} to sort the variables by, respectively, alphabetically by names, alphabetically by labels, or by increasing order of number of missing values. For \code{contents.list}, \code{sort} may also be the value \code{"vars"} to cause sorting by the number of variables in the dataset. } \item{prlevels}{ set to \code{FALSE} to not print all levels of \code{factor} variables } \item{maxlevels}{maximum number of levels to print for a \code{factor} variable} \item{number}{ set to \code{TRUE} to have the \code{print} and \code{latex} methods number the variables by their order in the data frame } \item{nshow}{set to \code{FALSE} to suppress outputting number of observations and number of \code{NA}s; useful when these counts would unblind information to blinded reviewers} \item{levelType}{ By default, bullet lists of category levels are constructed in html. Set \code{levelType='table'} to put levels in html table format.
} \item{\dots}{ arguments passed from \code{html} to \code{format.df}, unused otherwise } \item{dslabels}{ named vector of SAS dataset labels, created for example by \code{\link{sasdsLabels}} } } \value{ an object of class \code{"contents.data.frame"} or \code{"contents.list"}. For the \code{html} method is an \code{html} character vector object. } \author{ Frank Harrell \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{describe}}, \code{\link{html}}, \code{\link{upData}} } \examples{ set.seed(1) dfr <- data.frame(x=rnorm(400),y=sample(c('male','female'),400,TRUE), stringsAsFactors=TRUE) contents(dfr) dfr <- upData(dfr, labels=c(x='Label for x', y='Label for y')) attr(dfr$x, 'longlabel') <- 'A very long label for x that can continue onto multiple long lines of text' k <- contents(dfr) print(k, sort='names', prlevels=FALSE) \dontrun{ html(k) html(contents(dfr)) # same result latex(k$contents) # latex.default just the main information } } \keyword{data} \keyword{interface} \concept{html} Hmisc/man/Hmisc-internal.Rd0000644000176200001440000000351413504411625015240 0ustar liggesusers\name{Hmisc-internal} \title{Internal Hmisc functions} \alias{dataDensityString} \alias{aregTran} \alias{as.double.Cbind} \alias{as.numeric.Cbind} \alias{formatSep} \alias{as.data.frame.impute} \alias{as.data.frame.roundN} \alias{as.data.frame.special.miss} \alias{as.data.frame.substi} \alias{substi} \alias{substi.source} \alias{[.substi} \alias{bpx} \alias{convertPdate} \alias{ddmmmyy} \alias{expr.tree} \alias{fillin} \alias{formatCats} \alias{formatCons} \alias{formatDateTime} \alias{formatTestStats} \alias{format.timePOSIXt} \alias{ftuss} \alias{ftupwr} \alias{get2rowHeads} \alias{groupn} \alias{htmlGreek} \alias{htmlSpecial} \alias{markupSpecs} \alias{importConvertDateTime} \alias{is.present} \alias{lookupSASContents} \alias{makeNames} \alias{mask} \alias{nafitted.delete} \alias{na.include} \alias{Names2names} \alias{GetModelFrame} \alias{naprint.keep} \alias{naresid.keep} \alias{naprint.delete} \alias{naresid.delete} \alias{napredict.delete} \alias{oPar} \alias{optionsCmds} \alias{ordGridFun} \alias{parGrid} \alias{pasteFit} \alias{plotpsummaryM} \alias{print.substi} \alias{print.timePOSIXt} \alias{read.xportDataload} \alias{readSAScsv} \alias{rowsumFast} \alias{sas.get.macro} \alias{setParNro} \alias{StatPlsmo} \alias{stepfun.eval} \alias{stripChart} \alias{termsDrop} \alias{testDateTime} \alias{uncbind} \alias{var.inner} \alias{xInch} \alias{xySortNoDupNoNA} \alias{yInch} \alias{zoom} \alias{latex.responseSummary} \alias{print.responseSummary} \alias{responseSummary} \alias{combine} \alias{combine<-} \alias{F_cidxcn} \alias{F_cidxcp} \alias{F_do_mchoice_match} \alias{F_do_nstr} \alias{F_hoeffd} \alias{F_jacklins} \alias{F_largrec} \alias{F_maxempr} \alias{F_rcorr} \alias{F_wclosepw} \alias{F_wclosest} \description{Internal Hmisc functions.} \details{These are not to be called by the user or are undocumented.} \keyword{internal} Hmisc/man/approxExtrap.Rd0000644000176200001440000000170412243661443015063 0ustar liggesusers\name{approxExtrap} \alias{approxExtrap} \title{Linear Extrapolation} \description{ Works in conjunction with the \code{\link{approx}} function to do linear extrapolation. \code{\link{approx}} in R does not support extrapolation at all, and it is buggy in S-Plus 6. 
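For example, a minimal sketch (arbitrary illustrative values, not from the package documentation):
\preformatted{
# Interpolation inside range(x); linear extrapolation outside,
# based on the outermost points
x <- c(1, 2, 3)
y <- c(1, 4, 9)
approxExtrap(x, y, xout=c(0, 2.5, 4))$y
# xout=0 and xout=4 fall outside range(x) and are extrapolated
# linearly; xout=2.5 is ordinary linear interpolation
}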
} \usage{ approxExtrap(x, y, xout, method = "linear", n = 50, rule = 2, f = 0, ties = "ordered", na.rm = FALSE) } \arguments{ \item{x,y,xout,method,n,rule,f}{ see \code{\link{approx}} } \item{ties}{ applies only to R. See \code{\link{approx}} } \item{na.rm}{ set to \code{TRUE} to remove \code{NA}s in \code{x} and \code{y} before proceeding } } \details{ Duplicates in \code{x} (and corresponding \code{y} elements) are removed before using \code{approx}. } \value{ a vector the same length as \code{xout} } \author{Frank Harrell} \seealso{\code{\link{approx}}} \examples{ approxExtrap(1:3,1:3,xout=c(0,4)) } \keyword{arith} \keyword{dplot} Hmisc/man/stata.get.Rd0000644000176200001440000000473213714231663014265 0ustar liggesusers\name{stata.get} \alias{stata.get} \title{Enhanced Importing of STATA Files} \description{ Reads a file in Stata version 5-11 binary format into a data frame. } \usage{ stata.get(file, lowernames = FALSE, convert.dates = TRUE, convert.factors = TRUE, missing.type = FALSE, convert.underscore = TRUE, warn.missing.labels = TRUE, force.single = TRUE, allow=NULL, charfactor=FALSE, \dots) } \arguments{ \item{file}{input Stata file. May be a file on the \acronym{WWW}, indicated by \code{file} starting with \samp{'https://'}.} \item{lowernames}{set to \code{TRUE} to convert variable names to lower case} \item{convert.dates}{see \code{\link[foreign]{read.dta}}} \item{convert.factors}{see \code{\link[foreign]{read.dta}}} \item{missing.type}{see \code{\link[foreign]{read.dta}}} \item{convert.underscore}{see \code{\link[foreign]{read.dta}}} \item{warn.missing.labels}{see \code{\link[foreign]{read.dta}}} \item{force.single}{set to \code{FALSE} to prevent integer-valued variables from being converted from storage mode \code{double} to \code{integer}} \item{allow}{a vector of characters allowed by \R that should not be converted to periods in variable names. By default, underscores in variable names are converted to periods as with \R before version 1.9.} \item{charfactor}{set to \code{TRUE} to change character variables to factors if they have fewer than n/2 unique values. Blanks and null strings are converted to \code{NA}s.} \item{\dots}{arguments passed to \code{\link[foreign]{read.dta}}.} } \details{ \code{stata.get} invokes the \code{\link[foreign]{read.dta}} function in the \pkg{foreign} package to read a Stata file, with a default output format of \code{\link{data.frame}}. The \code{\link{label}} function is used to attach labels to individual variables instead of to the data frame as done by \code{\link[foreign]{read.dta}}. By default, integer-valued variables are converted to a storage mode of integer unless \code{force.single=FALSE}. Date variables are converted to \R \code{\link{Date}} variables. By default, underscores in names are converted to periods. } \value{A data frame} \author{Charles Dupont} \seealso{\code{\link[foreign]{read.dta}},\code{\link{cleanup.import}},\code{\link{label}},\code{\link{data.frame}},\code{\link{Date}}} \examples{ \dontrun{ w <- stata.get('/tmp/my.dta') } } \keyword{interface} \keyword{manip} \keyword{file} \concept{Stata data file} Hmisc/man/all.is.numeric.Rd0000644000176200001440000000253514335150447015215 0ustar liggesusers\name{all.is.numeric} \alias{all.is.numeric} \title{Check if All Elements in Character Vector are Numeric} \description{ Tests, without issuing warnings, whether all elements of a character vector are legal numeric values, or optionally converts the vector to a numeric vector.
Leading and trailing blanks in \code{x} are ignored. } \usage{ all.is.numeric(x, what = c("test", "vector", "nonnum"), extras=c('.','NA')) } \arguments{ \item{x}{a character vector} \item{what}{specify \code{what="vector"} to return a numeric vector if it passes the test, or the original character vector otherwise, the default \code{"test"} to return \code{FALSE} if there are no non-missing non-\code{extra} values of \code{x} or there is at least one non-numeric value of \code{x}, or \code{"nonnum"} to return the vector of non-\code{extra}, non-NA, non-numeric values of \code{x}.} \item{extras}{a vector of character strings to count as numeric values, other than \code{""}.} } \value{a logical value if \code{what="test"} or a vector otherwise} \author{Frank Harrell} \seealso{\code{\link{as.numeric}}} \examples{ all.is.numeric(c('1','1.2','3')) all.is.numeric(c('1','1.2','3a')) all.is.numeric(c('1','1.2','3'),'vector') all.is.numeric(c('1','1.2','3a'),'vector') all.is.numeric(c('1','',' .'),'vector') all.is.numeric(c('1', '1.2', '3a'), 'nonnum') } \keyword{character} Hmisc/man/translate.Rd0000644000176200001440000000271712250353207014362 0ustar liggesusers\name{translate} \alias{translate} \title{ Translate Vector or Matrix of Text Strings } \description{ Uses the UNIX tr command to translate any character in \code{old} in \code{text} to the corresponding character in \code{new}. If \code{multichar=TRUE} or \code{old} and \code{new} have more than one element, or each have one element but they have different numbers of characters, uses the UNIX \code{sed} command to translate the series of characters in \code{old} to the series in \code{new} when these characters occur in \code{text}. If \code{old} or \code{new} contain a backslash, you sometimes have to quadruple it to make the UNIX command work. If they contain a forward slash, precede it by two backslashes. Invokes the built-in \code{chartr} function if \code{multichar=FALSE}. } \usage{ translate(text, old, new, multichar=FALSE) } \arguments{ \item{text}{ scalar, vector, or matrix of character strings to translate. } \item{old}{ vector of old characters } \item{new}{ corresponding vector of new characters } \item{multichar}{See above.} } \value{ an object like text but with characters translated } \seealso{grep} \examples{ translate(c("ABC","DEF"),"ABCDEFG", "abcdefg") translate("23.12","[.]","\\\\cdot ") # change . to \cdot translate(c("dog","cat","tiger"),c("dog","cat"),c("DOG","CAT")) # S-Plus gives [1] "DOG" "CAT" "tiger" - check discrepancy translate(c("dog","cat2","snake"),c("dog","cat"),"animal") # S-Plus gives [1] "animal" "animal2" "snake" } \keyword{character} Hmisc/man/equalBins.Rd0000644000176200001440000000232712243661443014313 0ustar liggesusers\name{equalBins} \alias{equalBins} %- Also NEED an '\alias' for EACH other topic documented here. \title{Multicolumn Formatting} \description{ Expands the widths of either the supercolumns or the subcolumns so that the sum of the supercolumn widths is the same as the sum of the subcolumn widths. } \usage{ equalBins(widths, subwidths) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{widths}{widths of the supercolumns.} \item{subwidths}{list of widths of the subcolumns for each supercolumn.} } \details{ This determines the correct subwidths of each of the various columns in a table for printing. The correct width of the multicolumns is determined by summing the widths of its subcolumns. } \value{ widths of the columns for a table.
} \author{Charles Dupont} \seealso{\code{\link{nchar}}, \code{\link{stringDims}}} \examples{ mcols <- c("Group 1", "Group 2") mwidth <- nchar(mcols, type="width") spancols <- c(3,3) ccols <- c("a", "deer", "ad", "cat", "help", "bob") cwidth <- nchar(ccols, type="width") subwidths <- partition.vector(cwidth, spancols) equalBins(mwidth, subwidths) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{print} Hmisc/man/rcorr.cens.Rd0000644000176200001440000001116714275452467014453 0ustar liggesusers\name{rcorr.cens} \alias{rcorr.cens} \alias{rcorrcens} \alias{rcorrcens.formula} \title{ Rank Correlation for Censored Data } \description{ Computes the c index and the corresponding generalization of Somers' Dxy rank correlation for a censored response variable. Also works for uncensored and binary responses, although its use of all possible pairings makes it slow for this purpose. Dxy and c are related by \eqn{Dxy=2(c-0.5)}{Dxy = 2*(c - 0.5)}. \code{rcorr.cens} handles one predictor variable. \code{rcorrcens} computes rank correlation measures separately by a series of predictors. In addition, \code{rcorrcens} has a rough way of handling categorical predictors. If a categorical (factor) predictor has two levels, it is converted to a numeric having values 1 and 2. If it has more than 2 levels, an indicator variable is formed for the most frequent level vs. all others, and another indicator for the second most frequent level and all others. The correlation is taken as the maximum of the two (in absolute value). } \usage{ rcorr.cens(x, S, outx=FALSE) \method{rcorrcens}{formula}(formula, data=NULL, subset=NULL, na.action=na.retain, exclude.imputed=TRUE, outx=FALSE, \dots) } \arguments{ \item{x}{ a numeric predictor variable } \item{S}{ a \code{Surv} object or a vector. If a vector, assumes that every observation is uncensored. } \item{outx}{ set to \code{TRUE} to not count pairs of observations tied on \code{x} as a relevant pair. This results in a Goodman--Kruskal gamma type rank correlation. } \item{formula}{ a formula with a \code{Surv} object or a numeric vector on the left-hand side } \item{data, subset, na.action}{ the usual options for models. Default for \code{na.action} is to retain all values, NA or not, so that NAs can be deleted in only a pairwise fashion. } \item{exclude.imputed}{ set to \code{FALSE} to include imputed values (created by \code{impute}) in the calculations. } \item{\dots}{ extra arguments passed to \code{\link{biVar}}. } } \value{ \code{rcorr.cens} returns a vector with the following named elements: \code{C Index}, \code{Dxy}, \code{S.D.}, \code{n}, \code{missing}, \code{uncensored}, \code{Relevant Pairs}, \code{Concordant}, and \code{Uncertain} \item{n}{number of observations not missing on any input variables} \item{missing}{number of observations missing on \code{x} or \code{S}} \item{relevant}{number of pairs of non-missing observations for which \code{S} could be ordered} \item{concordant}{number of relevant pairs for which \code{x} and \code{S} are concordant.} \item{uncertain}{number of pairs of non-missing observations for which censoring prevents classification of concordance of \code{x} and \code{S}. } \code{rcorrcens.formula} returns an object of class \code{biVar} which is documented with the \code{\link{biVar}} function.
} \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{somers2}}, \code{\link{biVar}}, \code{\link{rcorrp.cens}} } \references{ Newson R: Confidence intervals for rank statistics: Somers' D and extensions. Stata Journal 6:309-334; 2006. } \examples{ set.seed(1) x <- round(rnorm(200)) y <- rnorm(200) rcorr.cens(x, y, outx=TRUE) # can correlate non-censored variables library(survival) age <- rnorm(400, 50, 10) bp <- rnorm(400,120, 15) bp[1] <- NA d.time <- rexp(400) cens <- runif(400,.5,2) death <- d.time <= cens d.time <- pmin(d.time, cens) rcorr.cens(age, Surv(d.time, death)) r <- rcorrcens(Surv(d.time, death) ~ age + bp) r plot(r) # Show typical 0.95 confidence limits for ROC areas for a sample size # with 24 events and 62 non-events, for varying population ROC areas # Repeat for 138 events and 102 non-events set.seed(8) par(mfrow=c(2,1)) for(i in 1:2) { n1 <- c(24,138)[i] n0 <- c(62,102)[i] y <- c(rep(0,n0), rep(1,n1)) deltas <- seq(-3, 3, by=.25) C <- se <- deltas j <- 0 for(d in deltas) { j <- j + 1 x <- c(rnorm(n0, 0), rnorm(n1, d)) w <- rcorr.cens(x, y) C[j] <- w['C Index'] se[j] <- w['S.D.']/2 } low <- C-1.96*se; hi <- C+1.96*se print(cbind(C, low, hi)) errbar(deltas, C, C+1.96*se, C-1.96*se, xlab='True Difference in Mean X', ylab='ROC Area and Approx. 0.95 CI') title(paste('n1=',n1,' n0=',n0,sep='')) abline(h=.5, v=0, col='gray') true <- 1 - pnorm(0, deltas, sqrt(2)) lines(deltas, true, col='blue') } par(mfrow=c(1,1)) } \keyword{survival} \keyword{nonparametric} \concept{predictive accuracy} \concept{logistic regression model} % Converted by Sd2Rd version 1.21. Hmisc/man/plotlyM.Rd0000644000176200001440000001351113632313445014024 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotlyM.r \name{plotlyM} \alias{plotlyM} \title{plotly Multiple} \usage{ plotlyM( data, x = ~x, y = ~y, xhi = ~xhi, yhi = ~yhi, htext = NULL, multplot = NULL, strata = NULL, fitter = NULL, color = NULL, size = NULL, showpts = !length(fitter), rotate = FALSE, xlab = NULL, ylab = NULL, ylabpos = c("top", "y"), xlim = NULL, ylim = NULL, shareX = TRUE, shareY = FALSE, height = NULL, width = NULL, nrows = NULL, ncols = NULL, colors = NULL, alphaSegments = 1, alphaCline = 0.3, digits = 4, zeroline = TRUE ) } \arguments{ \item{data}{input data frame} \item{x}{formula specifying the x-axis variable} \item{y}{formula for y-axis variable} \item{xhi}{formula for upper x variable limits (\code{x} taken to be lower value)} \item{yhi}{formula for upper y variable limit (\code{y} taken to be lower value)} \item{htext}{formula for hovertext variable} \item{multplot}{formula specifying a variable in \code{data} that when stratified on produces a separate plot} \item{strata}{formula specifying an optional stratification variable} \item{fitter}{a fitting function such as \code{loess} that comes with a \code{predict} method. Alternatively specify \code{fitter='ecdf'} to use an internal function for computing and displaying ECDFs, which moves the analysis variable from the y-axis to the x-axis} \item{color}{\code{plotly} formula specifying a color variable or e.g. \code{~ I('black')}.
To keep colors constant over multiple plots you will need to specify an AsIs color when you don't have a variable representing color groups.} \item{size}{\code{plotly} formula specifying a symbol size variable or AsIs} \item{showpts}{if \code{fitter} is given, set to \code{TRUE} to show raw data points in addition to smooth fits} \item{rotate}{set to \code{TRUE} to reverse the roles of \code{x} and \code{y}, for example to get horizontal dot charts with error bars} \item{xlab}{x-axis label. May contain html.} \item{ylab}{a named vector of y-axis labels, possibly containing html (see example below). The names of the vector must correspond to levels of the \code{multplot} variable. \code{ylab} can be unnamed if \code{multplot} is not used.} \item{ylabpos}{position of y-axis labels. Default is on top left of plot. Specify \code{ylabpos='y'} for usual y-axis placement.} \item{xlim}{2-vector of x-axis limits, optional} \item{ylim}{2-vector of y-axis limits, optional} \item{shareX}{specifies whether x-axes should be shared when they align vertically over multiple plots} \item{shareY}{specifies whether y-axes should be shared when they align horizontally over multiple plots} \item{height}{height of the combined image in pixels} \item{width}{width of the combined image in pixels} \item{nrows}{the number of rows to produce using \code{subplot}} \item{ncols}{the number of columns to produce using \code{subplot} (specify at most one of \code{nrows,ncols})} \item{colors}{the color palette. Leave unspecified to use the default \code{plotly} palette} \item{alphaSegments}{alpha transparency for line segments (when \code{xhi} or \code{yhi} is not \code{NA})} \item{alphaCline}{alpha transparency for lines used to connect points} \item{digits}{number of significant digits to use in constructing hovertext} \item{zeroline}{set to \code{FALSE} to suppress vertical line at x=0} } \value{ \code{plotly} object produced by \code{subplot} } \description{ Generates multiple plotly graphics, driven by specs in a data frame } \details{ Generates multiple \code{plotly} traces and combines them with \code{plotly::subplot}. The traces are controlled by specifications in data frame \code{data} plus various arguments. \code{data} must contain these variables: \code{x}, \code{y}, and \code{tracename} (if \code{color} is not an "AsIs" color such as \code{~ I('black')}), and can contain these optional variables: \code{xhi}, \code{yhi} (rows containing \code{NA} for both \code{xhi} and \code{yhi} represent points, and those with non-\code{NA} \code{xhi} or \code{yhi} represent segments), \code{connect} (set to \code{TRUE} for rows for points, to connect the symbols), \code{legendgroup} (see \code{plotly} documentation), and \code{htext} (hovertext). If the \code{color} argument is given and it is not an "AsIs" color, the variable named in the \code{color} formula must also be in \code{data}. Likewise for \code{size}. If the \code{multplot} is given, the variable given in the formula must be in \code{data}. If \code{strata} is present, another level of separate plots is generated by levels of \code{strata}, within levels of \code{multplot}. If \code{fitter} is specified, x,y coordinates for an individual plot are run through \code{fitter}, and a line plot is made instead of showing data points. Alternatively you can specify \code{fitter='ecdf'} to compute and plot empirical cumulative distribution functions.
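As a minimal sketch of using \code{fitter} (hypothetical variable names and simulated data; not a definitive recipe from the package documentation):
\preformatted{
# Smoothed curves by group instead of raw points
set.seed(1)
d <- data.frame(x=runif(120), g=sample(c('a','b'), 120, TRUE))
d$y   <- d$x + (d$g == 'b')/4 + rnorm(120, 0, .1)
d$yhi <- NA                # all rows are points, not segments
d$tracename <- d$g         # trace labels when color is not AsIs
plotlyM(d, x=~x, y=~y, color=~g, fitter=loess, xlab='x', ylab='y')
}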
} \examples{ \dontrun{ set.seed(1) pts <- expand.grid(v=c('y1', 'y2', 'y3'), x=1:4, g=c('a', 'b'), yhi=NA, tracename='mean', legendgroup='mean', connect=TRUE, size=4) pts$y <- round(runif(nrow(pts)), 2) segs <- expand.grid(v=c('y1', 'y2', 'y3'), x=1:4, g=c('a', 'b'), tracename='limits', legendgroup='limits', connect=NA, size=6) segs$y <- runif(nrow(pts)) segs$yhi <- segs$y + runif(nrow(pts), .05, .15) z <- rbind(pts, segs) xlab <- labelPlotmath('X12', 'm/sec2', html=TRUE) ylab <- c(y1=labelPlotmath('Y1', 'cm', html=TRUE), y2='Y2', y3=labelPlotmath('Y3', 'mm', html=TRUE)) W=plotlyM(z, multplot=~v, color=~g, xlab=xlab, ylab=ylab, ncols=2, colors=c('black', 'blue')) W2=plotlyM(z, multplot=~v, color=~I('black'), xlab=xlab, ylab=ylab, colors=c('black', 'blue')) } } \author{ Frank Harrell } Hmisc/man/addMarginal.Rd0000644000176200001440000000340113626750061014570 0ustar liggesusers\name{addMarginal} \alias{addMarginal} \title{Add Marginal Observations} \usage{ addMarginal(data, ..., label = "All", margloc=c('last', 'first'), nested) } \arguments{ \item{data}{a data frame} \item{\dots}{a list of names of variables to marginalize} \item{label}{category name for added marginal observations} \item{margloc}{location for marginal category within factor variable specifying categories. Set to \code{"first"} to override the default - to put a category with value \code{label} as the first category.} \item{nested}{a single unquoted variable name if used} } \description{ Given a data frame and the names of variables, doubles the data frame for each variable with a new category \code{"All"} by default, or by the value of \code{label}. A new variable \code{.marginal.} is added to the resulting data frame, with value \code{""} if the observation is an original one, and with value equal to the names of the variables being marginalized (separated by commas) otherwise. If there is another stratification variable besides the one in \dots, and that variable is nested inside the variable in \dots, specify \code{nested=variable name} to have the value of that variable set to \code{label} whenever marginal observations are created for \dots. See the state-city example below. } \examples{ d <- expand.grid(sex=c('female', 'male'), country=c('US', 'Romania'), reps=1:2) addMarginal(d, sex, country) # Example of nested variables d <- data.frame(state=c('AL', 'AL', 'GA', 'GA', 'GA'), city=c('Mobile', 'Montgomery', 'Valdosta', 'Augusta', 'Atlanta'), x=1:5, stringsAsFactors=TRUE) addMarginal(d, state, nested=city) # city set to 'All' when state is marginalized } \keyword{utilities} \keyword{manip} Hmisc/man/legendfunctions.Rd0000644000176200001440000000044612622411243015547 0ustar liggesusers\name{legendfunctions} \alias{legendfunctions} \alias{Key} \alias{sKey} \alias{Key2} \title{Legend Creation Functions} \description{ Wrappers to call pre-defined legend plotting functions } \usage{ Key(...) Key2(...) sKey(...) } \arguments{ \item{\dots}{arguments to pass to wrapped functions} } Hmisc/man/rcorrp.cens.Rd0000644000176200001440000001362614275452554014640 0ustar liggesusers\name{rcorrp.cens} \alias{rcorrp.cens} \alias{improveProb} \alias{print.improveProb} \title{ Rank Correlation for Paired Predictors with a Possibly Censored Response, and Integrated Discrimination Index } \description{ Computes U-statistics to test for whether predictor X1 is more concordant than predictor X2, extending \code{rcorr.cens}. For \code{method=1}, estimates the fraction of pairs for which the \code{x1} difference is more impressive than the \code{x2} difference.
For \code{method=2}, estimates the fraction of pairs for which \code{x1} is concordant with \code{S} but \code{x2} is not. For binary responses the function \code{improveProb} provides several assessments of whether one set of predicted probabilities is better than another, using the methods described in \cite{Pencina et al (2007)}. This involves NRI and IDI to test for whether predictions from model \code{x1} are significantly different from those obtained from predictions from model \code{x2}. This is a distinct improvement over comparing ROC areas, sensitivity, or specificity. } \usage{ rcorrp.cens(x1, x2, S, outx=FALSE, method=1) improveProb(x1, x2, y) \method{print}{improveProb}(x, digits=3, conf.int=.95, \dots) } \arguments{ \item{x1}{ first predictor (a probability, for \code{improveProb}) } \item{x2}{ second predictor (a probability, for \code{improveProb}) } \item{S}{ a possibly right-censored \code{\link[survival]{Surv}} object. If \code{S} is a vector instead, it is converted to a \code{\link[survival]{Surv}} object and it is assumed that no observations are censored. } \item{outx}{ set to \code{TRUE} to exclude pairs tied on \code{x1} or \code{x2} from consideration } \item{method}{ see above } \item{y}{ a binary 0/1 outcome variable } \item{x}{ the result from \code{improveProb} } \item{digits}{ number of significant digits for use in printing the result of \code{improveProb} } \item{conf.int}{ level for confidence limits } \item{\dots}{ unused } } \details{ If \code{x1}, \code{x2} represent predictions from models, these functions assume either that you are using a separate sample from the one used to build the model, or that the amount of overfitting in \code{x1} equals the amount of overfitting in \code{x2}. An example of the latter is giving both models equal opportunity to be complex so that both models have the same number of effective degrees of freedom, whether a predictor was included in the model or was screened out by a variable selection scheme. Note that in the first part of their paper, \cite{Pencina et al.} presented measures that required binning the predicted probabilities. Those measures were then replaced with better continuous measures that are implemented here.
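For instance, a minimal sketch comparing a base logistic model with an expanded one (simulated data; assumes, as discussed above, that both models are given equal fitting opportunity):
\preformatted{
set.seed(2)
age  <- rnorm(300, 50, 10)
chol <- rnorm(300, 200, 30)
y    <- rbinom(300, 1, plogis(-12 + .2*age + .01*chol))
p1 <- fitted(glm(y ~ age,        family=binomial))  # base model
p2 <- fitted(glm(y ~ age + chol, family=binomial))  # adds a marker
improveProb(p1, p2, y)   # NRI and IDI for adding chol
}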
} \value{ a vector of statistics for \code{rcorrp.cens}, or a list with class \code{improveProb} of statistics for \code{improveProb}: \cr \item{n}{number of cases} \item{na}{number of events} \item{nb}{number of non-events} \item{pup.ev}{ mean of pairwise differences in probabilities for those with events and a pairwise difference of \eqn{\mbox{probabilities}>0} } \item{pup.ne}{ mean of pairwise differences in probabilities for those without events and a pairwise difference of \eqn{\mbox{probabilities}>0} } \item{pdown.ev}{ mean of pairwise differences in probabilities for those with events and a pairwise difference of \eqn{\mbox{probabilities}<0} } \item{pdown.ne}{ mean of pairwise differences in probabilities for those without events and a pairwise difference of \eqn{\mbox{probabilities}<0} } \item{nri}{ Net Reclassification Index = \eqn{(pup.ev-pdown.ev)-(pup.ne-pdown.ne)} } \item{se.nri}{standard error of NRI} \item{z.nri}{Z score for NRI} \item{nri.ev}{Net Reclassification Index = \eqn{pup.ev-pdown.ev}} \item{se.nri.ev}{SE of NRI of events} \item{z.nri.ev}{Z score for NRI of events} \item{nri.ne}{Net Reclassification Index = \eqn{pup.ne-pdown.ne}} \item{se.nri.ne}{SE of NRI of non-events} \item{z.nri.ne}{Z score for NRI of non-events} \item{improveSens}{improvement in sensitivity} \item{improveSpec}{improvement in specificity} \item{idi}{Integrated Discrimination Index} \item{se.idi}{SE of IDI} \item{z.idi}{Z score of IDI} } \author{ Frank Harrell \cr Department of Biostatistics, Vanderbilt University \cr \email{fh@fharrell.com} Scott Williams \cr Division of Radiation Oncology \cr Peter MacCallum Cancer Centre, Melbourne, Australia \cr \email{scott.williams@petermac.org} } \references{ Pencina MJ, D'Agostino Sr RB, D'Agostino Jr RB, Vasan RS (2008): Evaluating the added predictive ability of a new marker: From area under the ROC curve to reclassification and beyond. Stat in Med 27:157-172. DOI: 10.1002/sim.2929 Pencina MJ, D'Agostino Sr RB, D'Agostino Jr RB, Vasan RS: Rejoinder: Comments on Integrated discrimination and net reclassification improvements-Practical advice. Stat in Med 2007; DOI: 10.1002/sim.3106 Pencina MJ, D'Agostino RB, Steyerberg EW (2011): Extensions of net reclassification improvement calculations to measure usefulness of new biomarkers. Stat in Med 30:11-21; DOI: 10.1002/sim.4085 } \seealso{ \code{\link{rcorr.cens}}, \code{\link{somers2}}, \code{\link[survival]{Surv}}, \code{\link[rms]{val.prob}} } \examples{ set.seed(1) library(survival) x1 <- rnorm(400) x2 <- x1 + rnorm(400) d.time <- rexp(400) + (x1 - min(x1)) cens <- runif(400,.5,2) death <- d.time <= cens d.time <- pmin(d.time, cens) rcorrp.cens(x1, x2, Surv(d.time, death)) #rcorrp.cens(x1, x2, y) ## no censoring set.seed(1) x1 <- runif(1000) x2 <- runif(1000) y <- sample(0:1, 1000, TRUE) rcorrp.cens(x1, x2, y) improveProb(x1, x2, y) } \keyword{survival} \keyword{nonparametric} \keyword{regression} \concept{logistic regression model} \concept{predictive accuracy} Hmisc/man/subplot.Rd0000755000176200001440000000774612243661443014075 0ustar liggesusers\name{subplot} \alias{subplot} %- Also NEED an '\alias' for EACH other topic documented here. \title{Embed a new plot within an existing plot} \description{ Subplot will embed a new plot within an existing plot at the coordinates specified (in user units of the existing plot). } \usage{ subplot(fun, x, y, size=c(1,1), vadj=0.5, hadj=0.5, pars=NULL) } %- maybe also 'usage' for other objects documented here.
\arguments{ \item{fun}{an expression or function defining the new plot to be embedded.} \item{x}{\code{x}-coordinate(s) of the new plot (in user coordinates of the existing plot).} \item{y}{\code{y}-coordinate(s) of the new plot; \code{x} and \code{y} can be specified in any of the ways understood by \code{xy.coords}.} \item{size}{The size of the embedded plot in inches if \code{x} and \code{y} have length 1.} \item{vadj}{vertical adjustment of the plot when \code{y} is a scalar, the default is to center vertically, 0 means place the bottom of the plot at \code{y}, 1 places the top of the plot at \code{y}.} \item{hadj}{horizontal adjustment of the plot when \code{x} is a scalar, the default is to center horizontally, 0 means place the left edge of the plot at \code{x}, and 1 means place the right edge of the plot at \code{x}.} \item{pars}{a list of parameters to be passed to \code{par} before running \code{fun}.} } \details{ The coordinates \code{x} and \code{y} can be scalars or vectors of length 2. If vectors of length 2 then they determine the opposite corners of the rectangle for the embedded plot (and the parameters \code{size}, \code{vadj}, and \code{hadj} are all ignored). If \code{x} and \code{y} are given as scalars then the plot position relative to the point and the size of the plot will be determined by the arguments \code{size}, \code{vadj}, and \code{hadj}. The default is to center a 1 inch by 1 inch plot at \code{x,y}. Setting \code{vadj} and \code{hadj} to \code{(0,0)} will position the lower left corner of the plot at \code{(x,y)}. The rectangle defined by \code{x}, \code{y}, \code{size}, \code{vadj}, and \code{hadj} will be used as the plotting area of the new plot. Any tick marks, axis labels, main and sub titles will be outside of this rectangle. Any graphical parameter settings that you would like to be in place before \code{fun} is evaluated can be specified in the \code{pars} argument (warning: specifying layout parameters here (\code{plt}, \code{mfrow}, etc.) may cause unexpected results). After the function completes the graphical parameters will have been reset to what they were before calling the function (so you can continue to augment the original plot). } \value{ An invisible list with the graphical parameters that were in effect when the subplot was created. Passing this list to \code{par} will enable you to augment the embedded plot.
} %\references{ ~put references to the literature/web site here ~ } \author{Greg Snow \email{greg.snow@imail.org}} %\note{ ~~further notes~~ } % ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{cnvrt.coords}}, \code{\link{par}}, \code{\link{symbols}}} \examples{ # make an original plot plot( 11:20, sample(51:60) ) # add some histograms subplot( hist(rnorm(100)), 15, 55) subplot( hist(runif(100),main='',xlab='',ylab=''), 11, 51, hadj=0, vadj=0) subplot( hist(rexp(100, 1/3)), 20, 60, hadj=1, vadj=1, size=c(0.5,2) ) subplot( hist(rt(100,3)), c(12,16), c(57,59), pars=list(lwd=3,ask=FALSE) ) tmp <- rnorm(25) qqnorm(tmp) qqline(tmp) tmp2 <- subplot( hist(tmp,xlab='',ylab='',main=''), cnvrt.coords(0.1,0.9,'plt')$usr, vadj=1, hadj=0 ) abline(v=0, col='red') # wrong way to add a reference line to histogram # right way to add a reference line to histogram op <- par(no.readonly=TRUE) par(tmp2) abline(v=0, col='green') par(op) } \keyword{aplot}% at least one, from doc/KEYWORDS \keyword{dplot} Hmisc/man/event.convert.Rd0000644000176200001440000000343212243661443015167 0ustar liggesusers\name{event.convert} \alias{event.convert} \title{ Event Conversion for Time-to-Event Data } \description{ Convert a two-column data matrix with event time and event code into multiple columns of event times, one column for each event type } \usage{ event.convert(data2, event.time = 1, event.code = 2) } \arguments{ \item{data2}{ a matrix or dataframe with at least 2 columns; by default, the first column contains the event time and the second column contains the k event codes (e.g. 1=dead, 0=censored) } \item{event.time}{ the column number in \code{data2} that contains the event time } \item{event.code}{ the column number in \code{data2} that contains the event code } } \details{ In survival analysis, the data typically come in two columns: one column containing survival time and the other containing censoring indicator or event code. The \code{event.convert} function converts this type of data into multiple columns of event times, one column of each event type, suitable for the \code{event.chart} function. } \author{ J. Jack Lee and Kenneth R. Hess \cr Department of Biostatistics \cr University of Texas \cr M.D. Anderson Cancer Center \cr Houston, TX 77030 \cr \email{jjlee@mdanderson.org}, \email{khess@mdanderson.org} Joel A. Dubin \cr Department of Statistics \cr University of Waterloo \cr \email{jdubin@uwaterloo.ca} } \seealso{ \code{\link{event.history}}, \code{\link{Date}}, \code{\link{event.chart}} } \examples{ # To convert coded time-to-event data, then, draw an event chart: surv.time <- c(5,6,3,1,2) cens.ind <- c(1,0,1,1,0) surv.data <- cbind(surv.time,cens.ind) event.data <- event.convert(surv.data) event.chart(cbind(rep(0,5),event.data),x.julian=TRUE,x.reference=1) } \keyword{hplot} \keyword{survival} Hmisc/man/summaryP.Rd0000644000176200001440000003256513714234042014207 0ustar liggesusers\name{summaryP} \alias{summaryP} \alias{plot.summaryP} \alias{ggplot.summaryP} \alias{latex.summaryP} \title{Multi-way Summary of Proportions} \description{ \code{summaryP} produces a tall and thin data frame containing numerators (\code{freq}) and denominators (\code{denom}) after stratifying the data by a series of variables. A special capability to group a series of related yes/no variables is included through the use of the \code{\link{ynbind}} function, for which the user specifies a final argument \code{label} used to label the panel created for that group of related variables.
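For example, a minimal sketch of the resulting layout (hypothetical variables; simulated data):
\preformatted{
set.seed(1)
d <- data.frame(mi     = sample(c('N','Y'), 80, TRUE),
                stroke = sample(c('N','Y'), 80, TRUE),
                treat  = sample(c('A','B'), 80, TRUE))
s <- summaryP(mi + stroke ~ treat, data=d)
s   # one row per variable/level/stratum, with freq and denom
}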
If \code{options(grType='plotly')} is not in effect, the \code{plot} method for \code{summaryP} displays proportions as a multi-panel dot chart using the \code{lattice} package's \code{dotplot} function with a special \code{panel} function. Numerators and denominators of proportions are also included as text, in the same colors as used by an optional \code{groups} variable. The \code{formula} argument used in the \code{dotplot} call is constructed, but the user can easily reorder the variables by specifying \code{formula}, with elements named \code{val} (category levels), \code{var} (classification variable name), \code{freq} (calculated result) plus the overall cross-classification variables excluding \code{groups}. If \code{options(grType='plotly')} is in effect, the \code{plot} method makes an entirely different display using \code{Hmisc::dotchartpl} with \code{plotly} if \code{marginVal} is specified, whereby a stratification variable causes more finely stratified estimates to be shown slightly below the lines, with smaller and translucent symbols if \code{data} has been run through \code{addMarginal}. The marginal summaries are shown as the main estimates and the user can turn off display of the stratified estimates, or view their details with hover text. The \code{ggplot} method for \code{summaryP} does not draw numerators and denominators but the chart is more compact than using the \code{plot} method with base graphics because \code{ggplot2} does not repeat category names the same way as \code{lattice} does. Variable names that are too long to fit in panel strips are renamed (1), (2), etc. and an attribute \code{"fnvar"} is added to the result; this attribute is a character string defining the abbreviations, useful in a figure caption. The \code{ggplot2} object has \code{label}s for points plotted, used by \code{plotly::ggplotly} as hover text (see example). The \code{latex} method produces one or more LaTeX \code{tabular}s containing a table representation of the result, with optional side-by-side display if \code{groups} is specified. Multiple \code{tabular}s result from the presence of non-group stratification factors. } \usage{ summaryP(formula, data = NULL, subset = NULL, na.action = na.retain, sort=TRUE, asna = c("unknown", "unspecified"), \dots) \method{plot}{summaryP}(x, formula=NULL, groups=NULL, marginVal=NULL, marginLabel=marginVal, refgroup=NULL, exclude1=TRUE, xlim = c(-.05, 1.05), text.at=NULL, cex.values = 0.5, key = list(columns = length(groupslevels), x = 0.75, y = -0.04, cex = 0.9, col = trellis.par.get('superpose.symbol')$col, corner=c(0,1)), outerlabels=TRUE, autoarrange=TRUE, col=colorspace::rainbow_hcl, \dots) \method{ggplot}{summaryP}(data, mapping, groups=NULL, exclude1=TRUE, xlim=c(0, 1), col=NULL, shape=NULL, size=function(n) n ^ (1/4), sizerange=NULL, abblen=5, autoarrange=TRUE, addlayer=NULL, \dots, environment) \method{latex}{summaryP}(object, groups=NULL, exclude1=TRUE, file='', round=3, size=NULL, append=TRUE, \dots) } \arguments{ \item{formula}{a formula with the variables for whose levels proportions are computed on the left hand side, and major classification variables on the right. The formula needs to include any variable later used as \code{groups}, as the data summarization does not distinguish between superpositioning and paneling. For the plot method, \code{formula} can provide an override to the default formula for \code{dotplot()}.} \item{data}{an optional data frame.
For \code{ggplot.summaryP} \code{data} is the result of \code{summaryP}.} \item{subset}{an optional subsetting expression or vector} \item{na.action}{function specifying how to handle \code{NA}s. The default is to keep all \code{NA}s in the analysis frame.} \item{sort}{set to \code{FALSE} to not sort category levels in descending order of global proportions} \item{asna}{character vector specifying level names to consider the same as \code{NA}. Set \code{asna=NULL} to not consider any.} \item{x}{an object produced by \code{summaryP}} \item{groups}{a character string containing the name of a superpositioning variable for obtaining further stratification within a horizontal line in the dot chart.} \item{marginVal}{if \code{options(grType='plotly')} is in effect and the data given to \code{summaryP} were run through \code{addMarginal}, specifies the category name that represents marginal summaries (usually \code{"All"}).} \item{marginLabel}{specifies a different character string to use than the value of \code{marginVal}. For example, if marginal proportions were computed over all \code{region}s, one may specify \code{marginVal="All", marginLabel="All Regions"}. \code{marginLabel} is only used for formatting graphical output.} \item{refgroup}{used when doing a \code{plotly} chart and a two-level group variable was used, resulting in the half-width confidence interval for the difference in two proportions to be shown, and the actual confidence limits and the difference added to hover text. See \code{dotchartpl} for more details.} \item{exclude1}{By default, \code{ggplot}, \code{plot}, and \code{latex} methods for \code{summaryP} remove redundant entries from tables for variables with only two levels. For example, if you print the proportion of females, you don't need to print the proportion of males. To override this, set \code{exclude1=FALSE}.} \item{xlim}{\code{x}-axis limits. Default is \code{c(0,1)}.} \item{text.at}{specify to leave unused space to the right of each panel to prevent numerators and denominators from touching data points. \code{text.at} is the upper limit for scaling panels' \code{x}-axes but tick marks are only labeled up to \code{max(xlim)}.} \item{cex.values}{character size to use for plotting numerators and denominators} \item{key}{a list to pass to the \code{auto.key} argument of \code{dotplot}. To place a key above the entire chart use \code{auto.key=list(columns=2)} for example.} \item{outerlabels}{by default if there are two conditioning variables besides \code{groups}, the \code{latticeExtra} package's \code{useOuterStrips} function is used to put strip labels in the margins, usually resulting in a much prettier chart. Set to \code{FALSE} to prevent usage of \code{useOuterStrips}.} \item{autoarrange}{If \code{TRUE}, the formula is re-arranged so that if there are two conditioning (paneling) variables, the variable with the most levels is taken as the vertical condition.} \item{col}{a vector of colors to use to override defaults in \code{ggplot}. When \code{options(grType='plotly')}, see \code{dotchartpl}.} \item{shape}{a vector of plotting symbols to override \code{ggplot} defaults} \item{mapping, environment}{not used; needed because of rules for generics} \item{size}{for \code{ggplot}, a function that transforms denominators into metrics used for the \code{size} aesthetic. Default is the fourth root function so that the area of symbols is proportional to the square root of sample size. Specify \code{NULL} to not vary point sizes. 
\code{size=sqrt} is a reasonable alternative. Set \code{size} to an integer to categorize the denominators into \code{size} quantile groups using \code{cut2}. Unless \code{size} is an integer, the legend for sizes uses the minimum and maximum denominators and 6-tiles using \code{quantile(..., type=1)} so that actually occurring sample sizes are used as labels. \code{size} is overridden to \code{NULL} if the range in denominators is less than 10 or the ratio of the maximum to the minimum is less than 1.2. For \code{latex}, \code{size} is an optional font size such as \code{"small"}} \item{sizerange}{a 2-vector specifying the \code{range} argument to the \code{ggplot2} \code{scale_size_...} function, which is the range of sizes allowed for the points according to the denominator. The default is \code{sizerange=c(.7, 3.25)} but the lower limit is increased according to the ratio of maximum to minimum sample sizes.} \item{abblen}{labels of variables having only one level and having their name longer than \code{abblen} characters are abbreviated and documented in \code{fnvar} (described elsewhere here). The default \code{abblen=5} is good for labels plotted vertically. If labels are rotated using \code{theme} a better value would be 12.} \item{\dots}{used only for \code{plotly} graphics and these arguments are passed to \code{dotchartpl}} \item{object}{an object produced by \code{summaryP}} \item{file}{file name, defaults to writing to console} \item{round}{number of digits to the right of the decimal place for proportions} \item{append}{set to \code{FALSE} to start output over} \item{addlayer}{a \code{ggplot} layer to add to the plot object} } \value{\code{summaryP} produces a data frame of class \code{"summaryP"}. The \code{plot} method produces a \code{lattice} object of class \code{"trellis"}. The \code{latex} method produces an object of class \code{"latex"} with an additional attribute \code{ngrouplevels} specifying the number of levels of any \code{groups} variable and an attribute \code{nstrata} specifying the number of strata. 
} \author{Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com}} \seealso{\code{\link{bpplotM}}, \code{\link{summaryM}}, \code{\link{ynbind}}, \code{\link{pBlock}}, \code{\link[ggplot2]{ggplot}}, \code{\link{colorFacet}} } \examples{ n <- 100 f <- function(na=FALSE) { x <- sample(c('N', 'Y'), n, TRUE) if(na) x[runif(100) < .1] <- NA x } set.seed(1) d <- data.frame(x1=f(), x2=f(), x3=f(), x4=f(), x5=f(), x6=f(), x7=f(TRUE), age=rnorm(n, 50, 10), race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE), sex=sample(c('Female', 'Male'), n, TRUE), treat=sample(c('A', 'B'), n, TRUE), region=sample(c('North America','Europe'), n, TRUE)) d <- upData(d, labels=c(x1='MI', x2='Stroke', x3='AKI', x4='Migraines', x5='Pregnant', x6='Other event', x7='MD withdrawal', race='Race', sex='Sex')) dasna <- subset(d, region=='North America') with(dasna, table(race, treat)) s <- summaryP(race + sex + ynbind(x1, x2, x3, x4, x5, x6, x7, label='Exclusions') ~ region + treat, data=d) # add exclude1=FALSE below to include female category plot(s, groups='treat') ggplot(s, groups='treat') plot(s, val ~ freq | region * var, groups='treat', outerlabels=FALSE) # Much better looking if omit outerlabels=FALSE; see output at # https://hbiostat.org/R/Hmisc/summaryFuns.pdf # See more examples under bpplotM ## For plotly interactive graphic that does not handle variable size ## panels well: ## require(plotly) ## g <- ggplot(s, groups='treat') ## ggplotly(g, tooltip='text') ## For nice plotly interactive graphic: ## options(grType='plotly') ## s <- summaryP(race + sex + ynbind(x1, x2, x3, x4, x5, x6, x7, ## label='Exclusions') ~ ## treat, data=subset(d, region=='Europe')) ## ## plot(s, groups='treat', refgroup='A') # refgroup='A' does B-A differences # Make a chart where there is a block of variables that # are only analyzed for males. Keep redundant sex in block for demo. # Leave extra space for numerators, denominators sb <- summaryP(race + sex + pBlock(race, sex, label='Race: Males', subset=sex=='Male') ~ region, data=d) plot(sb, text.at=1.3) plot(sb, groups='region', layout=c(1,3), key=list(space='top'), text.at=1.15) ggplot(sb, groups='region') \dontrun{ plot(s, groups='treat') # plot(s, groups='treat', outerlabels=FALSE) for standard lattice output plot(s, groups='region', key=list(columns=2, space='bottom')) colorFacet(ggplot(s)) plot(summaryP(race + sex ~ region, data=d), exclude1=FALSE, col='green') # Make your own plot using data frame created by summaryP useOuterStrips(dotplot(val ~ freq | region * var, groups=treat, data=s, xlim=c(0,1), scales=list(y='free', rot=0), xlab='Fraction', panel=function(x, y, subscripts, ...) { denom <- s$denom[subscripts] x <- x / denom panel.dotplot(x=x, y=y, subscripts=subscripts, ...)
})) # Show marginal summary for all regions combined s <- summaryP(race + sex ~ region, data=addMarginal(d, region)) plot(s, groups='region', key=list(space='top'), layout=c(1,2)) # Show marginal summaries for both race and sex s <- summaryP(ynbind(x1, x2, x3, x4, label='Exclusions', sort=FALSE) ~ race + sex, data=addMarginal(d, race, sex)) plot(s, val ~ freq | sex*race) } } \keyword{hplot} \keyword{category} \keyword{manip} \concept{grouping} \concept{stratification} \concept{aggregation} \concept{cross-classification} Hmisc/man/format.df.Rd0000644000176200001440000001730214027243321014240 0ustar liggesusers\name{format.df} \alias{format.df} \title{ Format a Data Frame or Matrix for LaTeX or HTML } \description{ \code{format.df} does appropriate rounding and decimal alignment, and outputs a character matrix containing the formatted data. If \code{x} is a \code{data.frame}, then do each component separately. If \code{x} is a matrix, but not a data.frame, make it a data.frame with individual components for the columns. If a component \code{x$x} is a matrix, then do all columns the same. } \usage{ format.df(x, digits, dec=NULL, rdec=NULL, cdec=NULL, numeric.dollar=!dcolumn, na.blank=FALSE, na.dot=FALSE, blank.dot=FALSE, col.just=NULL, cdot=FALSE, dcolumn=FALSE, matrix.sep=' ', scientific=c(-4,4), math.row.names=FALSE, already.math.row.names=FALSE, math.col.names=FALSE, already.math.col.names=FALSE, double.slash=FALSE, format.Date="\%m/\%d/\%Y", format.POSIXt="\%m/\%d/\%Y \%H:\%M:\%OS", \dots) } \arguments{ \item{x}{ a matrix (usually numeric) or data frame } \item{digits}{ causes all values in the table to be formatted to \code{digits} significant digits. \code{dec} is usually preferred. } \item{dec}{ If \code{dec} is a scalar, all elements of the matrix will be rounded to \code{dec} decimal places to the right of the decimal. \code{dec} can also be a matrix whose elements correspond to \code{x}, for customized rounding of each element. A matrix \code{dec} must have number of columns equal to number of columns of input \code{x}. A scalar \code{dec} is expanded to a vector \code{cdec} with number of items equal to number of columns of input \code{x}. } \item{rdec}{ a vector specifying the number of decimal places to the right for each row (\code{cdec} is more commonly used than \code{rdec}). A vector \code{rdec} must have number of items equal to number of rows of input \code{x}. \code{rdec} is expanded to matrix \code{dec}. } \item{cdec}{ a vector specifying the number of decimal places for each column. The vector must have number of items equal to number of columns or components of input x. } \item{cdot}{ Set to \code{TRUE} to use centered dots rather than ordinary periods in numbers. The output uses a syntax appropriate for \code{latex}. } \item{na.blank}{ Set to \code{TRUE} to use blanks rather than \code{NA} for missing values. This usually looks better in \code{latex}. } \item{dcolumn}{ Set to \code{TRUE} to use David Carlisle's dcolumn style for decimal alignment in \code{latex}. Default is \code{FALSE}. You will probably want to use \code{dcolumn} if you use \code{rdec}, as a column may then contain varying number of places to the right of the decimal. \code{dcolumn} can line up all such numbers on the decimal point, with integer values right justified at the decimal point location of numbers that actually contain decimal places. When you use \code{dcolumn = TRUE}, \code{numeric.dollar} is set by default to \code{FALSE}.
When you use \code{dcolumn = TRUE}, the object attribute \code{"style"} is set to \samp{dcolumn}, as the \code{latex} \code{usepackage} must reference \code{[dcolumn]}. The three files \file{dcolumn.sty}, \file{newarray.sty}, and \file{array.sty} will need to be in a directory in your \env{TEXINPUTS} path. } \item{numeric.dollar}{ logical, default \code{!dcolumn}. Set to \code{TRUE} to place dollar signs around numeric values when \code{dcolumn = FALSE}. This assures that \code{latex} will use minus signs rather than hyphens to indicate negative numbers. Set to \code{FALSE} when \code{dcolumn = TRUE}, as \code{dcolumn.sty} automatically uses minus signs. } \item{math.row.names}{ logical; set to \code{TRUE} to place dollar signs around the row names. } \item{already.math.row.names}{set to \code{TRUE} to prevent any math mode changes to row names} \item{math.col.names}{ logical; set to \code{TRUE} to place dollar signs around the column names. } \item{already.math.col.names}{set to \code{TRUE} to prevent any math mode changes to column names} \item{na.dot}{ Set to \code{TRUE} to use periods rather than \code{NA} for missing numeric values. This works with the \acronym{SAS} convention that periods indicate missing values. } \item{blank.dot}{ Set to \code{TRUE} to use periods rather than blanks for missing character values. This works with the \acronym{SAS} convention that periods indicate missing values. } \item{col.just}{ Input vector \code{col.just} must have length equal to the number of columns of the output matrix. When \code{NULL}, the default, the \code{col.just} attribute of the result is set to \samp{l} for character columns and to \samp{r} for numeric columns. The user can override the default by an argument vector whose length is equal to the number of columns of the result matrix. When \code{format.df} is called by \code{latex.default}, the \code{col.just} is used as the \code{cols} argument to the \code{tabular} environment and the letters \samp{l}, \samp{r}, and \samp{c} are valid values. When \code{format.df} is called by \acronym{SAS}, the \code{col.just} is used to determine whether a \samp{\$} is needed on the \samp{input} line of the \file{sysin} file, and the letters \samp{l} and \samp{r} are valid values. You can pass specifications other than \code{l,r,c} in \code{col.just}, e.g., \code{"p{3in}"} to get paragraph-formatted columns from \code{latex()}. } \item{matrix.sep}{ When \code{x} is a data frame containing a matrix, so that new column names are constructed from the name of the matrix object and the names of the individual columns of the matrix, \code{matrix.sep} specifies the character to use to separate object names from individual column names. } \item{scientific}{ specifies ranges of exponents (or a logical vector) specifying values not to convert to scientific notation. See \code{format.default} for details. } \item{double.slash}{ should escaping backslashes themselves be escaped. } \item{format.Date}{ String used to format objects of the Date class. } \item{format.POSIXt}{ String used to format objects of the POSIXt class. } \item{\dots}{ other arguments are accepted and passed to \code{format.default}. For \code{latexVerbatim} these arguments are passed to the \code{print} function. } } \value{ a character matrix with character images of properly rounded \code{x}. Matrix components of input \code{x} are now just sets of columns of the character matrix.
The object attribute \code{"col.just"} repeats the value of the argument \code{col.just} when provided; otherwise it includes the recommended justification for columns of output. See the discussion of the argument \code{col.just}. The default justification is \samp{l} for characters and factors, \samp{r} for numeric. When \code{dcolumn==TRUE}, numerics will have \samp{.} as the justification character. } \author{ Frank E. Harrell, Jr., \cr Department of Biostatistics, \cr Vanderbilt University, \cr \email{fh@fharrell.com} Richard M. Heiberger, \cr Department of Statistics, \cr Temple University, Philadelphia, PA. \cr \email{rmh@temple.edu} } \seealso{ \code{\link{latex}} } \examples{ \dontrun{ x <- data.frame(a=1:2, b=3:4) x$m <- 10000*matrix(5:8,nrow=2) names(x) dim(x) x format.df(x, big.mark=",") dim(format.df(x)) } } \keyword{utilities} \keyword{interface} \keyword{methods} \keyword{file} \keyword{character} \keyword{manip} Hmisc/man/Ecdf.Rd0000644000176200001440000002364613714234051013231 0ustar liggesusers\name{Ecdf} \alias{Ecdf} \alias{Ecdf.default} \alias{Ecdf.data.frame} \alias{Ecdf.formula} \alias{panel.Ecdf} \alias{prepanel.Ecdf} \title{Empirical Cumulative Distribution Plot} \description{ Computes coordinates of the cumulative distribution function of x, and by default plots it as a step function. A grouping variable may be specified so that stratified estimates are computed and (by default) plotted. If there is more than one group, the \code{labcurve} function is used (by default) to label the multiple step functions or to draw a legend defining line types, colors, or symbols by linking them with group labels. A \code{weights} vector may be specified to get weighted estimates. Specify \code{normwt} to make \code{weights} sum to the length of \code{x} (after removing NAs). Otherwise the total sample size is taken to be the sum of the weights. \code{Ecdf} is actually a generic function, and \code{Ecdf.default} is what's called for a vector argument. \code{Ecdf.data.frame} is called when the first argument is a data frame. This function can automatically set up a matrix of ECDFs and wait for a mouse click if the matrix requires more than one page. Categorical variables, character variables, and variables having fewer than a set number of unique values are ignored. If \code{par(mfrow=..)} is not set up before \code{Ecdf.data.frame} is called, the function will try to figure the best layout depending on the number of variables in the data frame. Upon return the original \code{mfrow} is left intact. When the first argument to \code{Ecdf} is a formula, a Trellis/Lattice function \code{Ecdf.formula} is called. This allows for multi-panel conditioning, superposition using a \code{groups} variable, and other Trellis features, along with the ability to easily plot transformed ECDFs using the \code{fun} argument. For example, if \code{fun=qnorm}, the inverse normal transformation will be used for the y-axis. If the transformed curves are linear this indicates normality. Like the \code{xYplot} function, \code{Ecdf} will create a function \code{Key} if the \code{groups} variable is used. This function can be invoked by the user to define the keys for the groups.
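For example, a minimal sketch (simulated data) of checking normality with a transformed ECDF:
\preformatted{
x <- rnorm(300)
Ecdf(~ x, fun=qnorm, ylab='Inverse Normal ECDF')
# an approximately linear step curve supports normality
}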
} \usage{ Ecdf(x, \dots) \method{Ecdf}{default}(x, what=c('F','1-F','f','1-f'), weights=rep(1, length(x)), normwt=FALSE, xlab, ylab, q, pl=TRUE, add=FALSE, lty=1, col=1, group=rep(1,length(x)), label.curves=TRUE, xlim, subtitles=TRUE, datadensity=c('none','rug','hist','density'), side=1, frac=switch(datadensity,none=NA,rug=.03,hist=.1,density=.1), dens.opts=NULL, lwd=1, log='', \dots) \method{Ecdf}{data.frame}(x, group=rep(1,nrows), weights=rep(1, nrows), normwt=FALSE, label.curves=TRUE, n.unique=10, na.big=FALSE, subtitles=TRUE, vnames=c('labels','names'),\dots) \method{Ecdf}{formula}(x, data=sys.frame(sys.parent()), groups=NULL, prepanel=prepanel.Ecdf, panel=panel.Ecdf, \dots, xlab, ylab, fun=function(x)x, what=c('F','1-F','f','1-f'), subset=TRUE) } \arguments{ \item{x}{a numeric vector, data frame, or Trellis/Lattice formula} \item{what}{ The default is \code{"F"} which results in plotting the fraction of values <= x. Set to \code{"1-F"} to plot the fraction > x or \code{"f"} to plot the cumulative frequency of values <= x. Use \code{"1-f"} to plot the cumulative frequency of values >= x. } \item{weights}{ numeric vector of weights. Omit or specify a zero-length vector or NULL to get unweighted estimates. } \item{normwt}{see above} \item{xlab}{ x-axis label. Default is label(x) or name of calling argument. For \code{Ecdf.formula}, \code{xlab} defaults to the \code{label} attribute of the x-axis variable. } \item{ylab}{ y-axis label. Default is \code{"Proportion <= x"}, \code{"Proportion > x"}, or "Frequency <= x" depending on value of \code{what}. } \item{q}{ a vector for quantiles for which to draw reference lines on the plot. Default is not to draw any. } \item{pl}{set to F to omit the plot, to just return estimates} \item{add}{ set to TRUE to add the cdf to an existing plot. Does not apply if using lattice graphics (i.e., if a formula is given as the first argument). } \item{lty}{ integer line type for plot. If \code{group} is specified, this can be a vector. } \item{lwd}{ line width for plot. Can be a vector corresponding to \code{group}s. } \item{log}{ see \code{\link{plot}}. Set \code{log='x'} to use log scale for \code{x}-axis. } \item{col}{ color for step function. Can be a vector. } \item{group}{ a numeric, character, or \code{factor} categorical variable used for stratifying estimates. If \code{group} is present, as many ECDFs are drawn as there are non--missing group levels. } \item{label.curves}{ applies if more than one \code{group} exists. Default is \code{TRUE} to use \code{labcurve} to label curves where they are farthest apart. Set \code{label.curves} to a \code{list} to specify options to \code{labcurve}, e.g., \code{label.curves=list(method="arrow", cex=.8)}. These option names may be abbreviated in the usual way arguments are abbreviated. Use for example \code{label.curves=list(keys=1:5)} to draw symbols periodically (as in \code{pch=1:5} - see \code{points}) on the curves and automatically position a legend in the most empty part of the plot. Set \code{label.curves=FALSE} to suppress drawing curve labels. The \code{col}, \code{lty}, and \code{type} parameters are automatically passed to \code{labcurve}, although you can override them here. You can set \code{label.curves=list(keys="lines")} to have different line types defined in an automatically positioned key. } \item{xlim}{ x-axis limits. Default is entire range of \code{x}. } \item{subtitles}{ set to \code{FALSE} to suppress putting a subtitle at the bottom left of each plot. 
The subtitle indicates the numbers of non-missing and missing observations, which are labeled \code{n}, \code{m}. } \item{datadensity}{ If \code{datadensity} is not \code{"none"}, either \code{scat1d} or \code{histSpike} is called to add a rug plot (\code{datadensity="rug"}), spike histogram (\code{datadensity="hist"}), or smooth density estimate (\code{"density"}) to the bottom or top of the ECDF. } \item{side}{ If \code{datadensity} is not \code{"none"}, the default is to place the additional information on top of the x-axis (\code{side=1}). Use \code{side=3} to place at the top of the graph. } \item{frac}{ passed to \code{histSpike} } \item{dens.opts}{ a list of optional arguments for \code{histSpike} } \item{...}{ other parameters passed to plot if add=F. For data frames, other parameters to pass to \code{Ecdf.default}. For \code{Ecdf.formula}, if \code{groups} is not used, you can also add data density information to each panel's ECDF by specifying the \code{datadensity} and optional \code{frac}, \code{side}, \code{dens.opts} arguments. } \item{n.unique}{ minimum number of unique values before an ECDF is drawn for a variable in a data frame. Default is 10. } \item{na.big}{ set to \code{TRUE} to draw the number of NAs in larger letters in the middle of the plot for \code{Ecdf.data.frame} } \item{vnames}{ By default, variable labels are used to label x-axes. Set \code{vnames="names"} to instead use variable names. } \item{method}{ method for computing the empirical cumulative distribution. See \code{wtd.Ecdf}. The default is to use the standard \code{"i/n"} method as is used by the non-Trellis versions of \code{Ecdf}. } \item{fun}{ a function to transform the cumulative proportions, for the Trellis-type usage of \code{Ecdf} } \item{data, groups, subset,prepanel, panel}{the usual Trellis/Lattice parameters, with \code{groups} causing \code{Ecdf.formula} to overlay multiple ECDFs on one panel.} } \value{ for \code{Ecdf.default} an invisible list with elements x and y giving the coordinates of the cdf. If there is more than one \code{group}, a list of such lists is returned. An attribute, \code{N}, is in the returned object. It contains the elements \code{n} and \code{m}, the number of non-missing and missing observations, respectively. 
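For example, a minimal sketch of using the returned estimates without plotting:
\preformatted{
e <- Ecdf(rnorm(100), pl=FALSE)
# e$x and e$y hold the ECDF coordinates
attr(e, 'N')   # n = non-missing, m = missing observations
}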
} \author{ Frank Harrell \cr Department of Biostatistics, Vanderbilt University \cr \email{fh@fharrell.com} } \section{Side Effects}{ plots } \seealso{ \code{\link{wtd.Ecdf}}, \code{\link{label}}, \code{\link{table}}, \code{\link{cumsum}}, \code{\link{labcurve}}, \code{\link{xYplot}}, \code{\link{histSpike}} } \examples{ set.seed(1) ch <- rnorm(1000, 200, 40) Ecdf(ch, xlab="Serum Cholesterol") scat1d(ch) # add rug plot histSpike(ch, add=TRUE, frac=.15) # add spike histogram # Better: add a data density display automatically: Ecdf(ch, datadensity='density') label(ch) <- "Serum Cholesterol" Ecdf(ch) other.ch <- rnorm(500, 220, 20) Ecdf(other.ch,add=TRUE,lty=2) sex <- factor(sample(c('female','male'), 1000, TRUE)) Ecdf(ch, q=c(.25,.5,.75)) # show quartiles Ecdf(ch, group=sex, label.curves=list(method='arrow')) # Example showing how to draw multiple ECDFs from paired data pre.test <- rnorm(100,50,10) post.test <- rnorm(100,55,10) x <- c(pre.test, post.test) g <- c(rep('Pre',length(pre.test)),rep('Post',length(post.test))) Ecdf(x, group=g, xlab='Test Results', label.curves=list(keys=1:2)) # keys=1:2 causes symbols to be drawn periodically on top of curves # Draw a matrix of ECDFs for a data frame m <- data.frame(pre.test, post.test, sex=sample(c('male','female'),100,TRUE)) Ecdf(m, group=m$sex, datadensity='rug') freqs <- sample(1:10, 1000, TRUE) Ecdf(ch, weights=freqs) # weighted estimates # Trellis/Lattice examples: region <- factor(sample(c('Europe','USA','Australia'),1000,TRUE)) year <- factor(sample(2001:2002,1000,TRUE)) Ecdf(~ch | region*year, groups=sex) Key() # draw a key for sex at the default location # Key(locator(1)) # user-specified positioning of key age <- rnorm(1000, 50, 10) Ecdf(~ch | equal.count(age), groups=sex) # use overlapping shingles Ecdf(~ch | sex, datadensity='hist', side=3) # add spike histogram at top } \keyword{nonparametric} \keyword{hplot} \keyword{methods} \keyword{distribution} \concept{trellis} \concept{lattice} Hmisc/man/first.word.Rd0000644000176200001440000000236214027243250014462 0ustar liggesusers\name{first.word} \alias{first.word} \title{First Word in a String or Expression} \description{ \code{first.word} finds the first word in an expression. A word is defined by unlisting the elements of the expression found by the S parser and then accepting any elements whose first character is either a letter or period. The principal intended use is for the automatic generation of temporary file names where it is important to exclude special characters from the file name. For Microsoft Windows, periods in names are deleted and only up to the first 8 characters of the word are returned. } \usage{ first.word(x, i=1, expr=substitute(x)) } \arguments{ \item{x}{ any scalar character string } \item{i}{ word number, default value = 1. Used when the second or \code{i}th word is wanted. Currently only the \code{i=1} case is implemented. } \item{expr}{ any S object of mode \code{expression}. } } \value{ a character string } \author{ Frank E. Harrell, Jr., \cr Department of Biostatistics, \cr Vanderbilt University, \cr \email{fh@fharrell.com} Richard M. Heiberger, \cr Department of Statistics, \cr Temple University, Philadelphia, PA.
\cr \email{rmh@temple.edu} } \examples{ first.word(expr=expression(y ~ x + log(w))) } \keyword{character} \keyword{manip} Hmisc/man/summaryM.Rd0000644000176200001440000006130014370731135014175 0ustar liggesusers\name{summaryM} \alias{summaryM} \alias{print.summaryM} \alias{plot.summaryM} \alias{latex.summaryM} \alias{html.summaryM} \alias{printsummaryM} \title{Summarize Mixed Data Types vs. Groups} \description{ \code{summaryM} summarizes the variables listed in an S formula, computing descriptive statistics and optionally statistical tests for group differences. This function is typically used when there are multiple left-hand-side variables that are each summarized independently against groups defined by a single right-hand-side variable. The summary statistics may be passed to \code{print} methods, \code{plot} methods for making annotated dot charts and extended box plots, and \code{latex} methods for typesetting tables using LaTeX. The \code{html} method uses \code{htmlTable::htmlTable} to typeset the table in html, by passing information to the \code{latex} method with \code{html=TRUE}. This is for use with Quarto/RMarkdown. The \code{print} methods use the \code{print.char.matrix} function to print boxed tables when \code{options(prType=)} has not been given or when \code{prType='plain'}. For plain tables, \code{print} calls the internal function \code{printsummaryM}. When \code{prType='latex'} the \code{latex} method is invoked, and when \code{prType='html'} html is rendered. In Quarto/RMarkdown, proper rendering will result even if \code{results='asis'} does not appear in the chunk header. When rendering in html at the console due to having \code{options(prType='html')} the table will be rendered in a viewer. The \code{plot} method creates \code{plotly} graphics if \code{options(grType='plotly')}, otherwise base graphics are used. \code{plotly} graphics provide extra information such as which quantile is being displayed when hovering the mouse. Test statistics are displayed by hovering over the mean. Continuous variables are described by three quantiles (quartiles by default) when printing, or by the following quantiles when plotting extended box plots using the \code{\link{bpplt}} function: 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95. The box plots are scaled to the 0.025 and 0.975 quantiles of each continuous left-hand-side variable. Categorical variables are described by counts and percentages. The left hand side of \code{formula} may contain \code{mChoice} ("multiple choice") variables. When \code{test=TRUE} each choice is tested separately as a binary categorical response. The \code{plot} method for \code{method="reverse"} creates a temporary function \code{Key} as is done by the \code{xYplot} and \code{Ecdf.formula} functions. After \code{plot} runs, you can type \code{Key()} to put a legend in a default location, or e.g. \code{Key(locator(1))} to draw a legend where you click the left mouse button. This key is for categorical variables, so to have the opportunity to put the key on the graph you will probably want to use the command \code{plot(object, which="categorical")}. A second function \code{Key2} is created if continuous variables are being plotted. It is used the same way as \code{Key}. If the \code{which} argument is not specified to \code{plot}, two pages of plots will be produced. If you don't define \code{par(mfrow=)} yourself, \code{plot.summaryM} will try to lay out a multi-panel graph to best fit all the individual charts for continuous variables.
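For example, a minimal sketch (assuming a data frame \code{d} containing \code{age}, \code{sex}, and a grouping variable \code{treat}):
\preformatted{
f <- summaryM(age + sex ~ treat, data=d, test=TRUE)
plot(f, which='categorical')
Key(locator(1))   # click on the plot to position the legend
}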
} \usage{ summaryM(formula, groups=NULL, data=NULL, subset, na.action=na.retain, overall=FALSE, continuous=10, na.include=FALSE, quant=c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 0.975), nmin=100, test=FALSE, conTest=conTestkw, catTest=catTestchisq, ordTest=ordTestpo) \method{print}{summaryM}(...) printsummaryM(x, digits, prn = any(n != N), what=c('proportion', '\%'), pctdig = if(what == '\%') 0 else 2, npct = c('numerator', 'both', 'denominator', 'none'), exclude1 = TRUE, vnames = c('labels', 'names'), prUnits = TRUE, sep = '/', abbreviate.dimnames = FALSE, prefix.width = max(nchar(lab)), min.colwidth, formatArgs=NULL, round=NULL, prtest = c('P','stat','df','name'), prmsd = FALSE, long = FALSE, pdig = 3, eps = 0.001, prob = c(0.25, 0.5, 0.75), prN = FALSE, \dots) \method{plot}{summaryM}(x, vnames = c('labels', 'names'), which = c('both', 'categorical', 'continuous'), vars=NULL, xlim = c(0,1), xlab = 'Proportion', pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), exclude1 = TRUE, main, ncols=2, prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001, conType = c('bp', 'dot', 'raw'), cex.means = 0.5, cex=par('cex'), height='auto', width=700, \dots) \method{latex}{summaryM}(object, title = first.word(deparse(substitute(object))), file=paste(title, 'tex', sep='.'), append=FALSE, digits, prn = any(n != N), what=c('proportion', '\%'), pctdig = if(what == '\%') 0 else 2, npct = c('numerator', 'both', 'denominator', 'slash', 'none'), npct.size = if(html) mspecs$html$smaller else 'scriptsize', Nsize = if(html) mspecs$html$smaller else 'scriptsize', exclude1 = TRUE, vnames=c("labels", "names"), prUnits = TRUE, middle.bold = FALSE, outer.size = if(html) mspecs$html$smaller else "scriptsize", caption, rowlabel = "", rowsep=html, insert.bottom = TRUE, dcolumn = FALSE, formatArgs=NULL, round=NULL, prtest = c('P', 'stat', 'df', 'name'), prmsd = FALSE, msdsize = if(html) function(x) x else NULL, brmsd=FALSE, long = FALSE, pdig = 3, eps = 0.001, auxCol = NULL, table.env=TRUE, tabenv1=FALSE, prob=c(0.25, 0.5, 0.75), prN=FALSE, legend.bottom=FALSE, html=FALSE, mspecs=markupSpecs, \dots) \method{html}{summaryM}(object, \dots) } \arguments{ \item{formula}{ An S formula with additive effects. There may be several variables on the right hand side separated by "+", or the numeral \code{1}, indicating that there is no grouping variable so that only margin summaries are produced. The right hand side variable, if present, must be a discrete variable producing a limited number of groups. On the left hand side there may be any number of variables, separated by "+", and these may be of mixed types. These variables are analyzed separately by the grouping variable. } \item{groups}{if there is more than one right-hand variable, specify \code{groups} as a character string containing the name of the variable used to produce columns of the table. The remaining right hand variables are combined to produce levels that cause separate tables or plots to be produced.} \item{x}{an object created by \code{summaryM}. For \code{conTestkw} a numeric vector, and for \code{ordTestpo}, a numeric or factor variable that can be considered ordered} \item{data}{ name or number of a data frame. Default is the current frame. } \item{subset}{ a logical vector or integer vector of subscripts used to specify the subset of data to use in the analysis. The default is to use all observations in the data frame. } \item{na.action}{ function for handling missing data in the input data. 
The default is a function defined here called \code{na.retain}, which keeps all observations for processing, with missing variables or not. } \item{overall}{ Setting \code{overall=TRUE} makes a new column with overall statistics for the whole sample. If \code{test=TRUE} these marginal statistics are ignored in doing statistical tests. } \item{continuous}{ specifies the threshold for when a variable is considered to be continuous (when there are at least \code{continuous} unique values). \code{factor} variables are always considered to be categorical no matter how many levels they have. } \item{na.include}{ Set \code{na.include=TRUE} to keep missing values of categorical variables from being excluded from the table. } \item{nmin}{ For categories of the response variable in which there are less than or equal to \code{nmin} non-missing observations, the raw data are retained for later plotting in place of box plots. } \item{test}{ Set to \code{TRUE} to compute test statistics using tests specified in \code{conTest} and \code{catTest}. } \item{conTest}{ a function of two arguments (grouping variable and a continuous variable) that returns a list with components \code{P} (the computed P-value), \code{stat} (the test statistic, either chi-square or F), \code{df} (degrees of freedom), \code{testname} (test name), \code{namefun} (\code{"chisq", "fstat"}), \code{statname} (statistic name), an optional component \code{latexstat} (LaTeX representation of \code{statname}), an optional component \code{plotmathstat} (for R - the \code{plotmath} representation of \code{statname}, as a character string), and an optional component \code{note} that contains a character string note about the test (e.g., \code{"test not done because n < 5"}). \code{conTest} is applied to continuous variables on the right-hand-side of the formula when \code{method="reverse"}. The default uses the \code{spearman2} function to run the Wilcoxon or Kruskal-Wallis test using the F distribution. } \item{catTest}{ a function of a frequency table (an integer matrix) that returns a list with the same components as created by \code{conTest}. By default, the Pearson chi-square test is done, without continuity correction (the continuity correction would make the test conservative like the Fisher exact test). } \item{ordTest}{ a function of a frequency table (an integer matrix) that returns a list with the same components as created by \code{conTest}. By default, the Proportional odds likelihood ratio test is done. } \item{\dots}{ For \code{Key} and \code{Key2} these arguments are passed to \code{key}, \code{text}, or \code{mtitle}. For \code{print} methods these are optional arguments to \code{print.char.matrix}. For \code{latex} methods these are passed to \code{latex.default}. For \code{html} the arguments are passed the \code{latex.summaryM}, and the arguments may not include \code{file}. For \code{print} the arguments are passed to \code{printsummaryM} or \code{latex.summaryM} depending on \code{options(prType=)}. } \item{object}{an object created by \code{summaryM}} \item{quant}{ vector of quantiles to use for summarizing continuous variables. These must be numbers between 0 and 1 inclusive and must include the numbers 0.5, 0.25, and 0.75 which are used for printing and for plotting quantile intervals. The outer quantiles are used for scaling the x-axes for such plots. Specify outer quantiles as \code{0} and \code{1} to scale the x-axes using the whole observed data ranges instead of the default (a 0.95 quantile interval). 
Box-percentile plots are drawn using all but the outer quantiles. } \item{prob}{ vector of quantiles to use for summarizing continuous variables. These must be numbers between 0 and 1 inclusive and have previously been included in the \code{quant} argument of \code{summaryM}. The vector must be of length three. By default it contains 0.25, 0.5, and 0.75. Warning: specifying 0 and 1 as two of the quantiles will result in computing the minimum and maximum of the variable. Since for many random variables the minimum will continue to become smaller as the sample size grows, and the maximum will continue to get larger, the min and max are not recommended as summary statistics. } \item{vnames}{ By default, tables and plots are usually labeled with variable labels (see the \code{label} and \code{sas.get} functions). To use the shorter variable names, specify \code{vnames="names"}. } \item{pch}{ vector of plotting characters to represent different groups, in order of group levels. } \item{abbreviate.dimnames}{see \code{print.char.matrix}} \item{prefix.width}{see \code{print.char.matrix}} \item{min.colwidth}{ minimum column width to use for boxes printed with \code{print.char.matrix}. The default is the maximum of the minimum column label length and the minimum length of entries in the data cells. } \item{formatArgs}{ a list containing other arguments to pass to \code{format.default} such as \code{scientific}, e.g., \code{formatArgs=list(scientific=c(-5,5))}. For \code{print.summary.formula.reverse} and \code{format.summary.formula.reverse}, \code{formatArgs} applies only to statistics computed on continuous variables, not to percents, numerators, and denominators. The \code{round} argument may be preferred. } \item{digits}{ number of significant digits to print. Default is to use the current value of the \code{digits} system option. } \item{what}{specifies whether proportions or percentages are to be printed or LaTeX'd} \item{pctdig}{ number of digits to the right of the decimal place for printing percentages or proportions. The default is zero if \code{what='\%'}, so percents will be rounded to the nearest percent. The default is 2 for proportions. } \item{prn}{ set to \code{TRUE} to print the number of non-missing observations on the current (row) variable. The default is to print these only if any of the counts of non-missing values differs from the total number of non-missing values of the left-hand-side variable. } \item{prN}{ set to \code{TRUE} to print the number of non-missing observations on rows that contain continuous variables. } \item{npct}{ specifies which counts are to be printed to the right of percentages. The default is to print the frequency (numerator of the percent) in parentheses. You can specify \code{"both"} to print both numerator and denominator as a fraction, \code{"denominator"}, \code{"slash"} to typeset horizontally using a forward slash, or \code{"none"}. } \item{npct.size}{ the size for typesetting \code{npct} information which appears after percents. The default is \code{"scriptsize"}. } \item{Nsize}{ When a second row of column headings is added showing sample sizes, \code{Nsize} specifies the LaTeX size for these subheadings. Default is \code{"scriptsize"}. } \item{exclude1}{ By default, \code{summaryM} objects will be printed, plotted, or typeset by removing redundant entries from percentage tables for categorical variables. For example, if you print the percent of females, you don't need to print the percent of males.
To override this, set \code{exclude1=FALSE}. } \item{prUnits}{ set to \code{FALSE} to suppress printing or latexing \code{units} attributes of variables, when \code{method='reverse'} or \code{'response'} } \item{sep}{ character to use to separate quantiles when printing tables } \item{prtest}{ a vector of test statistic components to print if \code{test=TRUE} was in effect when \code{summaryM} was called. Defaults to printing all components. Specify \code{prtest=FALSE} or \code{prtest="none"} to not print any tests. This applies to \code{print}, \code{latex}, and \code{plot} methods. } \item{round}{ Specify \code{round} to round the quantiles and optional mean and standard deviation to \code{round} digits after the decimal point. Set \code{round='auto'} to try an automatic choice. } \item{prmsd}{ set to \code{TRUE} to print mean and SD after the three quantiles, for continuous variables } \item{msdsize}{ defaults to \code{NULL} to use the current font size for the mean and standard deviation if \code{prmsd} is \code{TRUE}. Set to a character string or function to specify an alternate LaTeX font size. } \item{brmsd}{set to \code{TRUE} to put the mean and standard deviation on a separate line, for html} \item{long}{ set to \code{TRUE} to print the results for the first category on its own line, not on the same line with the variable label } \item{pdig}{ number of digits to the right of the decimal place for printing P-values. Default is \code{3}. This is passed to \code{format.pval}. } \item{eps}{ P-values less than \code{eps} will be printed as \code{< eps}. See \code{format.pval}. } \item{auxCol}{ an optional auxiliary column of information, right justified, to add in front of statistics typeset by \code{latex.summaryM}. This argument is a list with a single element that has a name specifying the column heading. If this name includes a newline character, the portions of the string before and after the newline form the main heading and the subheading (typically set in smaller font), respectively. See the \code{extracolheads} argument to \code{latex.default}. \code{auxCol} is filled with blanks when a variable being summarized takes up more than one row in the output. This happens with categorical variables. } \item{table.env}{set to \code{FALSE} to use \code{tabular} environment with no caption} \item{tabenv1}{set to \code{TRUE} in the case of stratification when you want only the first stratum's table to be in a table environment. This is useful when using \code{hyperref}.} \item{which}{Specifies whether to plot results for categorical variables, continuous variables, or both (the default).} \item{vars}{Subscripts (indexes) of variables to plot for \code{plotly} graphics. Default is to plot all variables of each type (categorical or continuous).} \item{conType}{ For drawing plots for continuous variables, extended box plots (box-percentile-type plots) are drawn by default, using all quantiles in \code{quant} except for the outermost ones, which are used for scaling the overall plot based on the non-stratified marginal distribution of the current response variable. Specify \code{conType='dot'} to draw dot plots showing the three quartiles instead. For extended box plots, means are drawn with a solid dot and vertical reference lines are placed at the three quartiles. Specify \code{conType='raw'} to make a strip chart showing the raw data.
This can only be used if the sample size for each right-hand-side group is less than or equal to \code{nmin}.} \item{cex.means}{ character size for means in box-percentile plots; default is .5} \item{cex}{character size for other plotted items} \item{height,width}{dimensions in pixels for the \code{plotly} \code{subplot} object containing all the extended box plots. If \code{height="auto"}, \code{plot.summaryM} will set \code{height} based on the number of continuous variables and \code{ncols}, or for dot charts it will use \code{Hmisc::plotlyHeightDotchart}. At present \code{height} is ignored for extended box plots due to vertical spacing problems with \code{plotly} graphics.} \item{xlim}{ vector of length two specifying x-axis limits. This is only used for plotting categorical variables. Limits for continuous variables are determined by the outer quantiles specified in \code{quant}. } \item{xlab}{x-axis label} \item{main}{a main title. This applies only to the plot for categorical variables.} \item{ncols}{number of columns for \code{plotly} graphics for extended box plots. Defaults to 2. Recommendation is for 1-2.} \item{caption}{character string containing LaTeX table captions.} \item{title}{ name of resulting LaTeX file omitting the \code{.tex} suffix. Default is the name of the \code{summary} object. If \code{caption} is specified, \code{title} is also used for the table's symbolic reference label. } \item{file}{name of file to write LaTeX code to. Specifying \code{file=""} will cause LaTeX code to just be printed to standard output rather than be stored in a permanent file. } \item{append}{specify \code{TRUE} to add code to an existing file} \item{rowlabel}{see \code{latex.default} (under the help file \code{latex})} \item{rowsep}{if \code{html} is \code{TRUE}, instructs the function to use a horizontal line to separate variables from one another. Recommended if \code{brmsd} is \code{TRUE}. Ignored for LaTeX.} \item{middle.bold}{ set to \code{TRUE} to have LaTeX use bold face for the middle quantile } \item{outer.size}{the font size for outer quantiles } \item{insert.bottom}{ set to \code{FALSE} to suppress inclusion of definitions placed at the bottom of LaTeX tables. You can also specify a character string containing other text that overrides the automatic text. At present such text always appears in the main caption for LaTeX. } \item{legend.bottom}{ set to \code{TRUE} to separate the table caption and legend. This will place table legends at the bottom of LaTeX tables. } \item{html}{set to \code{TRUE} to typeset with html} \item{mspecs}{list defining markup syntax for various languages, defaults to Hmisc \code{markupSpecs} which the user can use as a starting point for editing} \item{dcolumn}{see \code{latex}} } \value{ a list. \code{plot.summaryM} returns the number of pages of plots that were made if using base graphics, or \code{plotly} objects created by \code{plotly::subplot} otherwise. If both categorical and continuous variables were plotted, the returned object is a list with two named elements \code{Categorical} and \code{Continuous} each containing \code{plotly} objects. Otherwise a \code{plotly} object is returned. The \code{latex} method returns attributes \code{legend} and \code{nstrata}. } \section{Side Effects}{ \code{plot.summaryM} creates functions \code{Key} and \code{Key2} in frame 0 that will draw legends, if base graphics are being used.
} \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \references{ Harrell FE (2004): Statistical tables and plots using S and LaTeX. Document available from \url{https://hbiostat.org/R/Hmisc/summary.pdf}. } \seealso{ \code{\link{mChoice}}, \code{\link{label}}, \code{\link{dotchart3}}, \code{\link{print.char.matrix}}, \code{\link{update}}, \code{\link{formula}}, \code{\link{format.default}}, \code{\link{latex}}, \code{\link{latexTranslate}}, \code{\link{bpplt}}, \code{\link{tabulr}}, \code{\link{bpplotM}}, \code{\link{summaryP}} } \examples{ options(digits=3) set.seed(173) sex <- factor(sample(c("m","f"), 500, rep=TRUE)) country <- factor(sample(c('US', 'Canada'), 500, rep=TRUE)) age <- rnorm(500, 50, 5) sbp <- rnorm(500, 120, 12) label(sbp) <- 'Systolic BP' units(sbp) <- 'mmHg' treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE)) treatment[1] sbp[1] <- NA # Generate a 3-choice variable; each of 3 variables has 5 possible levels symp <- c('Headache','Stomach Ache','Hangnail', 'Muscle Ache','Depressed') symptom1 <- sample(symp, 500,TRUE) symptom2 <- sample(symp, 500,TRUE) symptom3 <- sample(symp, 500,TRUE) Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms') table(as.character(Symptoms)) # Note: In this example, some subjects have the same symptom checked # multiple times; in practice these redundant selections would be NAs # mChoice will ignore these redundant selections f <- summaryM(age + sex + sbp + Symptoms ~ treatment, test=TRUE) f # trio of numbers represent 25th, 50th, 75th percentile print(f, long=TRUE) plot(f) # first specify options(grType='plotly') to use plotly plot(f, conType='dot', prtest='P') bpplt() # annotated example showing layout of bp plot # Produce separate tables by country f <- summaryM(age + sex + sbp + Symptoms ~ treatment + country, groups='treatment', test=TRUE) f \dontrun{ getHdata(pbc) s5 <- summaryM(bili + albumin + stage + protime + sex + age + spiders ~ drug, data=pbc) print(s5, npct='both') # npct='both' : print both numerators and denominators plot(s5, which='categorical') Key(locator(1)) # draw legend at mouse click par(oma=c(3,0,0,0)) # leave outer margin at bottom plot(s5, which='continuous') # see also bpplotM Key2() # draw legend at lower left corner of plot # oma= above makes this default key fit the page better options(digits=3) w <- latex(s5, npct='both', here=TRUE, file='') options(grType='plotly') pbc <- upData(pbc, moveUnits = TRUE) s <- summaryM(bili + albumin + alk.phos + copper + spiders + sex ~ drug, data=pbc, test=TRUE) # Render html options(prType='html') s # invokes print.summaryM a <- plot(s) a$Categorical a$Continuous plot(s, which='con') } } \keyword{category} \keyword{interface} \keyword{hplot} \keyword{manip} \concept{grouping} \concept{stratification} \concept{aggregation} Hmisc/man/string.bounding.box.Rd0000644000176200001440000000140312524011270016250 0ustar liggesusers\name{string.bounding.box} \alias{string.bounding.box} \title{Determine Dimensions of Strings} \description{ This determines the number of rows and maximum number of columns of each string in a vector. 
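For example, a minimal sketch:
\preformatted{
s <- c('one line', 'two\nlines here')
string.bounding.box(s)   # rows: 1 2   columns: 8 10
}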
} \usage{ string.bounding.box(string, type = c("chars", "width")) } \arguments{ \item{string}{vector of strings} \item{type}{character: whether to count characters or screen columns} } \value{ \item{rows}{vector containing the number of character rows in each string} \item{columns}{vector containing the maximum number of character columns in each string} } \author{Charles Dupont} \seealso{\code{\link{nchar}}, \code{\link{stringDims}}} \examples{ a <- c("this is a single line string", "This is a\nmulti-line string") stringDims(a) } \keyword{print} Hmisc/man/string.break.line.Rd0000644000176200001440000000144012243661443015702 0ustar liggesusers\name{string.break.line} \alias{string.break.line} \title{Break a String into Many Lines at Newlines} \description{ Takes a string and breaks it into separate substrings where there are newline characters. } \usage{ string.break.line(string) } \arguments{ \item{string}{character vector to be separated into many lines.} } \value{ Returns a list that is the same length as the \code{string} argument. Each list element is a character vector. Each character vector's elements are the split lines of the corresponding element in the \code{string} argument vector. } \author{Charles Dupont} \seealso{\code{\link{strsplit}}} \examples{ a <- c('', 'this is a single line string', 'This is a\nmulti-line string.') b <- string.break.line(a) } \keyword{print} \keyword{character} Hmisc/man/mChoice.Rd0000644000176200001440000002167614333754371013744 0ustar liggesusers\name{mChoice} \alias{mChoice} \alias{format.mChoice} \alias{print.mChoice} \alias{summary.mChoice} \alias{as.character.mChoice} \alias{as.double.mChoice} \alias{inmChoice} \alias{inmChoicelike} \alias{nmChoice} \alias{match.mChoice} \alias{[.mChoice} \alias{print.summary.mChoice} \alias{is.mChoice} \alias{Math.mChoice} \alias{Ops.mChoice} \alias{Summary.mChoice} \title{Methods for Storing and Analyzing Multiple Choice Variables} \description{ \code{mChoice} is a function that is useful for grouping variables that represent individual choices on a multiple choice question. These choices are typically factor or character values but may be of any type. Levels of component factor variables need not be the same; all unique levels (or unique character values) are collected over all of the multiple variables. Then a new character vector is formed with integer choice numbers separated by semicolons. Ideally, a database system would have exported the semicolon-separated character strings with a \code{levels} attribute containing strings defining value labels corresponding to the integer choice numbers. \code{mChoice} is a function for creating a multiple-choice variable after the fact. \code{mChoice} variables are explicitly handled by the \code{describe} and \code{summary.formula} functions. \code{NA}s or blanks in input variables are ignored. \code{format.mChoice} will convert the multiple choice representation to text form by substituting \code{levels} for integer codes. \code{as.double.mChoice} converts the \code{mChoice} object to a binary numeric matrix, one column per used level (or all levels if \code{drop=FALSE}). This is called by the user by invoking \code{as.numeric}. There is a \code{print} method and a \code{summary} method, and a \code{print} method for the \code{summary.mChoice} object.
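For example, a minimal sketch (two hypothetical choice vectors):
\preformatted{
m <- mChoice(c('a','b'), c('b',NA), label='Letters')
format(m)       # "a;b" "b"
as.numeric(m)   # 2 x 2 binary indicator matrix for levels a, b
}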
The \code{summary} method computes frequencies of all two-way choice combinations, the frequencies of the top 5 combinations, information about which other choices are present when each given choice is present, and the frequency distribution of the number of choices per observation. This \code{summary} output is used in the \code{describe} function. \code{inmChoice} creates a logical vector the same length as \code{x} whose elements are \code{TRUE} when the observation in \code{x} contains at least one of the codes or value labels in the second argument. \code{match.mChoice} creates an integer vector of the indexes of all elements in \code{table} which contain any of the specified levels. \code{nmChoice} returns an integer vector of the number of choices that were made. \code{is.mChoice} returns \code{TRUE} if the argument is a multiple choice variable. } \usage{ mChoice(\dots, label='', sort.levels=c('original','alphabetic'), add.none=FALSE, drop=TRUE, ignoreNA=TRUE) \method{format}{mChoice}(x, minlength=NULL, sep=";", \dots) \method{as.double}{mChoice}(x, drop=FALSE, ...) \method{print}{mChoice}(x, quote=FALSE, max.levels=NULL, width=getOption("width"), ...) \method{as.character}{mChoice}(x, ...) \method{summary}{mChoice}(object, ncombos=5, minlength=NULL, drop=TRUE, ...) \method{print}{summary.mChoice}(x, prlabel=TRUE, ...) \method{[}{mChoice}(x, ..., drop=FALSE) match.mChoice(x, table, nomatch=NA, incomparables=FALSE) inmChoice(x, values, condition=c('any', 'all')) inmChoicelike(x, values, condition=c('any', 'all'), ignore.case=FALSE, fixed=FALSE) nmChoice(object) is.mChoice(x) \method{Summary}{mChoice}(..., na.rm) } \arguments{ \item{na.rm}{ Logical: remove \code{NA}'s from data } \item{table}{ a vector (mChoice) of values to be matched against. } \item{nomatch}{ value to return if a value for \code{x} does not exist in \code{table}. } \item{incomparables}{ logical: whether incomparable values should be compared. } \item{...}{ a series of vectors } \item{label}{ a character string \code{label} attribute to attach to the matrix created by \code{mChoice} } \item{sort.levels}{ set \code{sort.levels="alphabetic"} to sort the columns of the matrix created by \code{mChoice} alphabetically by category rather than by the original order of levels in component factor variables (if there were any input variables that were factors) } \item{add.none}{ Set \code{add.none} to \code{TRUE} to make a new category \code{'none'} if it doesn't already exist and if there are observations with no choices selected. } \item{drop}{ set \code{drop=FALSE} to keep unused factor levels as columns of the matrix produced by \code{mChoice} } \item{ignoreNA}{set to \code{FALSE} to keep any \code{NA}s present in data as a real level. Prior to Hmisc 4.7-2 \code{FALSE} was the default.} \item{x}{ an object of class \code{"mchoice"} such as that created by \code{mChoice}. For \code{is.mChoice}, any object. } \item{object}{ an object of class \code{"mchoice"} such as that created by \code{mChoice} } \item{ncombos}{ maximum number of combinations. } \item{width}{ Width of a line of text to be formatted } \item{quote}{ quote the output } \item{max.levels}{max levels to be displayed} \item{minlength}{ By default no abbreviation of levels is done in \code{format} and \code{summary}. Specify a positive integer to use abbreviation in those functions. See \code{\link{abbreviate}}.
} \item{sep}{ character to use to separate levels when formatting } \item{prlabel}{ set to \code{FALSE} to keep \code{print.summary.mChoice} from printing the variable label and number of unique values } \item{values}{ a scalar or vector. If \code{values} is integer, it is the choice codes, and if it is a character vector, it is assumed to be value labels. For \code{inmChoicelike} \code{values} must be character strings which are pieces of choice labels. } \item{condition}{set to \code{'all'} for \code{inmChoice} to require that all choices in \code{values} be present instead of the default of any of them present.} \item{ignore.case}{set to \code{TRUE} to have \code{inmChoicelike} ignore case in the data when matching on \code{values}} \item{fixed}{see \code{grep}} } \value{ \code{mChoice} returns a character vector of class \code{"mChoice"} plus attributes \code{"levels"} and \code{"label"}. \code{summary.mChoice} returns an object of class \code{"summary.mChoice"}. \code{inmChoice} and \code{inmChoicelike} return a logical vector. \code{format.mChoice} returns a character vector, and \code{as.double.mChoice} returns a binary numeric matrix. \code{nmChoice} returns an integer vector } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{label}}, \code{\link{combplotp}} } \examples{ options(digits=3) set.seed(3) n <- 20 sex <- factor(sample(c("m","f"), n, rep=TRUE)) age <- rnorm(n, 50, 5) treatment <- factor(sample(c("Drug","Placebo"), n, rep=TRUE)) # Generate a 3-choice variable; each of 3 variables has 5 possible levels symp <- c('Headache','Stomach Ache','Hangnail', 'Muscle Ache','Depressed') symptom1 <- sample(symp, n, TRUE) symptom2 <- sample(symp, n, TRUE) symptom3 <- sample(symp, n, TRUE) cbind(symptom1, symptom2, symptom3)[1:5,] Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms') Symptoms print(Symptoms, long=TRUE) format(Symptoms[1:5]) inmChoice(Symptoms,'Headache') inmChoicelike(Symptoms, 'head', ignore.case=TRUE) levels(Symptoms) inmChoice(Symptoms, 3) # Find all subjects with either of two symptoms inmChoice(Symptoms, c('Headache','Hangnail')) # Note: In this example, some subjects have the same symptom checked # multiple times; in practice these redundant selections would be NAs # mChoice will ignore these redundant selections # Find all subjects with both symptoms inmChoice(Symptoms, c('Headache', 'Hangnail'), condition='all') meanage <- N <- numeric(5) for(j in 1:5) { meanage[j] <- mean(age[inmChoice(Symptoms,j)]) N[j] <- sum(inmChoice(Symptoms,j)) } names(meanage) <- names(N) <- levels(Symptoms) meanage N # Manually compute mean age for 2 symptoms mean(age[symptom1=='Headache' | symptom2=='Headache' | symptom3=='Headache']) mean(age[symptom1=='Hangnail' | symptom2=='Hangnail' | symptom3=='Hangnail']) summary(Symptoms) #Frequency table sex*treatment, sex*Symptoms summary(sex ~ treatment + Symptoms, fun=table) # Check: ma <- inmChoice(Symptoms, 'Muscle Ache') table(sex[ma]) # could also do: # summary(sex ~ treatment + mChoice(symptom1,symptom2,symptom3), fun=table) #Compute mean age, separately by 3 variables summary(age ~ sex + treatment + Symptoms) summary(age ~ sex + treatment + Symptoms, method="cross") f <- summary(treatment ~ age + sex + Symptoms, method="reverse", test=TRUE) f # trio of numbers represent 25th, 50th, 75th percentile print(f, long=TRUE) } \keyword{category} \keyword{manip} \concept{multiple choice} 
Hmisc/man/Merge.Rd0000644000176200001440000000376414252463134013431 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Merge.r \name{Merge} \alias{Merge} \title{Merge Multiple Data Frames or Data Tables} \usage{ Merge(..., id = NULL, all = TRUE, verbose = TRUE) } \arguments{ \item{\dots}{two or more dataframes or data tables} \item{id}{a formula containing all the identification variables such that the combination of these variables uniquely identifies subjects or records of interest. May be omitted for data tables; in that case the \code{key} function retrieves the id variables.} \item{all}{set to \code{FALSE} to drop observations not found in second and later data frames (only applies if not using \code{data.table})} \item{verbose}{set to \code{FALSE} to not print information about observations} } \description{ Merges an arbitrarily large series of data frames or data tables containing common \code{id} variables. Information about the number of observations and the number of unique \code{id}s in individual and final merged datasets is printed. The first data frame/table has special meaning in that all of its observations are kept whether they match \code{id}s in other data frames or not. For all other data frames, by default non-matching observations are dropped. The first data frame is also the one against which counts of unique \code{id}s are compared. Sometimes \code{merge} drops variable attributes such as \code{labels} and \code{units}. These are restored by \code{Merge}. } \examples{ \dontrun{ a <- data.frame(sid=1:3, age=c(20,30,40)) b <- data.frame(sid=c(1,2,2), bp=c(120,130,140)) d <- data.frame(sid=c(1,3,4), wt=c(170,180,190)) all <- Merge(a, b, d, id = ~ sid) # First file should be the master file and must # contain all ids that ever occur. ids not in the master will # not be merged from other datasets. a <- data.table(a); setkey(a, sid) # data.table also does not allow duplicates without allow.cartesian=TRUE b <- data.table(sid=1:2, bp=c(120,130)); setkey(b, sid) d <- data.table(d); setkey(d, sid) all <- Merge(a, b, d) } } Hmisc/man/dotchartpl.Rd0000644000176200001440000002173613747330224014537 0ustar liggesusers\name{dotchartpl} \alias{dotchartpl} \title{Enhanced Version of dotchart Function for plotly} \description{ This function produces a \code{plotly} interactive graphic and accepts a different format of data input than the other \code{dotchart}x functions. It was written to handle a hierarchical data structure including strata that further subdivide the main classes. Strata, indicated by the \code{mult} variable, are shown on the same horizontal line, and, if the variable \code{big} is \code{FALSE}, will appear slightly below the main line, using smaller symbols, and having some transparency. This is intended to handle output such as that from the \code{summaryP} function when there is a superpositioning variable \code{group} and a stratification variable \code{mult}, especially when the data have been run through the \code{addMarginal} function to create \code{mult} categories labelled \code{"All"} for which the user will specify \code{big=TRUE} to indicate non-stratified estimates (stratified only on \code{group}) to emphasize. When viewing graphics that used \code{mult} and \code{big}, the user can click on the legend entries for the small points to hide the finely stratified estimates for those \code{group}s.
When \code{group} is used but \code{mult} and \code{big} are not, and when the \code{group} variable has exactly two distinct values, you can specify \code{refgroup} to get the difference between two proportions in addition to the individual proportions. The individual proportions are plotted, confidence intervals for the difference are shown in hover text, and half-width confidence intervals for the difference, centered at the midpoint of the proportions, are drawn. These have the property of intersecting the two proportions if and only if there is no significant difference at the \code{1 - conf.int} level. Specify \code{fun=exp} and \code{ifun=log} if estimates and confidence limits are on the log scale. Make sure that zeros were prevented in the original calculations. For exponential hazard rates this can be accomplished by replacing event counts of 0 with 0.5. } \usage{ dotchartpl(x, major=NULL, minor=NULL, group=NULL, mult=NULL, big=NULL, htext=NULL, num=NULL, denom=NULL, numlabel='', denomlabel='', fun=function(x) x, ifun=function(x) x, op='-', lower=NULL, upper=NULL, refgroup=NULL, sortdiff=TRUE, conf.int=0.95, minkeep=NULL, xlim=NULL, xlab='Proportion', tracename=NULL, limitstracename='Limits', nonbigtracename='Stratified Estimates', dec=3, width=800, height=NULL, col=colorspace::rainbow_hcl) } \arguments{ \item{x}{a numeric vector used for values on the \code{x}-axis} \item{major}{major vertical category, e.g., variable labels} \item{minor}{minor vertical category, e.g. category levels within variables} \item{group}{superpositioning variable such as treatment} \item{mult}{strata names for further subdivisions without \code{group}s} \item{big}{omit if all levels of \code{mult} are equally important or if \code{mult} is omitted. Otherwise denotes major (larger points, right on horizontal lines) vs. minor (smaller, transparent points slightly below the line).} \item{htext}{additional hover text per point} \item{num}{if \code{x} represents proportions, optionally specifies numerators to be used in fractions added to hover text. When \code{num} is given, \code{x} is automatically added to hover text, rounded to 3 digits after the decimal point.} \item{denom}{like \code{num} but for denominators} \item{numlabel}{character string to put to the right of the numerator in hover text} \item{denomlabel}{character string to put to the right of the denominator in hover text} \item{fun}{a transformation to make when printing estimates. For example, one may specify \code{fun=exp} to anti-log estimates and confidence limits that were computed on a log basis} \item{ifun}{inverse transformation of \code{fun}} \item{op}{set to, for example, \code{'/'} when \code{fun=exp} and effects are computed as ratios instead of differences. This is used in hover text.} \item{lower}{lower limits for optional error bars} \item{upper}{upper limits for optional error bars} \item{refgroup}{if \code{group} is specified and there are exactly two groups, specify the character string for the reference group in computing difference in proportions. For example if \code{refgroup='A'} and the \code{group} levels are \code{'A','B'}, you will get B - A.} \item{sortdiff}{\code{minor} categories are sorted by descending values of the difference in proportions when \code{refgroup} is used, unless you specify \code{sortdiff=FALSE}} \item{conf.int}{confidence level for computing confidence intervals for the difference in two proportions.
Specify \code{conf.int=FALSE} to suppress confidence intervals.} \item{minkeep}{if \code{refgroup} and \code{minkeep} are both given, observations that are at or above \code{minkeep} for at least one of the groups are retained. The default is to keep all observations.} \item{xlim}{\code{x}-axis limits} \item{xlab}{\code{x}-axis label} \item{tracename}{\code{plotly} trace name if \code{group} is not used} \item{limitstracename}{\code{plotly} trace name for \code{lower} and \code{upper} if \code{group} is not used} \item{nonbigtracename}{\code{plotly} trace name used for non-big elements, which usually represent stratified versions of the "big" observations} \item{col}{a function or vector of colors to assign to \code{group}. If a function it will be evaluated with an argument equal to the number of distinct groups.} \item{dec}{number of places to the right of the decimal point for formatting numeric quantities in hover text} \item{width}{width of plot in pixels} \item{height}{height of plot in pixels; computed from number of strata by default} } \value{a \code{plotly} object. An attribute \code{levelsRemoved} is added if \code{minkeep} is used and any categories were omitted from the plot as a result. This is a character vector with categories removed. If \code{major} is present, the strings are of the form \code{major:minor}} \author{Frank Harrell} \seealso{\code{\link{dotchartp}}} \examples{ \dontrun{ set.seed(1) d <- expand.grid(major=c('Alabama', 'Alaska', 'Arkansas'), minor=c('East', 'West'), group=c('Female', 'Male'), city=0:2) n <- nrow(d) d$num <- round(100*runif(n)) d$denom <- d$num + round(100*runif(n)) d$x <- d$num / d$denom d$lower <- d$x - runif(n) d$upper <- d$x + runif(n) with(d, dotchartpl(x, major, minor, group, city, lower=lower, upper=upper, big=city==0, num=num, denom=denom, xlab='x')) # Show half-width confidence intervals for Female - Male differences # after subsetting the data to have only one record per # state/region/group d <- subset(d, city == 0) with(d, dotchartpl(x, major, minor, group, num=num, denom=denom, lower=lower, upper=upper, refgroup='Male') ) n <- 500 set.seed(1) d <- data.frame( race = sample(c('Asian', 'Black/AA', 'White'), n, TRUE), sex = sample(c('Female', 'Male'), n, TRUE), treat = sample(c('A', 'B'), n, TRUE), smoking = sample(c('Smoker', 'Non-smoker'), n, TRUE), hypertension = sample(c('Hypertensive', 'Non-Hypertensive'), n, TRUE), region = sample(c('North America','Europe','South America', 'Europe', 'Asia', 'Central America'), n, TRUE)) d <- upData(d, labels=c(race='Race', sex='Sex')) dm <- addMarginal(d, region) s <- summaryP(race + sex + smoking + hypertension ~ region + treat, data=dm) s$region <- ifelse(s$region == 'All', 'All Regions', as.character(s$region)) with(s, dotchartpl(freq / denom, major=var, minor=val, group=treat, mult=region, big=region == 'All Regions', num=freq, denom=denom) ) s2 <- s[- attr(s, 'rows.to.exclude1'), ] with(s2, dotchartpl(freq / denom, major=var, minor=val, group=treat, mult=region, big=region == 'All Regions', num=freq, denom=denom) ) # Note these plots can be created by plot.summaryP when options(grType='plotly') # Plot hazard rates and ratios with confidence limits, on log scale d <- data.frame(tx=c('a', 'a', 'b', 'b'), event=c('MI', 'stroke', 'MI', 'stroke'), count=c(10, 5, 5, 2), exposure=c(1000, 1000, 900, 900)) # There were no zero event counts in this dataset. In general we # want to handle that, hence the 0.5 below d <- upData(d, hazard = pmax(0.5, count) / exposure, selog = sqrt(1. 
/ pmax(0.5, count)), lower = log(hazard) - 1.96 * selog, upper = log(hazard) + 1.96 * selog) with(d, dotchartpl(log(hazard), minor=event, group=tx, num=count, denom=exposure, lower=lower, upper=upper, fun=exp, ifun=log, op='/', numlabel='events', denomlabel='years', refgroup='a', xlab='Events Per Person-Year') ) } } \keyword{hplot} Hmisc/man/stat_plsmo.Rd0000644000176200001440000000362713632313445014560 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stat-plsmo.r \name{stat_plsmo} \alias{stat_plsmo} \title{Add a lowess smoother without confidence bands.} \usage{ stat_plsmo( mapping = NULL, data = NULL, geom = "smooth", position = "identity", n = 80, fullrange = FALSE, span = 2/3, fun = function(x) x, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ... ) } \arguments{ \item{mapping, data, geom, position, show.legend, inherit.aes}{see ggplot2 documentation} \item{n}{number of points to evaluate smoother at} \item{fullrange}{should the fit span the full range of the plot, or just the data} \item{span}{see \code{f} argument to \code{lowess}} \item{fun}{a function to transform smoothed \code{y}} \item{na.rm}{If \code{FALSE} (the default), removes missing values with a warning. If \code{TRUE} silently removes missing values.} \item{...}{other arguments are passed to smoothing function} } \value{ a data.frame with additional columns \item{y}{predicted value} } \description{ Automatically selects \code{iter=0} for \code{lowess} if \code{y} is binary, otherwise uses \code{iter=3}. } \examples{ \donttest{ c <- ggplot(mtcars, aes(qsec, wt)) c + stat_plsmo() c + stat_plsmo() + geom_point() c + stat_plsmo(span = 0.1) + geom_point() # Smoothers for subsets c <- ggplot(mtcars, aes(y=wt, x=mpg)) + facet_grid(. ~ cyl) c + stat_plsmo() + geom_point() c + stat_plsmo(fullrange = TRUE) + geom_point() # Geoms and stats are automatically split by aesthetics that are factors c <- ggplot(mtcars, aes(y=wt, x=mpg, colour=factor(cyl))) c + stat_plsmo() + geom_point() c + stat_plsmo(aes(fill = factor(cyl))) + geom_point() c + stat_plsmo(fullrange=TRUE) + geom_point() # Example with logistic regression data("kyphosis", package="rpart") qplot(Age, as.numeric(Kyphosis) - 1, data = kyphosis) + stat_plsmo() } } \seealso{ \code{\link{lowess}}, the smoother that is used. } Hmisc/man/mgp.axis.Rd0000644000176200001440000000521112675331665014122 0ustar liggesusers\name{mgp.axis} \alias{mgp.axis} \alias{mgp.axis.labels} \title{Draw Axes With Side-Specific mgp Parameters} \description{ \code{mgp.axis} is a version of \code{axis} that uses the appropriate side-specific \code{mgp} parameter (see \code{\link{par}}) to account for the different space requirements of axis labels for vertical vs. horizontal tick marks. \code{mgp.axis} also fixes a bug in \code{axis(2,\dots)} that causes it to assume \code{las=1}. \code{mgp.axis.labels} is used so that different spacing between tick marks and axis tick mark labels may be specified for x- and y-axes. Use \code{mgp.axis.labels('default')} to set defaults. Users can set values manually using \code{mgp.axis.labels(x,y)} where \code{x} and \code{y} are the second values of \code{par('mgp')} to use. Use \code{mgp.axis.labels(type=w)} to retrieve values: \code{w='x'} or \code{'y'} returns the 3 \code{mgp} values for that axis, \code{w='x and y'} returns a list with elements for both axes, and \code{w='xy'} returns only the second \code{mgp} element for each axis. 
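For instance (a sketch; the numeric values are illustrative): \preformatted{
mgp.axis.labels(c(3, .5, 0), type='x')  # set the 3 mgp values for the x-axis
mgp.axis.labels(type='x')               # retrieve them
mgp.axis.labels(type='xy')              # second mgp element for both axes
}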
} \usage{ mgp.axis(side, at = NULL, \dots, mgp = mgp.axis.labels(type = if (side == 1 | side == 3) "x" else "y"), axistitle = NULL, cex.axis=par('cex.axis'), cex.lab=par('cex.lab')) mgp.axis.labels(value,type=c('xy','x','y','x and y')) } \arguments{ \item{side,at}{see \code{\link{par}}} \item{\dots}{arguments passed through to \code{\link{axis}}} \item{mgp,cex.axis,cex.lab}{see \code{\link{par}}} \item{axistitle}{if specified will cause \code{axistitle} to be drawn on the appropriate axis as a title} \item{value}{vector of values to which to set system option \code{mgp.axis.labels}} \item{type}{see above} } \section{Side Effects}{\code{mgp.axis.labels} stores the value in the system option \code{mgp.axis.labels}} \value{ \code{mgp.axis.labels} returns the value of \code{mgp} (only the second element of \code{mgp} if \code{type="xy"} or a list with elements \code{x} and \code{y} if \code{type="x and y"}, each list element being a 3-vector) for the appropriate axis if \code{value} is not specified, otherwise it returns nothing but the system option \code{mgp.axis.labels} is set. \code{mgp.axis} returns nothing. } \author{Frank Harrell} \seealso{\code{\link{par}}} \examples{ \dontrun{ mgp.axis.labels(type='x') # get default value for x-axis mgp.axis.labels(type='y') # get value for y-axis mgp.axis.labels(type='xy') # get 2nd element of both mgps mgp.axis.labels(type='x and y') # get a list with 2 elements mgp.axis.labels(c(3,.5,0), type='x') # set options('mgp.axis.labels') # retrieve plot(..., axes=FALSE) mgp.axis(1, "X Label") mgp.axis(2, "Y Label") }} \keyword{iplot} \keyword{dplot} \keyword{environment} Hmisc/man/simMarkovOrd.Rd0000644000176200001440000001014414014740471014776 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/simMarkovOrd.r \name{simMarkovOrd} \alias{simMarkovOrd} \title{simMarkovOrd} \usage{ simMarkovOrd( n = 1, y, times, initial, X = NULL, absorb = NULL, intercepts, g, carry = FALSE, rdsample = NULL, ... ) } \arguments{ \item{n}{number of subjects to simulate} \item{y}{vector of possible y values in order (numeric, character, factor)} \item{times}{vector of measurement times} \item{initial}{initial value of \code{y} (baseline state; numeric, character, or factor matching \code{y}). If length 1 this value is used for all subjects, otherwise it is a vector of length \code{n}.} \item{X}{an optional vector or matrix of baseline covariate values passed to \code{g}. If a vector, \code{X} represents a set of single values for all the covariates and those values are used for every subject. Otherwise \code{X} is a matrix with rows corresponding to subjects and columns corresponding to covariates which \code{g} must know how to handle. \code{g} only sees one row of \code{X} at a time.} \item{absorb}{vector of absorbing states, a subset of \code{y} (numeric, character, or factor matching \code{y}). The default is no absorbing states. Observations are truncated when an absorbing state is simulated.} \item{intercepts}{vector of intercepts in the proportional odds model. There must be one fewer of these than the length of \code{y}.} \item{g}{a user-specified function of three or more arguments which in order are \code{yprev} - the value of \code{y} at the previous time, the current time \code{t}, the \code{gap} between the previous time and the current time, an optional (usually named) covariate vector \code{X}, and optional arguments such as a regression coefficient value to simulate from. 
The function needs to allow \code{yprev} to be a vector and \code{yprev} must not include any absorbing states. The \code{g} function returns the linear predictor for the proportional odds model aside from \code{intercepts}. The returned value must be a matrix with row names taken from \code{yprev}. If the model is a proportional odds model, the returned value must be one column. If it is a partial proportional odds model, the value must have one column for each distinct value of the response variable Y after the first one, with the levels of Y used as optional column names. So columns correspond to \code{intercepts}. The different columns are used for \code{y}-specific contributions to the linear predictor (aside from \code{intercepts}) for a partial or constrained partial proportional odds model. Parameters for partial proportional odds effects may be included in the ... arguments.} \item{carry}{set to \code{TRUE} to carry absorbing state forward after it is first hit; the default is to end records for the subject once the absorbing state is hit} \item{rdsample}{an optional function to do response-dependent sampling. It is a function of these arguments, which are vectors that stop at any absorbing state: \code{times} (ascending measurement times for one subject) and \code{y} (vector of ordinal outcomes at these times for one subject). The function returns \code{NULL} if no observations are to be dropped, or the vector of new times to sample.} \item{...}{additional arguments to pass to \code{g} such as a regression coefficient} } \value{ data frame with one row per subject per time, and columns id, time, yprev, y, and values passed in ... } \description{ Simulate Ordinal Markov Process } \details{ Simulates longitudinal data for subjects following a first-order Markov process under a proportional odds model. Optionally, response-dependent sampling can be done, e.g., if a subject hits a specified state at time t, measurements are removed for times t+1, t+3, t+5, ... This is applicable when, for example, a study of hospitalized patients samples every day, Y=1 denotes patient discharge to home, and sampling is less frequent outside the hospital. This example assumes that arriving home is not an absorbing state, i.e., a patient could return to the hospital. } \seealso{ \url{https://hbiostat.org/R/Hmisc/markov/} } \author{ Frank Harrell } Hmisc/man/plotCorrPrecision.Rd0000644000176200001440000000222514112736760016047 0ustar liggesusers\name{plotCorrPrecision} \alias{plotCorrPrecision} \title{Plot Precision of Estimate of Pearson Correlation Coefficient} \description{ This function plots the precision (margin of error) of the product-moment linear correlation coefficient r vs. sample size, for a given vector of correlation coefficients \code{rho}. Precision is defined as the larger of the upper confidence limit minus rho and rho minus the lower confidence limit. \code{labcurve} is used to automatically label the curves. } \usage{ plotCorrPrecision(rho = c(0, 0.5), n = seq(10, 400, length.out = 100), conf.int = 0.95, offset=0.025, \dots) } \arguments{ \item{rho}{single or vector of true correlations. 
A worst-case precision graph results from rho=0} \item{n}{vector of sample sizes to use on the x-axis} \item{conf.int}{confidence coefficient; default uses 0.95 confidence limits} \item{offset}{see \code{\link{labcurve}}} \item{\dots}{other arguments to \code{\link{labcurve}}} } \author{Xing Wang and Frank Harrell} \seealso{\code{\link{rcorr}},\code{\link{cor}},\code{\link{cor.test}}} \examples{ plotCorrPrecision() plotCorrPrecision(rho=0) } \keyword{htest} Hmisc/man/redun.Rd0000644000176200001440000001253313714234045013503 0ustar liggesusers\name{redun} \alias{redun} \alias{print.redun} \title{Redundancy Analysis} \description{ Uses flexible parametric additive models (see \code{\link{areg}} and its use of regression splines) to determine how well each variable can be predicted from the remaining variables. Variables are dropped in a stepwise fashion, removing the most predictable variable at each step; the remaining variables are then used as predictors. The process continues until no variable still in the list of predictors can be predicted with an \eqn{R^2} or adjusted \eqn{R^2} of at least \code{r2} or until dropping the variable with the highest \eqn{R^2} (adjusted or ordinary) would cause a variable that was dropped earlier to no longer be predicted at least at the \code{r2} level from the now smaller list of predictors. } \usage{ redun(formula, data=NULL, subset=NULL, r2 = 0.9, type = c("ordinary", "adjusted"), nk = 3, tlinear = TRUE, allcat=FALSE, minfreq=0, iterms=FALSE, pc=FALSE, pr = FALSE, ...) \method{print}{redun}(x, digits=3, long=TRUE, ...) } \arguments{ \item{formula}{a formula. Enclose a variable in \code{I()} to force linearity.} \item{data}{a data frame} \item{subset}{usual subsetting expression} \item{r2}{ordinary or adjusted \eqn{R^2} cutoff for redundancy} \item{type}{specify \code{"adjusted"} to use adjusted \eqn{R^2}} \item{nk}{number of knots to use for continuous variables. Use \code{nk=0} to force linearity for all variables.} \item{tlinear}{set to \code{FALSE} to allow a variable to be automatically nonlinearly transformed (see \code{areg}) while being predicted. By default, only continuous variables on the right hand side (i.e., while they are being predictors) are automatically transformed, using regression splines. Estimating transformations for target (dependent) variables causes more overfitting than doing so for predictors.} \item{allcat}{set to \code{TRUE} to require that all categories of categorical variables having more than two categories be redundant (see details below)} \item{minfreq}{For a binary or categorical variable, there must be at least two categories with at least \code{minfreq} observations or the variable will be dropped and not checked for redundancy against other variables. \code{minfreq} also specifies the minimum frequency of a category or its complement before that category is considered when \code{allcat=TRUE}.} \item{iterms}{set to \code{TRUE} to consider derived terms (dummy variables and nonlinear spline components) as separate variables. This will perform a redundancy analysis on pieces of the variables.} \item{pc}{if \code{iterms=TRUE} you can set \code{pc} to \code{TRUE} to replace the submatrix of terms corresponding to each variable with the orthogonal principal components before doing the redundancy analysis. 
The components are based on the correlation matrix.} \item{pr}{set to \code{TRUE} to monitor progress of the stepwise algorithm} \item{\dots}{arguments to pass to \code{dataframeReduce} to remove "difficult" variables from \code{data} if \code{formula} is \code{~.} to use all variables in \code{data} (\code{data} must be specified when these arguments are used). Ignored for \code{print}.} \item{x}{an object created by \code{redun}} \item{digits}{number of digits to which to round \eqn{R^2} values when printing} \item{long}{set to \code{FALSE} to prevent the \code{print} method from printing the \eqn{R^2} history and the original \eqn{R^2} with which each variable can be predicted from ALL other variables.} } \value{an object of class \code{"redun"}} \details{ A categorical variable is deemed redundant if a linear combination of dummy variables representing it can be predicted from a linear combination of other variables. For example, if there were 4 cities in the data and each city's rainfall was also present as a variable, with virtually the same rainfall reported for all observations for a city, city would be redundant given rainfall (or vice-versa; the one declared redundant would be the first one in the formula). If two cities had the same rainfall, \code{city} might be declared redundant even though tied cities might be deemed non-redundant in another setting. To ensure that all categories may be predicted well from other variables, use the \code{allcat} option. To ignore categories that are too infrequent or too frequent, set \code{minfreq} to a nonzero integer. When the number of observations in the category is below this number or the number of observations not in the category is below this number, no attempt is made to predict observations being in that category individually for the purpose of redundancy detection.} \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{\code{\link{areg}}, \code{\link{dataframeReduce}}, \code{\link{transcan}}, \code{\link{varclus}}, \code{subselect::genetic}} \examples{ set.seed(1) n <- 100 x1 <- runif(n) x2 <- runif(n) x3 <- x1 + x2 + runif(n)/10 x4 <- x1 + x2 + x3 + runif(n)/10 x5 <- factor(sample(c('a','b','c'),n,replace=TRUE)) x6 <- 1*(x5=='a' | x5=='c') redun(~x1+x2+x3+x4+x5+x6, r2=.8) redun(~x1+x2+x3+x4+x5+x6, r2=.8, minfreq=40) redun(~x1+x2+x3+x4+x5+x6, r2=.8, allcat=TRUE) # x5 is no longer redundant but x6 is } \keyword{smooth} \keyword{regression} \keyword{multivariate} \keyword{methods} \keyword{models} \concept{data reduction} Hmisc/man/list.tree.Rd0000644000176200001440000000365412243661443014305 0ustar liggesusers\name{list.tree} \alias{list.tree} \title{ Pretty-print the Structure of a Data Object } \description{ This is a function to pretty-print the structure of any data object (usually a list). It is similar to the R function \code{str}. } \usage{ list.tree(struct, depth=-1, numbers=FALSE, maxlen=22, maxcomp=12, attr.print=TRUE, front="", fill=". ", name.of, size=TRUE) } \arguments{ \item{struct}{ The object to be displayed } \item{depth}{ Maximum depth of recursion (of lists within lists \dots) to be printed; negative value means no limit on depth. } \item{numbers}{ If TRUE, use numbers in leader instead of dots to represent position in structure. } \item{maxlen}{ Approximate maximum length (in characters) allowed on each line to give the first few values of a vector. maxlen=0 suppresses printing any values. 
} \item{maxcomp}{ Maximum number of components of any list that will be described. } \item{attr.print}{ Logical flag, determining whether a description of attributes will be printed. } \item{front}{ Front material of a line, for internal use. } \item{fill}{ Fill character used for each level of indentation. } \item{name.of}{ Name of object, for internal use (deparsed version of struct by default). } \item{size}{ Logical flag, should the size of the object in bytes be printed? }} \details{ A description of the structure of struct will be printed in outline form, with indentation for each level of recursion, showing the internal storage mode, length, class(es) if any, attributes, and first few elements of each data vector. By default each level of list recursion is indicated by a "." and attributes by "A". } \seealso{ \code{\link{str}} } \examples{ X <- list(a=ordered(c(1:30,30:1)),b=c("Rick","John","Allan"), c=diag(300),e=cbind(p=1008:1019,q=4)) list.tree(X) # In R you can say str(X) } \author{ Alan Zaslavsky, \email{zaslavsk@hcp.med.harvard.edu} } \keyword{documentation} % Converted by Sd2Rd version 1.21. Hmisc/man/capitalize.Rd0000644000176200001440000000126412243661443014514 0ustar liggesusers\name{capitalize} \alias{capitalize} %- Also NEED an '\alias' for EACH other topic documented here. \title{Capitalize the First Letter of a String} \description{ Capitalizes the first letter of each element of the string vector. } \usage{ capitalize(string) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{string}{ String to be capitalized } } \value{ Returns a vector of characters with the first letter capitalized } \author{ Charles Dupont } \examples{ capitalize(c("Hello", "bob", "daN")) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ manip } \keyword{ character }% __ONLY ONE__ keyword per line Hmisc/man/windows/0000755000176200001440000000000014112734310013556 5ustar liggesusersHmisc/man/windows/sas.get.Rd0000644000176200001440000004646714112734310015432 0ustar liggesusers\name{sas.get} \alias{sas.get} \alias{is.special.miss} \alias{[.special.miss} \alias{print.special.miss} \alias{format.special.miss} \alias{sas.codes} \alias{code.levels} \title{Convert a SAS Dataset to an S Data Frame} \description{ Converts a \acronym{SAS} dataset into an S data frame. You may choose to extract only a subset of variables or a subset of observations in the \acronym{SAS} dataset. The function will automatically convert \preformatted{PROC FORMAT}-coded variables to factor objects. The original \acronym{SAS} codes are stored in an attribute called \code{sas.codes} and these may be added back to the \code{levels} of a \code{factor} variable using the \code{code.levels} function. Information about special missing values may be captured in an attribute of each variable having special missing values. This attribute is called \code{special.miss}, and such variables are given class \code{special.miss}. There are \code{print}, \code{[]}, \code{format}, and \code{is.special.miss} methods for such variables. Date, time, and date-time variables use, respectively, \code{\link{Dates}}, \code{\link{DateTimeClasses}}, and \code{\link[chron]{chron}} variables. If using S-Plus 5 or 6 or later, the \code{timeDate} function is used instead. 
If a date variable represents a partial date (0.5 added if month missing, 0.25 added if day missing, 0.75 if both), an attribute \code{partial.date} is added to the variable, and the variable also becomes a class \code{imputed} variable. The \code{describe} function uses information about partial dates and special missing values. There is an option to automatically \command{PKUNZIP} compressed \acronym{SAS} datasets. \code{sas.get} works by composing and running a \acronym{SAS} job that creates various \acronym{ASCII} files that are read and analyzed by \code{sas.get}. You can also run the \acronym{SAS} \code{sas_get} macro, which writes the \acronym{ASCII} files for downloading, in a separate step or on another computer, and then tell \code{sas.get} (through the \code{sasout} argument) to access these files instead of running \acronym{SAS}. } \usage{ sas.get(libraryName, member, variables=character(0), ifs=character(0), format.library=libraryName, id, dates.=c("sas","yymmdd","yearfrac","yearfrac2"), keep.log=TRUE, log.file="_temp_.log", macro=sas.get.macro, data.frame.out=existsFunction("data.frame"), clean.up=FALSE, quiet=FALSE, temp=tempfile("SaS"), formats=TRUE, recode=formats, special.miss=FALSE, sasprog="sas", as.is=.5, check.unique.id=TRUE, force.single=FALSE, pos, uncompress=FALSE, defaultencoding="latin1", var.case="lower") is.special.miss(x, code) \method{[}{special.miss}(x, \dots, drop=FALSE) \method{print}{special.miss}(x, \dots) \method{format}{special.miss}(x, \dots) sas.codes(object) code.levels(object) } \arguments{ \item{libraryName}{ character string naming the directory in which the dataset is kept. The default is \code{libraryName = "."}, indicating that the current directory is to be used. } \item{member}{ character string giving the second part of the two part \acronym{SAS} dataset name. (The first part is irrelevant here - it is mapped to the directory name.) } \item{x}{ a variable that may have been created by \code{sas.get} with \code{special.miss=TRUE} or with \code{recode} in effect. } \item{variables}{ vector of character strings naming the variables in the \acronym{SAS} dataset. The resulting data frame will contain only those variables from the \acronym{SAS} dataset. To get all of the variables (the default), an empty string may be given. It is a fatal error if any one of the variables is not in the \acronym{SAS} dataset. If you have retrieved a subset of the variables in the \acronym{SAS} dataset and wish to retrieve the same list of variables from another dataset, you can program the value of \code{variables} - see one of the last examples. } \item{ifs}{ a vector of character strings, each containing one \acronym{SAS} \dQuote{subsetting if} statement. These will be used to extract a subset of the observations in the \acronym{SAS} dataset. } \item{format.library}{ The directory containing the file \file{formats.sc2}, which contains the definitions of the user defined formats used in this dataset. By default, we look for the formats in the same directory as the data. The user defined formats must be available (so \acronym{SAS} can read the data). } \item{formats}{ Set \code{formats} to \code{FALSE} to keep \code{sas.get} from telling the \acronym{SAS} macro to retrieve value label formats from \code{format.library}. When you do not specify \code{formats} or \code{recode}, \code{sas.get} will set \code{formats} to \code{TRUE} if a \acronym{SAS} format catalog (\file{.sct} or \file{.sc2}) file exists in \code{format.library}. 
\code{sas.get} stores \acronym{SAS} \preformatted{PROC FORMAT VALUE} definitions as the \code{formats} attribute of the returned object (see below). A format is used if it is referred to by one or more variables in the dataset, if it contains no ranges of values (i.e., it identifies value labels for single values), and if it is a character format or a numeric format that is not used just to label missing values. To fetch the values and labels for variable \code{x} in the dataset \code{d} you could type: \cr \code{f <- attr(d\$x, "format")} \cr \code{formats <- attr(d, "formats")} \cr \code{formats\$f\$values; formats\$f\$labels} } \item{recode}{ This parameter defaults to \code{TRUE} if \code{formats} is \code{TRUE}. If it is \code{TRUE}, variables that have an appropriate format (see above) are recoded as \code{factor} objects, which map the values to the value labels for the format. Alternatively, set \code{recode} to 1 to use labels of the form value:label, e.g. 1:good 2:better 3:best. Set \code{recode} to 2 to use labels such as good(1) better(2) best(3). Since \code{sas.codes} and \code{code.levels} add flexibility, the usual choice for \code{recode} is \code{TRUE}. } \item{drop}{ logical. If \code{TRUE} the result is coerced to the lowest possible dimension. } \item{special.miss}{ For numeric variables, any missing values are stored as NA in S. You can recover special missing values by setting \code{special.miss} to \code{TRUE}. This will cause the \code{special.miss} attribute and the \code{special.miss} class to be added to each variable that has at least one special missing value. Suppose that variable \code{y} was .E in observation 3 and .G in observation 544. The \code{special.miss} attribute for \code{y} then has the value \cr \code{list(codes=c("E","G"),obs=c(3,544))} \cr To fetch this information for variable \code{y} you would say for example \cr \code{s <- attr(y, "special.miss")} \cr \code{s\$codes; s\$obs} \cr or use \code{is.special.miss(x)} or the \code{print.special.miss} method, which will replace \code{NA} values for the variable with \samp{E} or \samp{G} if they correspond to special missing values. The describe function uses this information in printing a data summary. } \item{id}{ The name of the variable to be used as the row names of the S dataset. The id variable becomes the \code{row.names} attribute of a data frame, but the id variable is still retained as a variable in the data frame. You can also specify a vector of variable names as the \code{id} parameter. After fetching the data from \acronym{SAS}, all these variables will be converted to character format and concatenated (with a space as a separator) to form a (hopefully) unique identification variable. } \item{dates.}{ specifies the format for storing \acronym{SAS} dates in the resulting data frame. } \item{as.is}{ \acronym{SAS} character variables are converted to S factor objects if \code{as.is=FALSE} or if \code{as.is} is a number between 0 and 1 inclusive and the number of unique values of the variable is less than the number of observations (\code{n}) times \code{as.is}. The default for \code{as.is} is 0.5, so character variables are converted to factors only if they have fewer than \code{n/2} unique values. The primary purpose of this is to keep unique identification variables as character values in the data frame instead of using more space to store both the integer factor codes and the factor labels. 
} \item{check.unique.id}{ If \code{id} is specified, the row names are checked for uniqueness if \code{check.unique.id = TRUE}. If any are duplicated, a warning is printed. Note that if a data frame is being created with duplicate row names, statements such as \code{my.data.frame["B23",]} will retrieve only the first row with a row name of \samp{B23}. } \item{force.single}{ By default, \acronym{SAS} numeric variables having \eqn{LENGTH > 4} are stored as S double precision numerics, which allow for the same precision as a \acronym{SAS} \preformatted{LENGTH} 8 variable. Set \code{force.single = TRUE} to store every numeric variable in single precision (7 digits of precision). This option is useful when the creator of the \acronym{SAS} dataset has failed to use a \preformatted{LENGTH} statement. R does not have single precision, so no attempt is made to convert to single if running \R. } \item{keep.log}{ logical: if \code{FALSE}, delete the \acronym{SAS} log file upon completion. } \item{log.file}{ the name of the \acronym{SAS} log file. } \item{macro}{ the name of an S object in the current search path that contains the text of the \acronym{SAS} macro called by S. The S object is a character vector that can be edited using, for example, \code{sas.get.macro <- editor(sas.get.macro)}. } \item{data.frame.out}{ set to \code{FALSE} to make the result a list instead of a data frame } \item{clean.up}{ logical flag: if \code{TRUE}, remove all temporary files when finished. You may want to keep these while debugging the \acronym{SAS} macro. Not needed for \R. } \item{quiet}{ logical flag: if \code{FALSE}, print the contents of the \acronym{SAS} log file if there has been an error. } \item{temp}{ the prefix to use for the temporary files. Two characters will be added to this, the resulting name must fit on your file system. } \item{sasprog}{ the name of the system command to invoke \acronym{SAS} } \item{uncompress}{ set to \code{FALSE} by default. Set it to \code{TRUE} to automatically invoke the DOS \command{PKUNZIP} command if \file{\var{member}.zip} exists, to uncompress the \acronym{SAS} dataset before proceeding. This assumes you have the file permissions to allow uncompressing in place. If the file is already uncompressed, this option is ignored. } \item{pos}{ by default, a list or data frame which contains all the variables is returned. If you specify \code{pos}, each individual variable is placed into a separate object (whose name is the name of the variable) using the \code{assign} function with the \code{pos} argument. For example, you can put each variable in its own file in a directory, which in some cases may save memory over attaching a data frame. } \item{code}{ a special missing value code (\samp{A} through \samp{Z} or \samp{\_}) to check against. If \code{code} is omitted, \code{is.special.miss} will return a \code{TRUE} for each observation that has any special missing value. } \item{defaultencoding}{ encoding to assume if the SAS dataset does not specify one. Defaults to "latin1". } \item{var.case}{ specify the case that you want variable names to be in. "lower" for lower case, "upper" for upper case, and "preserve" to retain the case from SAS. } \item{object}{ a variable in a data frame created by \code{sas.get} } \item{\dots}{ignored} } \value{ A data frame resembling the \acronym{SAS} dataset. If \code{id} was specified, that column of the data frame will be used as the row names of the data frame. 
Each variable in the data frame or vector in the list will have the attributes \code{label} and \code{format} containing \acronym{SAS} labels and formats. Underscores in formats are converted to periods. Formats for character variables have \samp{\$} placed in front of their names. If \code{formats} is \code{TRUE} and there are any appropriate format definitions in \code{format.library}, the returned object will have attribute \code{formats} containing lists named the same as the format names (with periods substituted for underscores and character formats prefixed by \samp{\$}). Each of these lists has a vector called \code{values} and one called \code{labels} with the \preformatted{PROC FORMAT; VALUE} \code{\dots} definitions. } \section{Side Effects}{ if a \acronym{SAS} error occurs the \acronym{SAS} log file will be printed under the control of the \code{pager} function. } \details{ If you specify \code{special.miss = TRUE} and there are no special missing values in the \acronym{SAS} dataset, the \acronym{SAS} step will bomb. For variables having a \preformatted{PROC FORMAT VALUE} format with some of the levels undefined, \code{sas.get} will interpret those values as \code{NA} if you are using \code{recode}. If you leave the \code{sasprog} argument at its default value of \samp{sas}, be sure that the \acronym{SAS} executable is in the \file{PATH} specified in your \file{autoexec.bat} file. Also make sure that you invoke S so that your current project directory is known to be the current working directory. This is best done by creating a shortcut in Windows95, for which the command to execute will be something like \command{drive:\\spluswin\\cmd\\splus.exe HOME=.} and the program is flagged to start in \file{drive:\\myproject} for example. In this way, you will be able to examine the \acronym{SAS} log file easily since it will be placed in \file{drive:\\myproject} by default. \acronym{SAS} will create \samp{SASWORK} and \samp{SASUSER} directories in what it thinks are the current working directories. To specify where \acronym{SAS} should put these instead, edit the \file{config.sas} file or specify a \code{sasprog} argument of the following form: \code{sasprog="\\sas\\sas.exe -saswork c:\\saswork -sasuser c:\\sasuser"}. When \code{sas.get} needs to run \acronym{SAS} it is run in iconized form. The \acronym{SAS} macro \file{sas\_get} uses record lengths of up to 4096 in two places. If you are exporting records that are very long (because of a large number of variables and/or long character variables), you may want to edit these \samp{LRECL}s to quadruple them, for example. } \note{ If \code{sasout} is not given, you must be able to run \acronym{SAS} on your system. If you are reading time or date-time variables, you will need to execute the command \code{library(chron)} to print those variables or the data frame. } \section{BACKGROUND}{ The references cited below explain the structure of \acronym{SAS} datasets and how they are stored. See \emph{\acronym{SAS} Language} for a discussion of the \preformatted{subsetting if} statement. } \author{ Terry Therneau, Mayo Clinic \cr Frank Harrell, Vanderbilt University \cr Bill Dunlap, University of Washington and Insightful Corp. \cr Michael W. Kattan, Cleveland Clinic Foundation \cr Reinhold Koch (encoding) } \references{ \acronym{SAS} Institute Inc. (1990). \emph{\acronym{SAS} Language: Reference, Version 6.} First Edition. \acronym{SAS} Institute Inc., Cary, North Carolina. \acronym{SAS} Institute Inc. (1988). 
\acronym{SAS} Technical Report P-176, \emph{Using the \acronym{SAS} System, Release 6.03, under UNIX Operating Systems and Derivatives. } \acronym{SAS} Institute Inc., Cary, North Carolina. \acronym{SAS} Institute Inc. (1985). \emph{\acronym{SAS} Introductory Guide.} Third Edition. \acronym{SAS} Institute Inc., Cary, North Carolina. } \seealso{ \code{\link{data.frame}}, \code{\link[Hmisc]{describe}}, \code{\link[Hmisc]{label}}, \code{\link[Hmisc]{upData}} } \examples{ \dontrun{ mice <- sas.get("saslib", mem="mice", var=c("dose", "strain", "ld50")) plot(mice$dose, mice$ld50) nude.mice <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice", ifs="if strain='nude'") nude.mice.dl <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice", var=c("dose", "ld50"), ifs="if strain='nude'") # Get a dataset from current directory, recode PROC FORMAT; VALUE \dots # variables into factors with labels of the form "good(1)" "better(2)", # get special missing values, recode missing codes .D and .R into new # factor levels "Don't know" and "Refused to answer" for variable q1 d <- sas.get(mem="mydata", recode=2, special.miss=TRUE) attach(d) nl <- length(levels(q1)) lev <- c(levels(q1), "Don't know", "Refused") q1.new <- as.integer(q1) q1.new[is.special.miss(q1,"D")] <- nl+1 q1.new[is.special.miss(q1,"R")] <- nl+2 q1.new <- factor(q1.new, 1:(nl+2), lev) # Note: would like to use factor() in place of as.integer ... but # factor in this case adds "NA" as a category level d <- sas.get(mem="mydata") sas.codes(d$x) # for PROC FORMATted variables returns original data codes d$x <- code.levels(d$x) # or attach(d); x <- code.levels(x) # This makes levels such as "good" "better" "best" into e.g. # "1:good" "2:better" "3:best", if the original SAS values were 1,2,3 # For the following example, suppose that SAS is run on a # different machine from the one on which S is run. # The sas_get macro is used to create files needed by # sas.get. To make a text file containing the sas_get macro # run the following S command, for example: # cat(sas.get.macro, file='/sasmacro/sas_get.sas', sep='\n') # Here is the SAS job. This job assumes that you put # sas_get.sas in an autocall macro library. # libname db '/my/sasdata/area'; # \%sas_get(db.mydata, dict, data, formats, specmiss, # formats=1, specmiss=1) # Substitute whatever file names you may want. # Next the 4 files are moved to the S machine (using # ASCII file transfer mode) and the following S # program is run: mydata <- sas.get(sasout=c('dict','data','formats','specmiss'), id='idvar') # If PKZIP is run after \%sas_get, e.g. "PKZIP port dict data formats" # (assuming that specmiss was not used here), use mydata <- sas.get(sasout='a:port', id='idvar') # which will run PKUNZIP port to unzip a:port.zip, creating the # dict, data, and formats files which are generated (and later # deleted) by sas.get # Retrieve the same variables from another dataset (or an update of # the original dataset) mydata2 <- sas.get('mydata2', var=names(mydata)) # This only works if none of the original SAS variable names contained _ # Code from Don MacQueen to generate SAS dataset to test import of # date, time, date-time variables # data ssd.test; # d1='3mar2002'd ; # dt1='3mar2002 9:31:02'dt; # t1='11:13:45't; # output; # # d1='3jun2002'd ; # dt1='3jun2002 9:42:07'dt; # t1='11:14:13't; # output; # format d1 mmddyy10. dt1 datetime. 
# t1 time.; # run; } } \keyword{interface} \keyword{manip} Hmisc/man/tabulr.Rd0000644000176200001440000001651712742737077013702 0ustar liggesusers\name{tabulr} \alias{tabulr} \alias{table_trio} \alias{table_N} \alias{table_freq} \alias{table_pc} \alias{table_latexdefs} \alias{table_formatpct} \alias{nFm} \title{Interface to Tabular Function} \description{ \code{\link{tabulr}} is a front-end to the \code{tables} package's \code{\link[tables]{tabular}} function so that the user can take advantage of variable annotations used by the \code{Hmisc} package, particularly those created by the \code{\link{label}}, \code{\link{units}}, and \code{\link{upData}} functions. When a variable \code{x} appears in a \code{\link[tables]{tabular}} formula, it is found in the \code{data} argument or in the parent environment, and the \code{\link{labelLatex}} function is used to create a LaTeX label. By default any units of measurement are right justified in the current LaTeX tabular field using \code{hfill}; use \code{nofill} to list variables for which \code{units} are not right-justified with \code{hfill}. Once the label is constructed, the variable name is preceded by \code{Heading("LaTeX label")*x} in the formula before it is passed to \code{\link[tables]{tabular}}. \code{nolabel} can be used to specify variables for which labels are ignored. \code{tabulr} also replaces \code{trio} with \code{table_trio}, \code{N} with \code{table_N}, and \code{freq} with \code{table_freq} in the formula. \code{table_trio} is a function that takes a numeric vector and computes the three quartiles and optionally the mean and standard deviation, and outputs a LaTeX-formatted character string representing the results. By default, calculated statistics are formatted with 3 digits to the left and 1 digit to the right of the decimal point. Running \code{\link[tables]{table_options}(left=l, right=r)} will use \code{l} and \code{r} digits instead. Other options that can be given to \code{table_options} are \code{prmsd=TRUE} to add mean +/- standard deviation to the result, \code{pn=TRUE} to add the sample size, \code{bold=TRUE} to set the median in bold face, \code{showfreq='all', 'low', 'high'} used by the \code{table_freq} function, \code{pctdec}, specifying the number of places to the right of the decimal point for percentages (default is zero), and \code{npct='both','numerator','denominator','none'} used by \code{table_formatpct} to control what appears after the percent. Option \code{pnformat} may be specified to control the formatting for \code{pn}. The default is \code{"(n=..)"}. Specify \code{pnformat="non"} to suppress \code{"n="}. \code{pnwhen} specifies when to print the number of observations. The default is \code{"always"}. Specify \code{pnwhen="ifna"} to include \code{n} only if there are missing values in the vector being processed. \code{tabulr} substitutes \code{table_N} for \code{N} in the formula. This is used to create column headings for the number of observations, without a row label. \code{table_freq} analyzes a character variable to compute, for a single output cell, the percents, numerator, and denominator for each category, or optionally just the maximum or minimum, as specified by \code{table_options(showfreq)}. \code{table_formatpct} is a function that formats percents depending on settings of options in \code{table_options}. 
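As a sketch of how these options combine (illustrative only; the returned string is LaTeX markup whose exact form depends on the options in effect): \preformatted{
table_options(left=3, right=1, prmsd=TRUE, pn=TRUE)
table_trio(x)  # quartiles, mean +/- SD, and sample size for a numeric x
}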
\code{nFm} is a function that calls \code{\link{sprintf}} to format numeric values to have a specific number of digits to the \code{left} and to the \code{right} of the point. \code{table_latexdefs} writes (by default) to the console a set of LaTeX definitions that can be invoked at any point thereafter in a \code{knitr} or \code{sweave} document by naming the macro, preceded by a single slash. The \code{blfootnote} macro is called with a single LaTeX argument which will appear as a footnote without a number. \code{keytrio} invokes \code{blfootnote} to define the output of \code{table_trio} if mean and SD are not included. If mean and SD are included, use \code{keytriomsd}. } \usage{ tabulr(formula, data = NULL, nolabel=NULL, nofill=NULL, \dots) table_trio(x) table_freq(x) table_formatpct(num, den) nFm(x, left, right, neg=FALSE, pad=FALSE, html=FALSE) table_latexdefs(file='') } \arguments{ \item{formula}{a formula suitable for \code{\link[tables]{tabular}} except for the addition of \code{.(variable name)}, \code{.n()}, \code{trio}.} \item{data}{a data frame or list. If omitted, the parent environment is assumed to contain the variables.} \item{nolabel}{a formula such as \code{~ x1 + x2} containing the list of variables for which labels are to be ignored, forcing use of the variable name} \item{nofill}{a formula such as \code{~ x1 + x2} containing the list of variables for which units of measurement are not to be right-justified in the field using the LaTeX \code{hfill} directive} \item{\dots}{other arguments to \code{tabular}} \item{x}{a numeric vector} \item{num}{a single numerator or vector of numerators} \item{den}{a single denominator} \item{left, right}{number of places to the left and right of the decimal point, respectively} \item{neg}{set to \code{TRUE} if negative \code{x} values are allowed, to add one more space to the left of the decimal place} \item{pad}{set to \code{TRUE} to replace blanks with the LaTeX tilde placeholder} \item{html}{set to \code{TRUE} to make \code{pad} use an HTML space character instead of a LaTeX tilde space} \item{file}{location of output of \code{table_latexdefs}} } \value{\code{tabulr} returns an object of class \code{"tabular"}} \author{Frank Harrell} \seealso{\code{\link[tables]{tabular}}, \code{\link{label}}, \code{\link{latex}}, \code{\link{summaryM}}} \examples{ \dontrun{ n <- 400 set.seed(1) d <- data.frame(country=factor(sample(c('US','Canada','Mexico'), n, TRUE)), sex=factor(sample(c('Female','Male'), n, TRUE)), age=rnorm(n, 50, 10), sbp=rnorm(n, 120, 8)) d <- upData(d, preghx=ifelse(sex=='Female', sample(c('No','Yes'), n, TRUE), NA), labels=c(sbp='Systolic BP', age='Age', preghx='Pregnancy History'), units=c(sbp='mmHg', age='years')) contents(d) require(tables) invisible(booktabs()) # use booktabs LaTeX style for tabular g <- function(x) { x <- x[!is.na(x)] if(length(x) == 0) return('') paste(latexNumeric(nFm(mean(x), 3, 1)), ' \\hfill{\\smaller[2](', length(x), ')}', sep='') } tab <- tabulr((age + Heading('Females')*(sex == 'Female')*sbp)* Heading()*g + (age + sbp)*Heading()*trio ~ Heading()*country*Heading()*sex, data=d) # Formula after interpretation by tabulr: # (Heading('Age\\hfill {\\smaller[2] years}') * age + Heading("Females") # * (sex == "Female") * Heading('Systolic BP {\\smaller[2] mmHg}') * sbp) # * Heading() * g + (age + sbp) * Heading() * table_trio ~ Heading() # * country * Heading() * sex cat('\\begin{landscape}\n') cat('\\begin{minipage}{\\textwidth}\n') cat('\\keytrio\n') latex(tab) cat('\\end{minipage}\\end{landscape}\n') 
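# Illustrative nFm calls (a sketch added for clarity; the strings shown in
# the comments are assumptions based on the sprintf-based formatting
# described above)
nFm(3.14159, left=3, right=1)     # "  3.1"
nFm(c(2, 12.3), left=2, right=2)  # " 2.00" "12.30"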
getHdata(pbc) pbc <- upData(pbc, moveUnits=TRUE) # Convert to character to prevent tabular from stratifying for(x in c('sex', 'stage', 'spiders')) { pbc[[x]] <- as.character(pbc[[x]]) label(pbc[[x]]) <- paste(toupper(substring(x, 1, 1)), substring(x, 2), sep='') } table_options(pn=TRUE, showfreq='all') tab <- tabulr((bili + albumin + protime + age) * Heading()*trio + (sex + stage + spiders)*Heading()*freq ~ drug, data=pbc) latex(tab) } } \keyword{utilities} \keyword{interface} Hmisc/man/html.Rd0000644000176200001440000001524613714234051013333 0ustar liggesusers\name{html} \alias{html} \alias{html.latex} \alias{html.data.frame} \alias{html.default} \alias{htmlVerbatim} \title{Convert an S object to HTML} \description{ \code{html} is a generic function, for which only two methods are currently implemented, \code{html.latex} and a rudimentary \code{html.data.frame}. The former uses the \code{HeVeA} LaTeX to HTML translator by Maranget to create an HTML file from a LaTeX file like the one produced by \code{latex}. \code{html.default} just runs \code{html.data.frame}. \code{htmlVerbatim} prints all of its arguments to the console in an html verbatim environment, using a specified percent of the prevailing character size. This is useful for R Markdown with \code{knitr}. Most of the html-producing functions in the Hmisc and rms packages return a character vector passed through \code{htmltools::HTML} so that \code{knitr} will correctly format the result without the need for the user putting \code{results='asis'} in the chunk header. } \usage{ html(object, \dots) \method{html}{latex}(object, file, where=c('cwd', 'tmp'), method=c('hevea', 'htlatex'), rmarkdown=FALSE, cleanup=TRUE, \dots) \method{html}{data.frame}(object, file=paste(first.word(deparse(substitute(object))),'html',sep='.'), header, caption=NULL, rownames=FALSE, align='r', align.header='c', bold.header=TRUE, col.header='Black', border=2, width=NULL, size=100, translate=FALSE, append=FALSE, link=NULL, linkCol=1, linkType=c('href','name'), \dots) \method{html}{default}(object, file=paste(first.word(deparse(substitute(object))),'html',sep='.'), append=FALSE, link=NULL, linkCol=1, linkType=c('href','name'), \dots) htmlVerbatim(\dots, size=75, width=85, scroll=FALSE, rows=10, cols=100, propts=NULL, omit1b=FALSE) } \arguments{ \item{object}{a data frame or an object created by \code{latex}. For the generic \code{html}, it is any object for which an \code{html} method exists.} \item{file}{ name of the file to create. The default file name is \code{object.html} where \code{object} is the first word in the name of the argument for \code{object}. For \code{html.latex} specify \code{file=''} or \code{file=character(0)} to print html code to the console, as when using \code{knitr}. For the \code{data.frame} method, \code{file} may be set to \code{FALSE} which causes a character vector enclosed in \code{htmltools::HTML} to be returned instead of writing to the console. } \item{where}{for \code{html}. Default is to put output files in current working directory. Specify \code{where='tmp'} to put in a system temporary directory area.} \item{method}{default is to use system command \code{hevea} to convert from LaTeX to html. Specify \code{method='htlatex'} to use system command \code{htlatex}, assuming the system package \code{TeX4ht} is installed.} \item{rmarkdown}{set to \code{TRUE} if using RMarkdown (usually under \code{knitr} and RStudio). This causes html to be packaged for RMarkdown and output to go into the console stream. 
\code{file} is ignored when \code{rmarkdown=TRUE}.} \item{cleanup}{if using \code{method='htlatex'} set to \code{FALSE} if \code{where='cwd'} to prevent deletion of auxiliary files created by \code{htlatex} that are not needed when using the final \code{html} document (only the \code{.css} file is needed in addition to \code{.html}). If using \code{method='hevea'}, \code{cleanup=TRUE} causes deletion of the generated \code{.haux} file.} \item{header}{vector of column names. Defaults to names in \code{object}. Set to \code{NULL} to suppress column names.} \item{caption}{a character string to be used as a caption before the table} \item{rownames}{set to \code{FALSE} to ignore row names even if they are present} \item{align}{alignment for table columns (all are assumed to have the same alignment if \code{align} is a scalar). Specify \code{"c", "r", "l"} for center, right, or left alignment.} \item{align.header}{same coding as for \code{align} but pertains to header} \item{bold.header}{set to \code{FALSE} to not bold face column headers} \item{col.header}{color for column headers} \item{border}{set to 0 to not include table cell borders, 1 to include only outer borders, or 2 (the default) to put borders around cells too} \item{translate}{set to \code{TRUE} to run header and table cell text through the \code{htmlTranslate} function} \item{width}{optional table width for \code{html.data.frame}. For full page width use \code{width="100\%"}, for use in \code{options()} for printing objects.} \item{size}{a number between 0 and 100 representing the percent of the prevailing character size to be used by \code{htmlVerbatim} and the data frame method.} \item{append}{set to \code{TRUE} to append to an existing file} \item{link}{character vector specifying hyperlink names to attach to selected elements of the matrix or data frame. No hyperlinks are used if \code{link} is omitted or for elements of \code{link} that are \code{""}. To allow multiple links per link, \code{link} may also be a character matrix shaped as \code{object} in which case \code{linkCol} is ignored.} \item{linkCol}{column number of \code{object} to which hyperlinks are attached. Defaults to first column.} \item{linkType}{defaults to \code{"href"}} \item{\dots}{ignored except for \code{htmlVerbatim} - is a list of objects to \code{print()}} \item{scroll}{set to \code{TRUE} to put the html in a scrollable \code{textarea}} \item{rows,cols}{the number of rows and columns to devote to the visible part of the scrollable box} \item{propts}{options, besides \code{quote=FALSE} to pass to the \code{print} method, for \code{htmlVerbatim}} \item{omit1b}{for \code{htmlVerbatim} if \code{TRUE} causes an initial line of output that is all blank to be deleted} } \author{ Frank E. Harrell, Jr. \cr Department of Biostatistics, \cr Vanderbilt University, \cr \email{fh@fharrell.com} } \references{ Maranget, Luc. HeVeA: a LaTeX to HTML translator. 
URL: http://para.inria.fr/~maranget/hevea/ } \seealso{ \code{\link{latex}} } \examples{ \dontrun{ x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'),c('c','d','e'))) w <- latex(x) h <- html(w) # run HeVeA to convert .tex to .html h <- html(x) # convert x directly to html w <- html(x, link=c('','B')) # hyperlink first row first col to B # Assuming system package tex4ht is installed, easily convert advanced # LaTeX tables to html getHdata(pbc) s <- summaryM(bili + albumin + stage + protime + sex + age + spiders ~ drug, data=pbc, test=TRUE) w <- latex(s, npct='slash', file='s.tex') z <- html(w) browseURL(z$file) d <- describe(pbc) w <- latex(d, file='d.tex') z <- html(w) browseURL(z$file) } } \keyword{utilities} \keyword{interface} \keyword{methods} \keyword{file} \keyword{character} \keyword{manip} Hmisc/man/soprobMarkovOrd.Rd0000644000176200001440000000451314014740471015515 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/simMarkovOrd.r \name{soprobMarkovOrd} \alias{soprobMarkovOrd} \title{soprobMarkovOrd} \usage{ soprobMarkovOrd(y, times, initial, absorb = NULL, intercepts, g, ...) } \arguments{ \item{y}{a vector of possible y values in order (numeric, character, factor)} \item{times}{vector of measurement times} \item{initial}{initial value of \code{y} (baseline state; numeric, character, factor)} \item{absorb}{vector of absorbing states, a subset of \code{y}. The default is no absorbing states. (numeric, character, factor)} \item{intercepts}{vector of intercepts in the proportional odds model, with length one less than the length of \code{y}} \item{g}{a user-specified function of three or more arguments which in order are \code{yprev} - the value of \code{y} at the previous time, the current time \code{t}, the \code{gap} between the previous time and the current time, an optional (usually named) covariate vector \code{X}, and optional arguments such as a regression coefficient value to simulate from. The function needs to allow \code{yprev} to be a vector and \code{yprev} must not include any absorbing states. The \code{g} function returns the linear predictor for the proportional odds model aside from \code{intercepts}. The returned value must be a matrix with row names taken from \code{yprev}. If the model is a proportional odds model, the returned value must be one column. If it is a partial proportional odds model, the value must have one column for each distinct value of the response variable Y after the first one, with the levels of Y used as optional column names. So columns correspond to \code{intercepts}. The different columns are used for \code{y}-specific contributions to the linear predictor (aside from \code{intercepts}) for a partial or constrained partial proportional odds model. Parameters for partial proportional odds effects may be included in the ... 
arguments.} \item{...}{additional arguments to pass to \code{g} such as covariate settings} } \value{ matrix with rows corresponding to times and columns corresponding to states, with values equal to exact state occupancy probabilities } \description{ State Occupancy Probabilities for First-Order Markov Ordinal Model } \seealso{ \url{https://hbiostat.org/R/Hmisc/markov/} } \author{ Frank Harrell } Hmisc/man/hoeffd.Rd0000644000176200001440000000620213714234051013612 0ustar liggesusers\name{hoeffd} \alias{hoeffd} \alias{print.hoeffd} \title{ Matrix of Hoeffding's D Statistics } \description{ Computes a matrix of Hoeffding's (1948) \code{D} statistics for all possible pairs of columns of a matrix. \code{D} is a measure of the distance between \code{F(x,y)} and \code{G(x)H(y)}, where \code{F(x,y)} is the joint CDF of \code{X} and \code{Y}, and \code{G} and \code{H} are marginal CDFs. Missing values are deleted in pairs rather than deleting all rows of \code{x} having any missing variables. The \code{D} statistic is robust against a wide variety of alternatives to independence, such as non-monotonic relationships. The larger the value of \code{D}, the more dependent are \code{X} and \code{Y} (for many types of dependencies). \code{D} used here is 30 times Hoeffding's original \code{D}, and ranges from -0.5 to 1.0 if there are no ties in the data. \code{print.hoeffd} prints the information derived by \code{hoeffd}. \code{hoeffd} also computes the mean and maximum absolute values of the difference between the joint empirical CDF and the product of the marginal empirical CDFs. } \usage{ hoeffd(x, y) \method{print}{hoeffd}(x, \dots) } \arguments{ \item{x}{ a numeric matrix with at least 5 rows and at least 2 columns (if \code{y} is absent), or an object created by \code{hoeffd} } \item{y}{ a numeric vector or matrix which will be concatenated to \code{x} } \item{\dots}{ignored} } \value{ a list with elements \code{D}, the matrix of D statistics, \code{n} the matrix of number of observations used in analyzing each pair of variables, and \code{P}, the asymptotic P-values. Pairs with fewer than 5 non-missing values have the D statistic set to NA. The diagonals of \code{n} are the number of non-NAs for the single variable corresponding to that row and column. } \details{ Uses midranks in case of ties, as described by Hollander and Wolfe. P-values are approximated by linear interpolation on the table in Hollander and Wolfe, which uses the asymptotically equivalent Blum-Kiefer-Rosenblatt statistic. For \code{P<.0001} or \code{>0.5}, \code{P} values are computed using a well-fitting linear regression function in \code{log P} vs. the test statistic. Ranks (but not bivariate ranks) are computed using efficient algorithms (see reference 3). } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr \email{fh@fharrell.com} } \references{ Hoeffding W. (1948): A non-parametric test of independence. Ann Math Stat 19:546--57. Hollander M. and Wolfe D.A. (1973). Nonparametric Statistical Methods, pp. 228--235, 423. New York: Wiley. Press WH, Flannery BP, Teukolsky SA, Vetterling WT (1988): Numerical Recipes in C. Cambridge: Cambridge University Press. 
} \seealso{ \code{\link{rcorr}}, \code{\link{varclus}} } \examples{ x <- c(-2, -1, 0, 1, 2) y <- c(4, 1, 0, 1, 4) z <- c(1, 2, 3, 4, NA) q <- c(1, 2, 3, 4, 5) hoeffd(cbind(x,y,z,q)) # Hoeffding's test can detect even one-to-many dependency set.seed(1) x <- seq(-10,10,length=200) y <- x*sign(runif(200,-1,1)) plot(x,y) hoeffd(x,y) } \keyword{nonparametric} \keyword{htest} Hmisc/man/mdb.get.Rd0000644000176200001440000000476013474465251013711 0ustar liggesusers\name{mdb.get} \alias{mdb.get} \title{Read Tables in a Microsoft Access Database} \description{ Assuming the \code{mdbtools} system package has been installed on your system and is in the system path, \code{mdb.get} imports one or more tables in a Microsoft Access database. Date-time variables are converted to dates or \code{chron} package date-time variables. The \code{csv.get} function is used to import automatically exported csv files. If \code{tables} is unspecified all tables in the database are retrieved. If more than one table is imported, the result is a list of data frames. } \usage{ mdb.get(file, tables=NULL, lowernames=FALSE, allow=NULL, dateformat='\%m/\%d/\%y', mdbexportArgs='-b strip', ...) } \arguments{ \item{file}{the file name containing the Access database} \item{tables}{character vector specifying the names of tables to import. Default is to import all tables. Specify \code{tables=TRUE} to return the list of available tables.} \item{lowernames}{set this to \code{TRUE} to change variable names to lower case} \item{allow}{a vector of characters allowed by \R that should not be converted to periods in variable names. By default, underscores in variable names are converted to periods as with \R before version 1.9.} \item{dateformat}{see \code{\link{cleanup.import}}. Default is the usual Access format used in the U.S.} \item{mdbexportArgs}{command line arguments to issue to mdb-export. Set to \code{''} to omit \code{'-b strip'}.} \item{\dots}{arguments to pass to \code{csv.get}} } \details{ Uses the \code{mdbtools} package executables \code{mdb-tables}, \code{mdb-schema}, and \code{mdb-export} (with by default option \code{-b strip} to drop any binary output). In Debian/Ubuntu Linux run \code{apt-get install mdbtools}. \code{cleanup.import} is invoked by \code{csv.get} to transform variables and store them as efficiently as possible. 
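As a minimal usage sketch (the file \code{mydb.mdb} and the \code{Orders} table here are hypothetical): \preformatted{
## import all tables, converting variable names to lower case
d <- mdb.get('mydb.mdb', lowernames=TRUE)
## import one table, using 4-digit-year dates
Orders <- mdb.get('mydb.mdb', tables='Orders', dateformat='\%m/\%d/\%Y')
} Any extra arguments given to \code{mdb.get} are passed along to \code{csv.get}.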
} \value{a new data frame or a list of data frames} \author{Frank Harrell, Vanderbilt University} \seealso{ \code{\link{data.frame}}, \code{\link{cleanup.import}}, \code{\link{csv.get}}, \code{\link{Date}}, \code{\link[chron]{chron}} } \examples{ \dontrun{ # Read all tables in the Microsoft Access database Nwind.mdb d <- mdb.get('Nwind.mdb') contents(d) for(z in d) print(contents(z)) # Just print the names of tables in the database mdb.get('Nwind.mdb', tables=TRUE) # Import one table Orders <- mdb.get('Nwind.mdb', tables='Orders') } } \keyword{manip} \keyword{IO} \keyword{file} Hmisc/man/soprobMarkovOrdm.Rd0000644000176200001440000000347014067403332015673 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/simMarkovOrd.r \name{soprobMarkovOrdm} \alias{soprobMarkovOrdm} \title{soprobMarkovOrdm} \usage{ soprobMarkovOrdm( object, data, times, ylevels, absorb = NULL, tvarname = "time", pvarname = "yprev", gap = NULL ) } \arguments{ \item{object}{a fit object created by \code{blrm}, \code{lrm}, \code{orm}, \code{VGAM::vglm()}, or \code{VGAM::vgam()}} \item{data}{a single observation list or data frame with covariate settings, including the initial state for Y} \item{times}{vector of measurement times} \item{ylevels}{a vector of ordered levels of the outcome variable (numeric or character)} \item{absorb}{vector of absorbing states, a subset of \code{ylevels}. The default is no absorbing states. (numeric, character, factor)} \item{tvarname}{name of time variable, defaulting to \code{time}} \item{pvarname}{name of previous state variable, defaulting to \code{yprev}} \item{gap}{name of time gap variable, defaults assuming that gap time is not in the model} } \value{ if \code{object} was not a Bayesian model, a matrix with rows corresponding to times and columns corresponding to states, with values equal to exact state occupancy probabilities. If \code{object} was created by \code{blrm}, the result is a 3-dimensional array with the posterior draws as the first dimension. } \description{ State Occupancy Probabilities for First-Order Markov Ordinal Model from a Model Fit } \details{ Computes state occupancy probabilities for a single setting of baseline covariates. If the model fit was from \code{rms::blrm()}, these probabilities are from all the posterior draws of the basic model parameters. Otherwise they are maximum likelihood point estimates. } \seealso{ \url{https://hbiostat.org/R/Hmisc/markov/} } \author{ Frank Harrell } Hmisc/man/describe.Rd0000644000176200001440000004660414370566661014167 0ustar liggesusers\name{describe} \alias{describe} \alias{describe.default} \alias{describe.vector} \alias{describe.matrix} \alias{describe.formula} \alias{describe.data.frame} \alias{plot.describe} \alias{print.describe} \alias{print.describe.single} \alias{[.describe} \alias{latex.describe} \alias{latex.describe.single} \alias{html.describe} \alias{html.describe.single} \alias{formatdescribeSingle} \title{Concise Statistical Description of a Vector, Matrix, Data Frame, or Formula} \description{ \code{describe} is a generic method that invokes \code{describe.data.frame}, \code{describe.matrix}, \code{describe.vector}, or \code{describe.formula}. \code{describe.vector} is the basic function for handling a single variable. This function determines whether the variable is character, factor, category, binary, discrete numeric, and continuous numeric, and prints a concise statistical summary according to each. 
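As a minimal illustration (a sketch only; the exact printed layout depends on the data and the options in effect): \preformatted{
describe(sample(c('a','b','c'), 100, TRUE))  # character vector: frequency table
describe(rnorm(1000))   # continuous numeric: quantiles, Info, Gmd
}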
A numeric variable is deemed discrete if it has <= 10 distinct values. In this case, quantiles are not printed. A frequency table is printed for any non-binary variable if it has no more than 20 distinct values. For any variable for which the frequency table is not printed, the 5 lowest and highest values are printed. This behavior can be overridden for long character variables with many levels using the \code{listunique} parameter, to get a complete tabulation. \code{describe} is especially useful for describing data frames created by \code{*.get}, as labels, formats, value labels, and (in the case of \code{sas.get}) frequencies of special missing values are printed. For a binary variable, the sum (number of 1's) and mean (proportion of 1's) are printed. If the first argument is a formula, a model frame is created and passed to describe.data.frame. If a variable is of class \code{"impute"}, a count of the number of imputed values is printed. If a date variable has an attribute \code{partial.date} (this is set up by \code{sas.get}), counts of how many partial dates are actually present (missing month, missing day, missing both) are also presented. If a variable was created by the special-purpose function \code{substi} (which substitutes values of a second variable if the first variable is NA), the frequency table of substitutions is also printed. For numeric variables, \code{describe} adds an item called \code{Info} which is a relative information measure using the relative efficiency of a proportional odds/Wilcoxon test on the variable relative to the same test on a variable that has no ties. \code{Info} is related to how continuous the variable is, and ties are less harmful the more untied values there are. The formula for \code{Info} is one minus the sum of the cubes of relative frequencies of values divided by one minus the square of the reciprocal of the sample size. The lowest information comes from a variable having only one distinct value, followed by a highly skewed binary variable. \code{Info} is reported to two decimal places. A latex method exists for converting the \code{describe} object to a LaTeX file. For numeric variables having more than 20 distinct values, \code{describe} saves in its returned object the frequencies of 100 evenly spaced bins running from the minimum observed value to the maximum. When there are 20 or fewer distinct values, the original values are maintained. \code{latex} and \code{html} insert a spike histogram displaying these frequency counts in the tabular material using the LaTeX picture environment. For example output see \url{https://hbiostat.org/doc/rms/book/chapter7edition1.pdf}. Note that the latex method assumes you have the following styles installed in your latex installation: setspace and relsize. The \code{html} method mimics the LaTeX output. This is useful in the context of Quarto/Rmarkdown html and html notebook output. If \code{options(prType='html')} is in effect, calling \code{print} on an object that is the result of running \code{describe} on a data frame will result in rendering the HTML version. If run from the console, a browser window will open. The \code{plot} method is for \code{describe} objects run on data frames. It produces spike histograms for a graphic of continuous variables and a dot chart for categorical variables, showing category proportions. The graphic format is \code{ggplot2} if the user has not set \code{options(grType='plotly')} or has set the \code{grType} option to something other than \code{'plotly'}. 
Otherwise \code{plotly} graphics that are interactive are produced, and these can be placed into an Rmarkdown html notebook. The user must install the \code{plotly} package for this to work. When the user hovers the mouse over a bin for a raw data value, the actual value will pop up (formatted using \code{digits}). When the user hovers over the minimum data value, most of the information calculated by \code{describe} will pop up. For each variable, the number of missing values is used to assign the color to the histogram or dot chart, and a legend is drawn. Color is not used if there are no missing values in any variable. For categorical variables, hovering over the leftmost point for a variable displays details, and for all points proportions, numerators, and denominators are displayed in the popup. If both continuous and categorical variables are present and \code{which='both'} is specified, the \code{plot} method returns an unclassed \code{list} containing two objects, named \code{'Categorical'} and \code{'Continuous'}, in that order. Sample weights may be specified to any of the functions, resulting in weighted means, quantiles, and frequency tables. Note: As discussed in Cox and Longton (2008), Stata Technical Bulletin 8(4) p. 557, the term "unique" has been replaced with "distinct" in the output (but not in parameter names). When \code{weights} are not used, Gini's mean difference is computed for numeric variables. This is a robust measure of dispersion that is the mean absolute difference over all pairs of observations. In the output Gini's difference is labeled \code{Gmd}. \code{formatdescribeSingle} is a service function for \code{latex}, \code{html}, and \code{print} methods for single variables that is not intended to be called by the user. } \usage{ \method{describe}{vector}(x, descript, exclude.missing=TRUE, digits=4, listunique=0, listnchar=12, weights=NULL, normwt=FALSE, minlength=NULL, \dots) \method{describe}{matrix}(x, descript, exclude.missing=TRUE, digits=4, \dots) \method{describe}{data.frame}(x, descript, exclude.missing=TRUE, digits=4, \dots) \method{describe}{formula}(x, descript, data, subset, na.action, digits=4, weights, \dots) \method{print}{describe}(x, \dots) \method{latex}{describe}(object, title=NULL, file=paste('describe',first.word(expr=attr(object,'descript')),'tex',sep='.'), append=FALSE, size='small', tabular=TRUE, greek=TRUE, spacing=0.7, lspace=c(0,0), \dots) \method{latex}{describe.single}(object, title=NULL, vname, file, append=FALSE, size='small', tabular=TRUE, greek=TRUE, lspace=c(0,0), \dots) \method{html}{describe}(object, size=85, tabular=TRUE, greek=TRUE, scroll=FALSE, rows=25, cols=100, \dots) \method{html}{describe.single}(object, size=85, tabular=TRUE, greek=TRUE, \dots) formatdescribeSingle(x, condense=c('extremes', 'frequencies', 'both', 'none'), lang=c('plain', 'latex', 'html'), verb=0, lspace=c(0, 0), size=85, \dots) \method{plot}{describe}(x, which=c('both', 'continuous', 'categorical'), what=NULL, sort=c('ascending', 'descending', 'none'), n.unique=10, digits=5, bvspace=2, \dots) } \arguments{ \item{x}{ a data frame, matrix, vector, or formula. For a data frame, the \code{describe.data.frame} function is automatically invoked. For a matrix, \code{describe.matrix} is called. For a formula, describe.data.frame(model.frame(x)) is invoked. The formula may or may not have a response variable. For \code{print}, \code{latex}, \code{html}, or \code{formatdescribeSingle}, \code{x} is an object created by \code{describe}. 
} \item{descript}{ optional title to print for \code{x}. The default is the name of the argument or the "label" attributes of individual variables. When the first argument is a formula, \code{descript} defaults to a character representation of the formula. } \item{exclude.missing}{ set to \code{TRUE} to print the names of variables that contain only missing values. This list appears at the bottom of the printout, and no space is taken up for such variables in the main listing. } \item{digits}{ number of significant digits to print. For \code{plot.describe}, it is the number of significant digits to put in hover text for \code{plotly} when showing raw variable values.} \item{listunique}{ For a character variable that is not an \code{mChoice} variable, that has its longest string length greater than \code{listnchar}, and that has no more than \code{listunique} distinct values, all values are listed in alphabetic order. Any value having more than one occurrence has the frequency of occurrence included. Specify \code{listunique} equal to some value at least as large as the number of observations to ensure that all character variables will have all their values listed. For purposes of tabulating character strings, multiple white spaces of any kind are translated to a single space, leading and trailing white space are ignored, and case is ignored. } \item{listnchar}{see \code{listunique}} \item{weights}{ a numeric vector of frequencies or sample weights. Each observation will be treated as if it were sampled \code{weights} times. } \item{minlength}{value passed to summary.mChoice.} \item{normwt}{ The default, \code{normwt=FALSE} results in the use of \code{weights} as weights in computing various statistics. In this case the sample size is assumed to be equal to the sum of \code{weights}. Specify \code{normwt=TRUE} to divide \code{weights} by a constant so that \code{weights} sum to the number of observations (length of vectors specified to \code{describe}). In this case the number of observations is taken to be the actual number of records given to \code{describe}. } \item{object}{a result of \code{describe}} \item{title}{unused} \item{data}{ } \item{subset}{ } \item{na.action}{ These are used if a formula is specified. \code{na.action} defaults to \code{na.retain} which does not delete any \code{NA}s from the data frame. Use \code{na.action=na.omit} or \code{na.delete} to drop any observation with any \code{NA} before processing. } \item{\dots}{ arguments passed to \code{describe.default} which are passed to calls to \code{format} for numeric variables. For example, if using R \code{POSIXct} or \code{Date} date/time formats, specifying \code{describe(d,format='\%d\%b\%y')} will print date/time variables as \code{"01Jan2000"}. This is useful for omitting the time component. See the help file for \code{format.POSIXct} or \code{format.Date} for more information. For \code{plot} methods, \dots is ignored. For \code{html} and \code{latex} methods, \dots is used to pass optional arguments to \code{formatdescribeSingle}, especially the \code{condense} argument. } \item{file}{ name of output file (should have a suffix of .tex). Default name is formed from the first word of the \code{descript} element of the \code{describe} object, prefixed by \code{"describe"}. Set \code{file=""} to send LaTeX code to standard output instead of a file. 
} \item{append}{ set to \code{TRUE} to have \code{latex} append text to an existing file named \code{file} } \item{size}{ LaTeX text size (\code{"small"}, the default, or \code{"normalsize"}, \code{"tiny"}, \code{"scriptsize"}, etc.) for the \code{describe} output in LaTeX. For html, it is the percent of the prevailing font size to use for the output. } \item{tabular}{ set to \code{FALSE} to use verbatim rather than tabular (or html table) environment for the summary statistics output. By default, tabular is used if the output is not too wide.} \item{greek}{By default, the \code{latex} and \code{html} methods will change names of greek letters that appear in variable labels to appropriate LaTeX symbols in math mode, or html symbols, unless \code{greek=FALSE}.} \item{spacing}{By default, the \code{latex} method for \code{describe} run on a matrix or data frame uses the \code{setspace} LaTeX package with a line spacing of 0.7 so as not to waste space. Specify \code{spacing=0} to suppress the use of the \code{setspace}'s \code{spacing} environment, or specify another positive value to use this environment with a different spacing.} \item{lspace}{extra vertical space, in character size units (i.e., "ex" as appended to the space). When using certain font sizes, there is too much space left around LaTeX verbatim environments. This two-vector specifies space to remove (i.e., the values are negated in forming the \code{vspace} command) before (first element) and after (second element of \code{lspace}) verbatims} \item{scroll}{set to \code{TRUE} to create an html scrollable box for the html output} \item{rows, cols}{the number of rows or columns to allocate for the scrollable box} \item{vname}{unused argument in \code{latex.describe.single}} \item{which}{specifies whether to plot numeric continuous or binary/categorical variables, or both. When \code{"both"} a list with two elements is created. Each element is a \code{ggplot2} or \code{plotly} object. If there are no variables of a given type, a single \code{ggplot2} or \code{plotly} object is returned, ready to print.} \item{what}{character or numeric vector specifying which variables to plot; default is to plot all} \item{sort}{specifies how and whether variables are sorted in order of the proportion of positives when \code{which="categorical"}. Specify \code{sort="none"} to leave variables in the order they appear in the original data.} \item{n.unique}{the minimum number of distinct values a numeric variable must have before \code{plot.describe} uses it in a continuous variable plot} \item{bvspace}{the between-variable spacing for categorical variables. Defaults to 2, meaning twice the amount of vertical space that is used for between-category spacing within a variable} \item{condense}{specifies whether to condense the output with regard to the 5 lowest and highest values (\code{"extremes"}) and the frequency table } \item{lang}{specifies the markup language} \item{verb}{set to 1 if a verbatim environment is already in effect for LaTeX} } \value{ a list containing elements \code{descript}, \code{counts}, \code{values}. The list is of class \code{describe}. If the input object was a matrix or a data frame, the list is a list of lists, one list for each variable analyzed. \code{latex} returns a standard \code{latex} object. For numeric variables having at least 20 distinct values, an additional component \code{intervalFreq} is included. 
This component is a list with two elements, \code{range} (containing two values) and \code{count}, a vector of 100 integer frequency counts. } \details{ If \code{options(na.detail.response=TRUE)} has been set and \code{na.action} is \code{"na.delete"} or \code{"na.keep"}, summary statistics on the response variable are printed separately for missing and non-missing values of each predictor. The default summary function returns the number of non-missing response values and the mean of the last column of the response values, with a \code{names} attribute of \code{c("N","Mean")}. When the response is a \code{Surv} object and the mean is used, this will result in the crude proportion of events being used to summarize the response. The actual summary function can be designated through \code{options(na.fun.response = "function name")}. If you are modifying LaTeX \code{parskip} or certain other parameters, you may need to shrink the area around \code{tabular} and \code{verbatim} environments produced by \code{latex.describe}. You can do this using, for example, \code{\\usepackage{etoolbox}\\makeatletter\\preto{\\@verbatim}{\\topsep=-1.4pt \\partopsep=0pt}\\preto{\\@tabular}{\\parskip=2pt \\parsep=0pt}\\makeatother} in the LaTeX preamble. } \author{ Frank Harrell \cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{sas.get}}, \code{\link{quantile}}, \code{\link{GiniMd}}, \code{\link{table}}, \code{\link{summary}}, \code{\link{model.frame.default}}, \code{\link{naprint}}, \code{\link{lapply}}, \code{\link{tapply}}, \code{\link[survival]{Surv}}, \code{\link{na.delete}}, \code{\link{na.keep}}, \code{\link{na.detail.response}}, \code{\link{latex}} } \examples{ set.seed(1) describe(runif(200),dig=2) #single variable, continuous #get quantiles .05,.10,\dots dfr <- data.frame(x=rnorm(400),y=sample(c('male','female'),400,TRUE)) describe(dfr) \dontrun{ options(grType='plotly') d <- describe(mydata) p <- plot(d) # create plots for both types of variables p[[1]]; p[[2]] # or p$Categorical; p$Continuous plotly::subplot(p[[1]], p[[2]], nrows=2) # plot both in one plot(d, which='categorical') # categorical ones d <- sas.get(".","mydata",special.miss=TRUE,recode=TRUE) describe(d) #describe entire data frame attach(d, 1) describe(relig) #Has special missing values .D .F .M .R .T #attr(relig,"label") is "Religious preference" #relig : Religious preference Format:relig # n missing D F M R T distinct # 4038 263 45 33 7 2 1 8 # #0:none (251, 6\%), 1:Jewish (372, 9\%), 2:Catholic (1230, 30\%) #3:Jehovah's Witnes (25, 1\%), 4:Christ Scientist (7, 0\%) #5:Seventh Day Adv (17, 0\%), 6:Protestant (2025, 50\%), 7:other (111, 3\%) # Method for describing part of a data frame: describe(death.time ~ age*sex + rcs(blood.pressure)) describe(~ age+sex) describe(~ age+sex, weights=freqs) # weighted analysis fit <- lrm(y ~ age*sex + log(height)) describe(formula(fit)) describe(y ~ age*sex, na.action=na.delete) # report on number deleted for each variable options(na.detail.response=TRUE) # keep missings separately for each x, report on dist of y by x=NA describe(y ~ age*sex) options(na.fun.response="quantile") describe(y ~ age*sex) # same but use quantiles of y by x=NA d <- describe(my.data.frame) d$age # print description for just age d[c('age','sex')] # print description for two variables d[sort(names(d))] # print in alphabetic order by var. 
names d2 <- d[20:30] # keep variables 20-30 page(d2) # pop-up window for these variables # Test date/time formats and suppression of times when they don't vary library(chron) d <- data.frame(a=chron((1:20)+.1), b=chron((1:20)+(1:20)/100), d=ISOdatetime(year=rep(2003,20),month=rep(4,20),day=1:20, hour=rep(11,20),min=rep(17,20),sec=rep(11,20)), f=ISOdatetime(year=rep(2003,20),month=rep(4,20),day=1:20, hour=1:20,min=1:20,sec=1:20), g=ISOdate(year=2001:2020,month=rep(3,20),day=1:20)) describe(d) # Make a function to run describe, latex.describe, and use the kdvi # previewer in Linux to view the result and easily make a pdf file ldesc <- function(data) { options(xdvicmd='kdvi') d <- describe(data, desc=deparse(substitute(data))) dvi(latex(d, file='/tmp/z.tex'), nomargins=FALSE, width=8.5, height=11) } ldesc(d) } } \keyword{interface} \keyword{nonparametric} \keyword{category} \keyword{distribution} \keyword{robust} \keyword{models} \keyword{hplot} Hmisc/man/format.pval.Rd0000644000176200001440000000262612243661443014623 0ustar liggesusers\name{format.pval} \alias{format.pval} \title{Format P Values} \description{ \code{format.pval} is intended for formatting p-values. } \usage{ format.pval(x, pv=x, digits = max(1, .Options$digits - 2), eps = .Machine$double.eps, na.form = "NA", \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{pv}{a numeric vector.} \item{x}{argument for method compliance.} \item{digits}{how many significant digits are to be used.} \item{eps}{a numerical tolerance: see Details.} \item{na.form}{character representation of \code{NA}s.} \item{\dots}{ arguments passed to \code{\link{format}} in the \code{format.pval} function body. } } \details{ \code{format.pval} is mainly an auxiliary function for \code{\link{print.summary.lm}} etc., and does separate formatting for fixed, floating point and very small values; those less than \code{eps} are formatted as \dQuote{\samp{< [eps]}} (where \dQuote{\samp{[eps]}} stands for \code{format(eps, digits)}). } \value{ A character vector. } \note{This is the base \code{\link[base]{format.pval}} function with the ability to pass the \code{nsmall} argument to \code{\link{format}} } \examples{ format.pval(c(runif(5), pi^-100, NA)) format.pval(c(0.1, 0.0001, 1e-27)) format.pval(c(0.1, 1e-27), nsmall=3) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{print} Hmisc/man/discrete.Rd0000644000176200001440000000356112243661443014173 0ustar liggesusers\name{discrete} \alias{as.discrete} \alias{as.discrete.default} \alias{discrete} \alias{[<-.discrete} \alias{[.discrete} \alias{[[.discrete} \alias{is.discrete} \alias{is.na<-.discrete} \alias{length<-.discrete} \title{ Discrete Vector tools } \description{ \code{discrete} creates a discrete vector which is distinct from a continuous vector, or a factor/ordered vector. The other functions are tools for manipulating discrete vectors. } \usage{ as.discrete(x, ...) \method{as.discrete}{default}(x, ...) discrete(x, levels = sort(unique.default(x), na.last = TRUE), exclude = NA) \method{[}{discrete}(x, ...) <- value \method{[}{discrete}(x, ..., drop = FALSE) \method{[[}{discrete}(x, i) is.discrete(x) \method{is.na}{discrete}(x) <- value \method{length}{discrete}(x) <- value } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a vector } \item{drop}{ Should unused levels be dropped? } \item{exclude}{logical: should \code{NA} be excluded. 
} \item{i}{ indexing vector } \item{levels}{ character: list of individual level values } \item{value}{ index of elements to set to \code{NA} } \item{\dots}{ arguments to be passed to other functions } } \details{ \code{as.discrete} converts a vector into a discrete vector. \code{discrete} creates a discrete vector from provided values. \code{is.discrete} tests to see if the vector is a discrete vector. } \value{ \code{as.discrete} and \code{discrete} return a vector of \code{discrete} type. \code{is.discrete} returns logical \code{TRUE} if the vector is of class discrete, otherwise it returns \code{FALSE}. } \author{ Charles Dupont} \seealso{ \code{\link{[[}}, \code{\link{[}}, \code{\link{factor}} } \examples{ a <- discrete(1:25) a is.discrete(a) b <- as.discrete(2:4) b } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ manip } Hmisc/DESCRIPTION0000644000176200001440000000317714371140723013035 0ustar liggesusersPackage: Hmisc Version: 4.8-0 Date: 2023-02-08 Title: Harrell Miscellaneous Authors@R: c(person(given = "Frank E", family = "Harrell Jr", role = c("aut", "cre"), email = "fh@fharrell.com", comment = c(ORCID = "0000-0002-8271-5493")), person(given = "Charles", family = "Dupont", role = "ctb", email = "charles.dupont@vumc.org", comment = "contributed several functions and maintains latex functions")) Maintainer: Frank E Harrell Jr <fh@fharrell.com> Depends: lattice, survival (>= 3.1-6), Formula, ggplot2 (>= 2.2) Imports: methods, latticeExtra, cluster, rpart, nnet, foreign, gtable, grid, gridExtra, data.table, htmlTable (>= 1.11.0), viridis, htmltools, base64enc, colorspace Suggests: acepack, chron, rms, mice, rstudioapi, tables, knitr, plotly (>= 4.5.6), rlang, plyr, VGAM Description: Contains many functions useful for data analysis, high-level graphics, utility operations, functions for computing sample size and power, simulation, importing and annotating datasets, imputing missing values, advanced table making, variable clustering, character string manipulation, conversion of R objects to LaTeX and html code, and recoding variables. 
License: GPL (>= 2) LazyLoad: Yes URL: https://hbiostat.org/R/Hmisc/ Encoding: UTF-8 RoxygenNote: 7.1.2 NeedsCompilation: yes Packaged: 2023-02-08 20:53:24 UTC; harrelfe Author: Frank E Harrell Jr [aut, cre] (<https://orcid.org/0000-0002-8271-5493>), Charles Dupont [ctb] (contributed several functions and maintains latex functions) Repository: CRAN Date/Publication: 2023-02-09 09:50:11 UTC Hmisc/src/0000755000176200001440000000000014371005704012105 5ustar liggesusersHmisc/src/largrec.f0000644000176200001440000000635613222736413013705 0ustar liggesusers SUBROUTINE largrec(x, y, n, xlim, ylim, width, height, & numbins, itype, rx, ry) C ********************************************************* C * x(n) - array of x values of data points DOUBLE PRECISION x(*) C * y(n) - array of y values of data points DOUBLE PRECISION y(*) C * n - number of data points INTEGER n C * xlim(2) - contains left and right limits of x axis DOUBLE PRECISION xlim(2) C * ylim(2) - contains bottom and top limits of y axis DOUBLE PRECISION ylim(2) C * width - minimum allowable width of empty space DOUBLE PRECISION width C * height - minimum allowable height of empty space DOUBLE PRECISION height C * numbins - number of blocks to chunk axis ranges into INTEGER numbins C * itype - how to favor box size INTEGER itype C * rx(2) - left and right limits of found box DOUBLE PRECISION rx(2) C * ry(2) - top and bottom limits of found box DOUBLE PRECISION ry(2) C * xd - x distance between x limits DOUBLE PRECISION xd C * yd - y distance between y limits DOUBLE PRECISION yd C * xinc - amount to add to x axis search box bounds DOUBLE PRECISION xinc C * yinc - amount to add to y axis search box bounds DOUBLE PRECISION yinc C * xl - left search box bound DOUBLE PRECISION xl C * xr - right search box bound DOUBLE PRECISION xr C * yb - bottom search box bound DOUBLE PRECISION yb C * yt - top search box bound DOUBLE PRECISION yt C * i - iterator variable INTEGER i C * area - area of empty space DOUBLE PRECISION area C * w - width of empty space DOUBLE PRECISION w C * h - height of empty space DOUBLE PRECISION h C * ar - temporary area storage DOUBLE PRECISION ar C xd = xlim(2)-xlim(1) yd = ylim(2)-ylim(1) xinc = xd / numbins yinc = yd / numbins rx(1) = 1d30 rx(2) = 1d30 ry(1) = 1d30 ry(2) = 1d30 IF(width .GE. xd .OR. height .GE. yd) THEN RETURN ENDIF C w = 0d0 h = 0d0 area = 0d0 C xl=xlim(1) DO WHILE (xl .LE. xlim(2)-width) yb = ylim(1) DO WHILE (yb .LE. ylim(2)-height) xr = xl + width DO WHILE (xr .LE. xlim(2)) yt = yb + height DO WHILE (yt .LE. ylim(2)) DO i=1,n IF(x(i) .GE. xl .AND. x(i) .LE. xr .AND. & y(i) .GE. yb .AND. y(i) .LE. yt) GO TO 1 ENDDO ar = (yt-yb)*(xr-xl) if((itype.EQ.1 .AND. ar .GT. area) .OR. & (itype.EQ.2 .AND. yt-yb .GE. h .AND. & xr-xl .GE. w)) THEN area = ar w = xr - xl h = yt - yb rx(1) = xl rx(2) = xr ry(1) = yb ry(2) = yt ENDIF yt = yt + yinc ENDDO xr = xr + xinc ENDDO 1 CONTINUE yb = yb + yinc ENDDO xl = xl + xinc ENDDO RETURN END Hmisc/src/jacklins.f0000644000176200001440000000100613222743450014050 0ustar liggesusersC Output from Public domain Ratfor, version 1.01 subroutine jacklins(x, w, n, k, res) integer n, k, l double precision x(n), w(n-1,k), res(n,k), sj do23000 l = 1, k do23002 j = 1, n sj = 0d0 do23004 i = 1, n if(i .lt. j)then sj = sj + w(i,l) * x(i) endif if(i .gt. 
j)then sj = sj + w(i-1,l) * x(i) endif 23004 continue 23005 continue res(j,l) = sj 23002 continue 23003 continue 23000 continue 23001 continue return end Hmisc/src/ranksort.c0000644000176200001440000000252414335517512014124 0ustar liggesusers#include "R.h" void sort2(int *np, double ra[], int rb[]) { int l,j,ir,i,n,rrb,*xrb; double rra,*xra; n = *np; xra=ra-1; xrb=rb-1; l=(n >> 1)+1; ir=n; for(;;) { if(l > 1) { rra=xra[--l]; rrb=xrb[l]; } else { rra=xra[ir]; rrb=xrb[ir]; xra[ir]=xra[1]; xrb[ir]=xrb[1]; if(--ir == 1) { xra[1]=rra; xrb[1]=rrb; return; } } i=l; j=l << 1; while (j <= ir) { if ( j < ir && xra[j] < xra[j+1]) ++j; if (rra < xra[j]) { xra[i]=xra[j]; xrb[i]=xrb[j]; j += (i=j); } else j=ir+1; } xra[i]=rra; xrb[i]=rrb; } } void crank(int *np, double w[]) { int n,j=1,ji,jt; double rank,*xw; n = *np; xw = w-1; while (j < n) { if(xw[j+1] != xw[j]) { xw[j]=j; ++j; } else { for (jt=j+1;jt<=n;jt++) if (xw[jt] != xw[j]) break; rank=0.5*(j+jt-1); for (ji=j;ji<=(jt-1);ji++) xw[ji]=rank; j=jt; } } if (j == n) xw[n]=n; } void F77_SUB(rank)(int *np, double x[], double w[], int ix[], double r[]) { int n, *xix, i; double *xx, *xr, *xw; n = *np; xx = x-1; xix = ix-1; xr = r-1; xw = w-1; for(i=1; i<=n; i++) { xix[i]=i; xw[i]=xx[i]; } sort2(np, w, ix); crank(np, w); for(i=1; i<=n; i++) xr[xix[i]] = xw[i]; } Hmisc/src/maxempr.f0000644000176200001440000000326112243661443013733 0ustar liggesusersC Output from Public domain Ratfor, version 1.01 subroutine maxempr(ax, ay, x, y, n, w, h, z, area, rect) implicit double precision (a-h,o-z) integer n double precision ax(2), ay(2), x(n), y(n), z(3), rect(4), maxr, li maxr = z(1) * dabs(ay(2) - ay(1)) rect(1) = z(2) rect(2) = ay(1) rect(3) = z(3) rect(4) = ay(2) do23000 i=1,n tl = ax(1) tr = ax(2) if(i .lt. n)then do23004 j=(i+1),n if(x(j) .gt. tl .and. x(j) .lt. tr)then area = (tr - tl) * (y(j) - y(i)) if(area .gt. maxr .and. ((tr - tl) .gt. w) .and. ((y(j) - y(i)) .g *t. h))then maxr = area rect(1) = tl rect(2) = y(i) rect(3) = tr rect(4) = y(j) endif if(x(j) .gt. x(i))then tr = x(j) else tl = x(j) endif endif 23004 continue 23005 continue endif area = (tr - tl) * (ay(2) - y(i)) if(area .gt. maxr .and. ((tr - tl) .gt. w) .and. ((ay(2) - y(i)) . *gt. h))then maxr = area rect(1) = tl rect(2) = y(i) rect(3) = tr rect(4) = ay(2) endif ri = ax(2) li = ax(1) do23014 k=1,n if(y(k) .lt. y(i) .and. x(k) .gt. x(i))then ri = dmin1(ri, x(k)) endif if(y(k) .lt. y(i) .and. x(k) .lt. x(i))then li = dmax1(li, x(k)) endif 23014 continue 23015 continue area = (ri - li) * (ay(2) - y(i)) if(area .gt. maxr .and. ((ri - li) .gt. w) .and. ((y(i) - ay(1)) . *gt. h))then maxr = area rect(1) = li rect(2) = ay(1) rect(3) = ri rect(4) = y(i) endif 23000 continue 23001 continue area = maxr return end Hmisc/src/cidxcp.f0000644000176200001440000000460112243661443013533 0ustar liggesusersC Output from Public domain Ratfor, version 1.01 subroutine cidxcp(x1,x2,y,e,n,method,outx, nrel,nuncer,c1,c2,gamma *1,gamma2,gamma,sd,c12,c21) implicit double precision (a-h,o-z) double precision x1(n),x2(n),y(n) logical e(n),outx integer n,method,i,j double precision nrel,nuncer,nconc1,nconc2,c1,c2,gamma1,gamma2, su *mr,sumr2,sumw,sumw2,sumrw, wi,ri,sumc,c12,c21,gamma,sd double precision dx,dx2,dy nconc1=0d0 nconc2=0d0 nrel=0d0 nuncer=0d0 sumr=0d0 sumr2=0d0 sumw=0d0 sumw2=0d0 sumrw=0d0 sumc=0d0 do23000 i=1,n wi=0d0 ri=0d0 do23002 j=1,n dx=x1(i)-x1(j) dx2=x2(i)-x2(j) if((i.ne.j) .and. (.not.outx .or. dx.ne.0. .or. 
dx2.ne.0.))then dy=y(i)-y(j) if((e(i).and.(dy.lt.0.)).or.(e(i).and..not.e(j).and.(dy.eq.0.)))th *en nrel=nrel+1d0 nconc1=nconc1+(z(dx.lt.0.)+.5d0*z(dx.eq.0.)) nconc2=nconc2+(z(dx2.lt.0.)+.5d0*z(dx2.eq.0.)) ri=ri+1d0 if(method.eq.1)then wi=wi+(z(dx.lt.dx2)-z(dx.gt.dx2)) sumc=sumc+z(dx.lt.dx2) else wi=wi+(z(dx.lt.0..and.dx2.ge.0.)-z(dx.gt.0..and.dx2.le.0.)) sumc=sumc+z(dx.lt.0..and.dx2.ge.0.) endif else if((e(j).and.(dy.gt.0.)).or.(e(j).and..not.e(i).and.(dy.eq.0.)))th *en nrel=nrel+1d0 nconc1=nconc1+(z(dx.gt.0.)+.5d0*z(dx.eq.0.)) nconc2=nconc2+(z(dx2.gt.0.)+.5d0*z(dx2.eq.0.)) ri=ri+1d0 if(method.eq.1)then wi=wi+(z(dx.gt.dx2)-z(dx.lt.dx2)) sumc=sumc+z(dx.gt.dx2) else wi=wi+(z(dx.gt.0..and.dx2.le.0.)-z(dx.lt.0..and.dx2.ge.0.)) sumc=sumc+z(dx.gt.0..and.dx2.le.0.) endif else if(.not.(e(i).and.e(j)))then nuncer=nuncer+1d0 endif endif endif endif 23002 continue 23003 continue sumr=sumr+ri sumr2=sumr2+ri*ri sumw=sumw+wi sumw2=sumw2+wi*wi sumrw=sumrw+ri*wi 23000 continue 23001 continue c1=nconc1/nrel gamma1=2d0*(c1-.5d0) c2=nconc2/nrel gamma2=2d0*(c2-.5d0) gamma=sumw/sumr sd=sumr2*sumw**2-2d0*sumr*sumw*sumrw+sumw2*sumr**2 sd=2d0*dsqrt(sd)/sumr/sumr c12=sumc/sumr c21=sumc/sumr-gamma return end function z(a) double precision z logical a if(a)then z=1d0 else z=0d0 endif return end Hmisc/src/cidxcn.f0000644000176200001440000000450312243661443013532 0ustar liggesusersC Output from Public domain Ratfor, version 1.01 C------------------------------------------------------------------------ C Compute c-index (c) and Brown-Hollander-Krowar-Goodman-Kruskal-Somer C rank correlation (gamma) between X and Y with censoring indicator E C Also returns number of relevant, concordant, and uncertain pairs C (nrel, nconc, nuncert) and estimated s.d. of gamma (sd) using C Quade formula (see SAS PROC MATPAR). Pairs with tied x are C excluded if outx=.TRUE. C C F. Harrell 27Nov90 C Modification of SAS Procedure KGKC (1980) C------------------------------------------------------------------------- subroutine cidxcn(x,y,e,n,nrel,nconc,nuncert,c,gamma,sd,outx) implicit double precision (a-h,o-z) double precision x(n),y(n),dx,dy logical e(n),outx double precision nrel,nuncert,nconc nconc=0d0 nrel=0d0 nuncert=0d0 sumr=0d0 sumr2=0d0 sumw=0d0 sumw2=0d0 sumrw=0d0 do23000 i=1,n wi=0d0 ri=0d0 do23002 j=1,n if(j.ne.i)then dx=x(i)-x(j) dy=y(i)-y(j) if(dx.ne.0. .or. 
.not.outx)then if((e(i).and.dy.lt.0.).or.(e(i).and..not.e(j).and.dy.eq.0.))then if(dx.lt.0.)then nconc=nconc+1d0 wi=wi+1d0 else if(dx.eq.0.)then nconc=nconc+.5d0 else wi=wi-1d0 endif endif nrel=nrel+1d0 ri=ri+1d0 else if((e(j).and.dy.gt.0.).or.(e(j).and..not.e(i).and.dy.eq.0.))then if(dx.gt.0.)then nconc=nconc+1d0 wi=wi+1d0 else if(dx.eq.0.)then nconc=nconc+.5d0 else wi=wi-1d0 endif endif nrel=nrel+1d0 ri=ri+1d0 else if(.not.(e(i).and.e(j)))then nuncert=nuncert+1d0 endif endif endif endif endif 23002 continue 23003 continue sumr=sumr+ri sumr2=sumr2+ri*ri sumw=sumw+wi sumw2=sumw2+wi*wi sumrw=sumrw+ri*wi 23000 continue 23001 continue c=nconc/nrel gamma=2.*(c-.5) Ccall dblepr('sumr',4,sumr,1) Ccall dblepr('sumw',4,sumw,1) Ccall dblepr('sumr2',5,sumr2,1) Ccall dblepr('sumw2',5,sumw2,1) Ccall dblepr('sumrw',5,sumrw,1) sd=sumr2*sumw**2-2d0*sumr*sumw*sumrw+sumw2*sumr**2 sd=2.*dsqrt(sd)/sumr/sumr return end Hmisc/src/nstr.c0000644000176200001440000000277212243661443013253 0ustar liggesusers#include "Hmisc.h" static Hmisc_StringBuffer cbuff = {NULL, 0, MAXELTSIZE}; SEXP do_nstr(SEXP s, SEXP n) { SEXP ans; /* the returned character string */ int i, j; /* the length of the string and number of reps */ int s_counter = 0, n_counter = 0; int longest, s_length, n_length; S_EVALUATOR n_length = length(n); s_length = length(s); longest = n_length < s_length ? s_length : n_length; if(n_length == 1 && INTEGER(n)[0] == 1) return s; PROTECT(ans = allocVector(STRSXP, longest)); for(i=0; i < longest; i++) { int n_reps = INTEGER(n)[n_counter]; if(n_reps < 1) { SET_STRING_ELT(ans, i, mkChar("")); } else if(n_reps == 1) { SET_STRING_ELT(ans, i, duplicate(STRING_ELT(s, s_counter))); } else { char *cbuf, *buf; const char *seg; size_t seg_len; seg = CHAR(STRING_ELT(s, s_counter)); seg_len = strlen(seg); cbuf = buf = Hmisc_AllocStringBuffer((n_reps * seg_len + 1) * sizeof(char), &cbuff); for(j=0; j < n_reps; ++j) { strcpy(buf, seg); buf += seg_len; } *buf = '\0'; SET_STRING_ELT(ans, i, mkChar(cbuf)); } n_counter = (++n_counter < n_length) ? n_counter : 0; s_counter = (++s_counter < s_length) ? 
s_counter : 0; } Hmisc_FreeStringBuffer(&cbuff); UNPROTECT(1); return ans; } Hmisc/src/ratfor/0000755000176200001440000000000013555351222013404 5ustar liggesusersHmisc/src/ratfor/rcorr.r0000644000176200001440000000375613222745117014732 0ustar liggesusersSUBROUTINE rcorr(xx, n, p, itype, dmat, npair, x, y, rx, ry, work, iwork) INTEGER p, npair(p,p) DOUBLE PRECISION xx(n,p), dmat(p,p), x(n), y(n), rx(n), ry(n), work(n) INTEGER iwork(n) DOUBLE PRECISION sumx, sumx2, sumy, sumy2, sumxy, z, a, b, xk, yk, d # The following is just to prevent a gfortran possible uninitialized var warning sumx = 0d0; sumy = 0d0; sumx2 = 0d0; sumy2 = 0d0; sumxy = 0d0 DO i = 1, p { np = 0 DO k = 1, n { if(xx(k, i) < 1d49) np = np + 1 } npair(i, i) = np DO j = (i + 1), p { m = 0 if(itype == 1) { sumx = 0d0; sumy = 0d0; sumx2 = 0d0; sumy2 = 0d0; sumxy = 0d0 } DO k = 1, n { xk = xx(k, i) yk = xx(k, j) if(xk < 1d49 & yk < 1d49) { m = m + 1 if(itype == 1) { a = xk; b = yk sumx = sumx + a sumx2 = sumx2 + a * a sumy = sumy + b sumy2 = sumy2 + b * b sumxy = sumxy + a * b } else { x(m) = xk y(m) = yk } } } npair(i, j) = m if(m > 1) { if(itype == 1) { z = m d = (sumxy - sumx * sumy / z) / dsqrt((sumx2 - sumx * sumx / z) * (sumy2 - sumy * sumy / z)) } else CALL docorr(x, y, m, d, rx, ry, work, iwork) dmat(i, j) = d } else dmat(i, j) = 1d50 } } DO i = 1, p { dmat(i, i) = 1d0 DO j = (i + 1), p { dmat(j, i) = dmat(i, j) npair(j, i) = npair(i, j) } } RETURN END SUBROUTINE docorr(x, y, n, d, rx, ry, work, iwork) DOUBLE PRECISION x(1), y(1), rx(n), ry(n), work(1) INTEGER iwork(1) DOUBLE PRECISION sumx, sumx2, sumy, sumy2, sumxy, a, b, z, d CALL rank(n, x, work, iwork, rx) CALL rank(n, y, work, iwork, ry) sumx = 0d0; sumx2 = 0d0; sumy = 0d0; sumy2 = 0d0; sumxy = 0d0 DO i = 1, n { a = rx(i) b = ry(i) sumx = sumx + a sumx2 = sumx2 + a * a sumy = sumy + b sumy2 = sumy2 + b * b sumxy = sumxy + a * b } z = n d = (sumxy - sumx * sumy / z) / dsqrt((sumx2 - sumx * sumx / z) * (sumy2 - sumy * sumy / z)) RETURN END Hmisc/src/ratfor/hoeffd.r0000644000176200001440000000572513222735762015041 0ustar liggesusers# ratfor -o ../hoeffd.f hoeffd.r # SUBROUTINE hoeffd(xx, n, p, dmat, aadmat, madmat, npair, x, y, rx, ry, rj) IMPLICIT DOUBLE PRECISION (a-h,o-z) INTEGER p, npair(p,p) DOUBLE PRECISION xx(n,p), dmat(p,p), aadmat(p,p), madmat(p,p), x(n), y(n), rx(n), ry(n), rj(n), maxad DO i=1, p { np=0 DO k=1, n { if(xx(k,i) < 1d49) np = np + 1 } npair(i,i) = np DO j=(i+1),p { m = 0 DO k=1,n { xk = xx(k,i) yk = xx(k,j) if(xk < 1d49 & yk < 1d49) { m = m + 1 x(m) = xk y(m) = yk } } npair(i,j) = m if(m > 4) { CALL hoeff(x, y, m, d, aad, maxad, rx, ry, rj) dmat(i,j) = d aadmat(i,j) = aad madmat(i,j) = maxad } else dmat(i,j) = 1d50 } } DO i=1,p { dmat(i,i) = 1d0/30d0 DO j=(i+1),p { dmat(j,i) = dmat(i,j) npair(j,i) = npair(i,j) aadmat(j,i) = aadmat(i,j) madmat(j,i) = madmat(i,j) } } RETURN END SUBROUTINE hoeff(x, y, n, d, aad, maxad, rx, ry, rj) IMPLICIT DOUBLE PRECISION (a-h,o-z) DOUBLE PRECISION x(n), y(n), rx(n), ry(n), rj(n), maxad # INTEGER iwork(1) # CALL rank(n, x, work, iwork, rx) # CALL rank(n, y, work, iwork, ry) CALL jrank(x, y, n, rx, ry, rj) q = 0d0 r = 0d0 s = 0d0 aad = 0d0 maxad = 0d0 z = n DO i=1,n { rxi = rx(i) ryi = ry(i) rji = rj(i) ad = dabs((rji/z) - (rxi/z)*(ryi/z)) aad = aad + ad maxad = dmax1(maxad, ad) q = q + (rxi-1d0)*(rxi-2d0)*(ryi-1d0)*(ryi-2d0) r = r + (rxi-2d0)*(ryi-2d0)*(rji-1d0) s = s + (rji-1d0)*(rji-2d0) } aad = aad / z d = (q-2d0*(z-2d0)*r+(z-2d0)*(z-3d0)*s)/z/(z-1d0)/(z-2d0)/(z-3d0)/(z-4d0) RETURN END # Use C version of this 
which is much faster (since uses a sort) # SUBROUTINE rank(x, n, r) # simple rank with midranks for ties # REAL*4 x(1), r(1) # DO i=1,n { # xi=x(i) # ir=2 # will be 2*rank(x(i)) # DO j=1,n { # if(i.ne.j) { # if(x(j) j) sj = sj + w(i-1,l) * x(i) } res(j,l) = sj } } return end Hmisc/src/ratfor/cidxcp.r0000644000176200001440000000353712243661443015053 0ustar liggesuserssubroutine cidxcp(x1,x2,y,e,n,method,outx, nrel,nuncer,c1,c2,gamma1,gamma2,gamma,sd,c12,c21) implicit DOUBLE PRECISION (a-h,o-z) DOUBLE PRECISION x1(n),x2(n),y(n) logical e(n),outx integer n,method,i,j DOUBLE PRECISION nrel,nuncer,nconc1,nconc2,c1,c2,gamma1,gamma2, sumr,sumr2,sumw,sumw2,sumrw, wi,ri,sumc,c12,c21,gamma,sd DOUBLE PRECISION dx,dx2,dy nconc1=0d0;nconc2=0d0;nrel=0d0;nuncer=0d0;sumr=0d0;sumr2=0d0;sumw=0d0; sumw2=0d0;sumrw=0d0;sumc=0d0; do i=1,n { wi=0d0;ri=0d0; do j=1,n { dx=x1(i)-x1(j);dx2=x2(i)-x2(j); if((i!=j) & (!outx | dx!=0. | dx2!=0.)) { dy=y(i)-y(j); if ((e(i)&(dy<0.))|(e(i)&^e(j)&(dy==0.))) { nrel=nrel+1d0; nconc1=nconc1+(z(dx<0.)+.5D0*z(dx==0.)); nconc2=nconc2+(z(dx2<0.)+.5D0*z(dx2==0.)); ri=ri+1d0; if (method==1) { wi=wi+(z(dxdx2)); sumc=sumc+z(dx=0.)-z(dx>0.&dx2<=0.)); sumc=sumc+z(dx<0.&dx2>=0.); } } else if ((e(j)&(dy>0.))|(e(j)&^e(i)&(dy==0.))) { nrel=nrel+1d0; nconc1=nconc1+(z(dx>0.)+.5D0*z(dx==0.)); nconc2=nconc2+(z(dx2>0.)+.5D0*z(dx2==0.)); ri=ri+1d0; if (method==1) { wi=wi+(z(dx>dx2)-z(dxdx2); } else { wi=wi+(z(dx>0.&dx2<=0.)-z(dx<0.&dx2>=0.)); sumc=sumc+z(dx>0.&dx2<=0.); } } else if (^(e(i)&e(j))) nuncer=nuncer+1d0; } } sumr=sumr+ri; sumr2=sumr2+ri*ri sumw=sumw+wi; sumw2=sumw2+wi*wi; sumrw=sumrw+ri*wi } c1=nconc1/nrel; gamma1=2D0*(c1-.5D0); c2=nconc2/nrel; gamma2=2D0*(c2-.5D0); gamma=sumw/sumr sd=sumr2*sumw**2-2D0*sumr*sumw*sumrw+sumw2*sumr**2; sd=2D0*dsqrt(sd)/sumr/sumr; c12=sumc/sumr; c21=sumc/sumr-gamma return end function z(a) DOUBLE PRECISION z logical a if(a)z=1d0 else z=0d0 return end Hmisc/src/ratfor/cidxcn.r0000644000176200001440000000535012243661443015044 0ustar liggesusers#------------------------------------------------------------------------ # Compute c-index (c) and Brown-Hollander-Krowar-Goodman-Kruskal-Somer # rank correlation (gamma) between X and Y with censoring indicator E # Also returns number of relevant, concordant, and uncertain pairs # (nrel, nconc, nuncert) and estimated s.d. of gamma (sd) using # Quade formula (see SAS PROC MATPAR). Pairs with tied x are # excluded if outx=.TRUE. # # F. Harrell 27Nov90 # Modification of SAS Procedure KGKC (1980) #------------------------------------------------------------------------- SUBROUTINE cidxcn(x,y,e,n,nrel,nconc,nuncert,c,gamma,sd,outx) IMPLICIT DOUBLE PRECISION (a-h,o-z) DOUBLE PRECISION x(n),y(n),dx,dy LOGICAL e(n),outx DOUBLE PRECISION nrel,nuncert,nconc nconc=0d0 nrel=0d0 nuncert=0d0 sumr=0d0 sumr2=0d0 sumw=0d0 sumw2=0d0 sumrw=0d0 do i=1,n { wi=0d0 ri=0d0 do j=1,n if(j^=i) { dx=x(i)-x(j) dy=y(i)-y(j) if(dx!=0. | !outx) { if((e(i)&dy<0.)|(e(i)&!e(j)&dy==0.)) { if(dx<0.) { nconc=nconc+1d0 wi=wi+1d0 } else if(dx==0.)nconc=nconc+.5d0 else wi=wi-1d0 nrel=nrel+1d0 ri=ri+1d0 } else if((e(j)&dy>0.)|(e(j)&!e(i)&dy==0.)) { if(dx>0.) { nconc=nconc+1d0 wi=wi+1d0 } else if(dx==0.) 
nconc=nconc+.5d0 else wi=wi-1d0 nrel=nrel+1d0 ri=ri+1d0 } else if(!(e(i)&e(j)))nuncert=nuncert+1d0 } } sumr=sumr+ri sumr2=sumr2+ri*ri sumw=sumw+wi sumw2=sumw2+wi*wi sumrw=sumrw+ri*wi } c=nconc/nrel gamma=2.*(c-.5) #call dblepr('sumr',4,sumr,1) #call dblepr('sumw',4,sumw,1) #call dblepr('sumr2',5,sumr2,1) #call dblepr('sumw2',5,sumw2,1) #call dblepr('sumrw',5,sumrw,1) sd=sumr2*sumw**2-2d0*sumr*sumw*sumrw+sumw2*sumr**2 sd=2.*dsqrt(sd)/sumr/sumr return end Hmisc/src/ratfor/wclosest.r0000644000176200001440000000146213222736004015431 0ustar liggesusersSUBROUTINE wclosest(w, x, lw, lx, j) IMPLICIT DOUBLE PRECISION (a-h,o-z) INTEGER lw, lx, j(lw) DOUBLE PRECISION w(lw), x(lx) do i=1,lw { wi=w(i) dmin=1d40 m=0 do k=1,lx { d = dabs(x(k) - wi) if(d < dmin) { dmin = d m = k } } j(i) = m } return end SUBROUTINE wclosepw(w, x, r, f, lw, lx, xd, j) IMPLICIT DOUBLE PRECISION (a-h,o-z) DOUBLE PRECISION w(lw),x(lx),r(lw),xd(lx) INTEGER lw, lx, j(lw) do i=1, lw { wi = w(i) dmean = 0d0 do k=1, lx { xd(k) = dabs(x(k) - wi) dmean = dmean + xd(k) } dmean = f * dmean / lx sump = 0d0 do k=1, lx { z = min(xd(k)/dmean, 1d0) xd(k) = (1d0 - z**3)**3 sump = sump + xd(k) } prob = 0d0 ri = r(i) m = 1 do k=1, lx { prob = prob + xd(k) / sump if(ri > prob) m = m + 1 } j(i) = m } return end Hmisc/src/ratfor/maxempr.r0000644000176200001440000000366212243661443015251 0ustar liggesusers# Converted from R code provided by Hans Werner Borchers # ax = x-limits for region of interest # ay = y-limits " " # x, y = coordinates of points to avoid # Assume x, y are sorted in y-order, e.g # o = order(y); x <- x[o]; y <- y[o] # n = length(x) = length(y) # z = c(D[m], d[m], d[m+1]), d=sort(c(ax,x)), D=diff(d), m=which.max(D) # Output: area, rect[4] # To convert to Fortran: # sudo apt-get install ratfor # ratfor -o ../maxempr.f maxempr.r SUBROUTINE maxempr(ax, ay, x, y, n, w, h, z, area, rect) IMPLICIT DOUBLE PRECISION (a-h,o-z) INTEGER n DOUBLE PRECISION ax(2), ay(2), x(n), y(n), z(3), rect(4), maxr, li # check vertical slices maxr = z(1) * dabs(ay(2) - ay(1)) rect(1) = z(2) rect(2) = ay(1) rect(3) = z(3) rect(4) = ay(2) do i=1,n { tl = ax(1); tr = ax(2) if (i < n) { do j=(i+1),n { if (x(j) > tl & x(j) < tr) { ## check horizontal slices (j == i+1) ## and (all) rectangles above (x(i), y(i)) area = (tr - tl) * (y(j) - y(i)) if (area > maxr & ((tr - tl) > w) & ((y(j) - y(i)) > h)) { maxr = area rect(1) = tl rect(2) = y(i) rect(3) = tr rect(4) = y(j) } if (x(j) > x(i)) tr = x(j) else tl = x(j) } } } ## check open rectangles above (x(i), y(i)) area = (tr - tl) * (ay(2) - y(i)) if (area > maxr & ((tr - tl) > w) & ((ay(2) - y(i)) > h)) { maxr = area rect(1) = tl rect(2) = y(i) rect(3) = tr rect(4) = ay(2) } ## check open rectangles below (x(i), y(i)) ri = ax(2); li = ax(1) do k=1,n { if(y(k) < y(i) & x(k) > x(i)) ri = dmin1(ri, x(k)) if(y(k) < y(i) & x(k) < x(i)) li = dmax1(li, x(k)) } area = (ri - li) * (ay(2) - y(i)) if (area > maxr & ((ri - li) > w) & ((y(i) - ay(1)) > h)) { maxr = area rect(1) = li rect(2) = ay(1) rect(3) = ri rect(4) = y(i) } } area = maxr return end Hmisc/src/init.c0000644000176200001440000000450313222744540013220 0ustar liggesusers#include #include #include // for NULL #include /* FIXME: Check these declarations against the C/Fortran source code. 
*/ /* .Call calls */ extern SEXP do_mchoice_match(SEXP, SEXP, SEXP); extern SEXP do_nstr(SEXP, SEXP); /* .Fortran calls */ extern void F77_NAME(cidxcn)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(cidxcp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hoeffd)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(jacklins)(void *, void *, void *, void *, void *); extern void F77_NAME(largrec)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(maxempr)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(rcorr)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); /* extern void F77_NAME(wcidxy)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); */ extern void F77_NAME(wclosepw)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(wclosest)(void *, void *, void *, void *, void *); static const R_CallMethodDef CallEntries[] = { {"do_mchoice_match", (DL_FUNC) &do_mchoice_match, 3}, {"do_nstr", (DL_FUNC) &do_nstr, 2}, {NULL, NULL, 0} }; static const R_FortranMethodDef FortranEntries[] = { {"cidxcn", (DL_FUNC) &F77_NAME(cidxcn), 11}, {"cidxcp", (DL_FUNC) &F77_NAME(cidxcp), 17}, {"hoeffd", (DL_FUNC) &F77_NAME(hoeffd), 12}, {"jacklins", (DL_FUNC) &F77_NAME(jacklins), 5}, {"largrec", (DL_FUNC) &F77_NAME(largrec), 11}, {"maxempr", (DL_FUNC) &F77_NAME(maxempr), 10}, {"rcorr", (DL_FUNC) &F77_NAME(rcorr), 12}, /* {"wcidxy", (DL_FUNC) &F77_NAME(wcidxy), 11}, */ {"wclosepw", (DL_FUNC) &F77_NAME(wclosepw), 8}, {"wclosest", (DL_FUNC) &F77_NAME(wclosest), 5}, {NULL, NULL, 0} }; void R_init_Hmisc(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, FortranEntries, NULL); R_useDynamicSymbols(dll, FALSE); } Hmisc/src/rcorr.f0000644000176200001440000000474213222745131013411 0ustar liggesusersC Output from Public domain Ratfor, version 1.01 subroutine rcorr(xx, n, p, itype, dmat, npair, x, y, rx, ry, work, * iwork) integer p, npair(p,p) double precision xx(n,p), dmat(p,p), x(n), y(n), rx(n), ry(n), wor *k(n) integer iwork(n) double precision sumx, sumx2, sumy, sumy2, sumxy, z, a, b, xk, yk, * d sumx = 0d0 sumy = 0d0 sumx2 = 0d0 sumy2 = 0d0 sumxy = 0d0 do23000 i = 1, p np = 0 do23002 k = 1, n if(xx(k, i) .lt. 1d49)then np = np + 1 endif 23002 continue 23003 continue npair(i, i) = np do23006 j = (i + 1), p m = 0 if(itype .eq. 1)then sumx = 0d0 sumy = 0d0 sumx2 = 0d0 sumy2 = 0d0 sumxy = 0d0 endif do23010 k = 1, n xk = xx(k, i) yk = xx(k, j) if(xk .lt. 1d49 .and. yk .lt. 1d49)then m = m + 1 if(itype .eq. 1)then a = xk b = yk sumx = sumx + a sumx2 = sumx2 + a * a sumy = sumy + b sumy2 = sumy2 + b * b sumxy = sumxy + a * b else x(m) = xk y(m) = yk endif endif 23010 continue 23011 continue npair(i, j) = m if(m .gt. 1)then if(itype .eq. 
1)then z = m d = (sumxy - sumx * sumy / z) / dsqrt((sumx2 - sumx * sumx / z) * *(sumy2 - sumy * sumy / z)) else call docorr(x, y, m, d, rx, ry, work, iwork) endif dmat(i, j) = d else dmat(i, j) = 1d50 endif 23006 continue 23007 continue 23000 continue 23001 continue do23020 i = 1, p dmat(i, i) = 1d0 do23022 j = (i + 1), p dmat(j, i) = dmat(i, j) npair(j, i) = npair(i, j) 23022 continue 23023 continue 23020 continue 23021 continue return end subroutine docorr(x, y, n, d, rx, ry, work, iwork) double precision x(1), y(1), rx(n), ry(n), work(1) integer iwork(1) double precision sumx, sumx2, sumy, sumy2, sumxy, a, b, z, d call rank(n, x, work, iwork, rx) call rank(n, y, work, iwork, ry) sumx = 0d0 sumx2 = 0d0 sumy = 0d0 sumy2 = 0d0 sumxy = 0d0 do23024 i = 1, n a = rx(i) b = ry(i) sumx = sumx + a sumx2 = sumx2 + a * a sumy = sumy + b sumy2 = sumy2 + b * b sumxy = sumxy + a * b 23024 continue 23025 continue z = n d = (sumxy - sumx * sumy / z) / dsqrt((sumx2 - sumx * sumx / z) * *(sumy2 - sumy * sumy / z)) return end Hmisc/src/hoeffd.f0000644000176200001440000000533213222736103013510 0ustar liggesusersC Output from Public domain Ratfor, version 1.01 subroutine hoeffd(xx, n, p, dmat, aadmat, madmat, npair, x, y, rx, * ry, rj) implicit double precision (a-h,o-z) integer p, npair(p,p) double precision xx(n,p), dmat(p,p), aadmat(p,p), madmat(p,p), x(n *), y(n), rx(n), ry(n), rj(n), maxad do23000 i=1, p np=0 do23002 k=1, n if(xx(k,i) .lt. 1d49)then np = np + 1 endif 23002 continue 23003 continue npair(i,i) = np do23006 j=(i+1),p m = 0 do23008 k=1,n xk = xx(k,i) yk = xx(k,j) if(xk .lt. 1d49 .and. yk .lt. 1d49)then m = m + 1 x(m) = xk y(m) = yk endif 23008 continue 23009 continue npair(i,j) = m if(m .gt. 4)then call hoeff(x, y, m, d, aad, maxad, rx, ry, rj) dmat(i,j) = d aadmat(i,j) = aad madmat(i,j) = maxad else dmat(i,j) = 1d50 endif 23006 continue 23007 continue 23000 continue 23001 continue do23014 i=1,p dmat(i,i) = 1d0/30d0 do23016 j=(i+1),p dmat(j,i) = dmat(i,j) npair(j,i) = npair(i,j) aadmat(j,i) = aadmat(i,j) madmat(j,i) = madmat(i,j) 23016 continue 23017 continue 23014 continue 23015 continue return end subroutine hoeff(x, y, n, d, aad, maxad, rx, ry, rj) implicit double precision (a-h,o-z) double precision x(n), y(n), rx(n), ry(n), rj(n), maxad call jrank(x, y, n, rx, ry, rj) q = 0d0 r = 0d0 s = 0d0 aad = 0d0 maxad = 0d0 z = n do23018 i=1,n rxi = rx(i) ryi = ry(i) rji = rj(i) ad = dabs((rji/z) - (rxi/z)*(ryi/z)) aad = aad + ad maxad = dmax1(maxad, ad) q = q + (rxi-1d0)*(rxi-2d0)*(ryi-1d0)*(ryi-2d0) r = r + (rxi-2d0)*(ryi-2d0)*(rji-1d0) s = s + (rji-1d0)*(rji-2d0) 23018 continue 23019 continue aad = aad / z d = (q-2d0*(z-2d0)*r+(z-2d0)*(z-3d0)*s)/z/(z-1d0)/(z-2d0)/(z-3d0)/ *(z-4d0) return end subroutine jrank(x, y, n, rx, ry, r) integer n double precision x(n), y(n), rx(n), ry(n), r(n), cx, cy, ri, rix, *riy, xi, yi do23020 i=1,n xi = x(i) yi = y(i) ri = 1d0 rix = 1d0 riy = 1d0 do23022 j=1,n if(i .ne. j)then cx = 0d0 if(x(j) .lt. xi)then cx = 1d0 endif if(x(j) .eq. xi)then cx = .5d0 endif cy = 0d0 if(y(j) .lt. yi)then cy = 1d0 endif if(y(j) .eq. 
yi)then cy = .5d0 endif rix = rix + cx riy = riy + cy ri = ri + cx*cy endif 23022 continue 23023 continue rx(i) = rix ry(i) = riy r(i) = ri 23020 continue 23021 continue return end Hmisc/src/Hmisc.c0000644000176200001440000000170214126626200013312 0ustar liggesusers#include "Hmisc.h" char *Hmisc_AllocStringBuffer(size_t blen, Hmisc_StringBuffer *buf) { size_t blen1, bsize = buf->defaultSize; S_EVALUATOR if(blen * sizeof(char) < buf->bufsize) return buf->data; blen1 = blen = (blen + 1) * sizeof(char); blen = (blen / bsize) * bsize; if(blen < blen1) blen += bsize; if(buf->data == NULL) { buf->data = (char *) malloc(blen); buf->data[0] = '\0'; } else buf->data = (char *) realloc(buf->data, blen); buf->bufsize = blen; if(!buf->data) { buf->bufsize = 0; /* don't translate internal error message */ Rf_error("could not allocate memory (%u Mb) in C function 'Hmisc_AllocStringBuffer'", (unsigned int) blen/1024/1024); } return buf->data; } void Hmisc_FreeStringBuffer(Hmisc_StringBuffer *buf) { if (buf->data != NULL) { free(buf->data); buf->bufsize = 0; buf->data = NULL; } } Hmisc/src/Hmisc.h0000644000176200001440000000145414126626200013323 0ustar liggesusers#ifndef _HMISC_H_ #define _HMISC_H_ #include #include #include #include #include "R_ext/Error.h" #ifdef _SPLUS_ # define STRING_ELT(x,i) (CHARACTER_POINTER(x)[i]) # define TO_CHAR(x) (x) # define translateChar(x) (x) # define IS_NA_LGL(x) (is_na(&x, LGL)) # define SET_NA_LGL(x) (na_set(&x, LGL)) typedef s_object *SEXP ; typedef char *STR_ELT; #else # define TO_CHAR(x) (CHAR(x)) # define STR_ELT SEXP # define IS_NA_LGL(x) (x == NA_LOGICAL) # define SET_NA_LGL(x) (x = NA_LOGICAL) #endif #define MAXELTSIZE 8192 typedef struct { char *data; size_t bufsize; size_t defaultSize; } Hmisc_StringBuffer; char *Hmisc_AllocStringBuffer(size_t blen, Hmisc_StringBuffer *buf); void Hmisc_FreeStringBuffer(Hmisc_StringBuffer *buf); #endif Hmisc/src/string_box.c0000644000176200001440000000246112243661443014436 0ustar liggesusers#include #include SEXP string_box(SEXP string) { int i,j; int num_string = LENGTH(string); SEXP ans; SEXP names; SEXP height; SEXP width; PROTECT(ans = allocVector(VECSXP, 2)); SET_VECTOR_ELT(ans, 0, height = allocVector(INTSXP, num_string)); SET_VECTOR_ELT(ans, 1, width = allocVector(INTSXP, num_string)); setAttrib(ans, R_NamesSymbol, names = allocVector(STRSXP, 2)); SET_STRING_ELT(names, 0, mkChar("rows")); SET_STRING_ELT(names, 1, mkChar("columns")); for(i=0; i < num_string; i++) { int str_width = 0; int str_subwidth = 0; int str_height= 0; const char *substring = CHAR(STRING_ELT(string, i)); j = 0; while(substring[j] != '\0') { if(substring[j] == '\n') { if(str_subwidth > str_width) str_width = str_subwidth; str_subwidth = 0; str_height++; } else str_subwidth++; j++; } if(j > 0) str_height++; if(str_subwidth > str_width) INTEGER(width)[i] = str_subwidth; else INTEGER(width)[i] = str_width; INTEGER(height)[i] = str_height; } UNPROTECT(1); return(ans); } Hmisc/src/wclosest.f0000644000176200001440000000234313222736040014117 0ustar liggesusersC Output from Public domain Ratfor, version 1.01 subroutine wclosest(w, x, lw, lx, j) implicit double precision (a-h,o-z) integer lw, lx, j(lw) double precision w(lw), x(lx) do23000 i=1,lw wi=w(i) dmin=1d40 m=0 do23002 k=1,lx d = dabs(x(k) - wi) if(d .lt. 
dmin)then dmin = d m = k endif 23002 continue 23003 continue j(i) = m 23000 continue 23001 continue return end subroutine wclosepw(w, x, r, f, lw, lx, xd, j) implicit double precision (a-h,o-z) double precision w(lw),x(lx),r(lw),xd(lx) integer lw, lx, j(lw) do23006 i=1, lw wi = w(i) dmean = 0d0 do23008 k=1, lx xd(k) = dabs(x(k) - wi) dmean = dmean + xd(k) 23008 continue 23009 continue dmean = f * dmean / lx sump = 0d0 do23010 k=1, lx z = min(xd(k)/dmean, 1d0) xd(k) = (1d0 - z**3)**3 sump = sump + xd(k) 23010 continue 23011 continue prob = 0d0 ri = r(i) m = 1 do23012 k=1, lx prob = prob + xd(k) / sump if(ri .gt. prob)then m = m + 1 endif 23012 continue 23013 continue j(i) = m 23006 continue 23007 continue return end Hmisc/src/mChoice.c0000644000176200001440000001445714126626200013631 0ustar liggesusers/* #define USE_RINTERNALS 1 */ #include "Hmisc.h" static Hmisc_StringBuffer cbuff = {NULL, 0, MAXELTSIZE}; int get_next_mchoice(char **s) { long opt; int errsv; char *begin, *end, *err_chk; S_EVALUATOR begin = *s; if(begin == NULL) return 0; if(*begin == ';') end = begin; else if(*begin == '\0') /* begin points to the end of the string; set end to NULL */ end = NULL; else /* set end to the location of the next ';' */ end = strchr(begin + 1, ';'); if(end) { /* set end value to null and increment */ *end++ = '\0'; /* set s to the beginning of the next substring */ *s = end; } else /* end points to the end of the string. Set *s to NULL to indicate all of the string was consumed. */ *s = NULL; /* if begin is a zero-length substring return 0 */ if(strlen(begin) == 0) return 0; /* convert substring begin into its integer value */ /* set errno to zero */ errno = 0; opt = strtol(begin, &err_chk, 10); /* Check to see if an error occurred in strtol */ if(errno != 0) { errsv = errno; Rf_error("string to integer conversion error: %s", strerror(errsv)); } if(err_chk == begin || *err_chk != '\0') Rf_error("string %s is not a valid integer number", begin); /* return the integer mChoice option */ return (int)opt; } SEXP do_mchoice_match(SEXP x, SEXP table, SEXP nomatch) { SEXP elm_index; /* Storage for value of first row of first match of each element in x */ R_len_t len; /* Number of elements in x */ R_len_t nfound = 0; /* count of number of elements of x matched in table */ char *str_ptr; /* current location pointer */ const char *str; int i, j, comp; size_t slen; /* length of string */ S_EVALUATOR /* get number of elements in x */ len = LENGTH(x); /* allocate an index vector of the same length as x */ PROTECT(elm_index = NEW_INTEGER(len)); /* set all values in elm_index to 0 */ memset((int *)INTEGER_POINTER(elm_index), 0, len * sizeof(int)); /* count number of x values that are zero and set nfound to that */ for(i=0; i < len; i++) { if(INTEGER_POINTER(x)[i] == 0) { INTEGER_POINTER(elm_index)[i] = INTEGER_POINTER(nomatch)[0]; nfound++; } } /* iterate through each element of table looking for matches to values in x. It is done this way because parsing the mChoice string is expensive and looping is not.
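Each table string is parsed only once: get_next_mchoice() peels off its semicolon-separated integer components, and each component is compared against every still-unmatched element of x.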
*/ for(i=0; i < LENGTH(table) && nfound < len; i++) { if(STRING_ELT(table, i) == NA_STRING) continue; str = translateCharUTF8(STRING_ELT(table, i)); slen = strlen(str) + 1; str_ptr = Hmisc_AllocStringBuffer((slen) * sizeof(char), &cbuff); strncpy(str_ptr, str, slen); str_ptr[slen] = '\0'; while(str_ptr != NULL && nfound < len) { /* get the next component of the mChoice string */ comp = get_next_mchoice(&str_ptr); /* if comp is zero the next component was blank; continue */ if(comp == 0) continue; /* Compare the component to all elements of x */ for(j = 0; j < len && nfound < len; j++) { /* If the element index is not zero that value has been previously matched; continue to next value */ if(INTEGER_POINTER(elm_index)[j] || INTEGER_POINTER(x)[j] == 0) continue; if(INTEGER_POINTER(x)[j] == comp) { nfound++; INTEGER_POINTER(elm_index)[j] = i+1; } } } } Hmisc_FreeStringBuffer(&cbuff); if(nfound < len) { /* if not all elements of x are matched to those in table set the elements of elm_index that are zero to the value of nomatch */ for(i=0; i < len; i++) { if(INTEGER_POINTER(elm_index)[i] == 0) { INTEGER_POINTER(elm_index)[i] = INTEGER_POINTER(nomatch)[0]; } } } UNPROTECT(1); return(elm_index); } SEXP do_mchoice_equals(SEXP x, SEXP y) { int x_len = LENGTH(x); /* length of x vector */ int y_len = LENGTH(y); /* length of y vector */ SEXP ans; /* Logical return vector */ int nfound = 0; /* number of matches found */ int i,j, comp; /* iterators */ size_t slen; char *str_ptr; /* copy of the x string element */ const char *str; S_EVALUATOR if(!IS_INTEGER(y) || y_len == 0) Rf_error("y must be an integer vector of at least length one."); PROTECT(ans = NEW_LOGICAL(x_len)); for(i=0; i < x_len; ++i) { nfound = 0; str = translateCharUTF8(STRING_ELT(x, i)); slen = strlen(str) + 1; /* if the x element is zero length or NA there is no possible match */ if(STRING_ELT(x, i) == NA_STRING) { SET_NA_LGL(LOGICAL_POINTER(ans)[i]); continue; } if(slen == 0) { LOGICAL_POINTER(ans)[i] = 0; continue; } str_ptr = Hmisc_AllocStringBuffer((slen) * sizeof(char), &cbuff); strncpy(str_ptr, str, slen); str_ptr[slen] = '\0'; while(str_ptr != NULL && nfound < y_len) { comp = get_next_mchoice(&str_ptr); for(j=0; j < y_len; j++) { if(comp == INTEGER_POINTER(y)[j]) { nfound++; break; } } } if(nfound < y_len) LOGICAL_POINTER(ans)[i] = 0; else LOGICAL_POINTER(ans)[i] = 1; } Hmisc_FreeStringBuffer(&cbuff); UNPROTECT(1); return(ans); } Hmisc/src/sas/0000755000176200001440000000000013555351165012703 5ustar liggesusersHmisc/src/sas/exportlib.sas0000644000176200001440000000346012243661443015422 0ustar liggesusers/* Macro exportlib Exports all SAS datasets in a data library to csv files. One of the datasets is assumed to be the result of PROC FORMAT CNTLOUT= if any user formats are referenced. Numeric variables are formatted with the BEST14. format so that date/time variables will be exported with their internal numeric values. A special file _contents_.csv is created to hold, for all datasets combined, the dataset name, dataset label, variable names, labels, formats, types, and lengths. Usage: %INCLUDE "foo\exportlib.sas"; * Define macro; LIBNAME lib ...; * E.g.
LIBNAME d SASV5XPT "foo.xpt"; %exportlib(lib, outdir, tempdir); Arguments: lib - SAS libname for input datasets outdir - directory in which to write .csv files (default ".") tempdir - temporary directory to hold generated SAS code (default C:/WINDOWS/TEMP) */ %macro exportlib(lib, outdir, tempdir); %IF %QUOTE(&outdir)= %THEN %LET outdir=.; %IF %QUOTE(&tempdir)= %THEN %LET tempdir=C:/WINDOWS/TEMP; OPTIONS NOFMTERR; PROC COPY IN=&lib OUT=work;RUN; PROC CONTENTS DATA=work._ALL_ NOPRINT OUT=_contents_(KEEP=memname memlabel name type label format length nobs);RUN; PROC EXPORT DATA=_contents_ OUTFILE="&outdir/_contents_.csv" REPLACE;RUN; DATA _NULL_; SET _contents_; BY MEMNAME; FILE "&tempdir/_export_.sas"; RETAIN bk -1; if FIRST.MEMNAME & (NOBS > 0) THEN DO; PUT "DATA " MEMNAME "; SET " MEMNAME ";FORMAT _NUMERIC_ BEST14.;RUN;"; PUT "PROC EXPORT DATA=" MEMNAME " OUTFILE=" '"' "&outdir/" MEMNAME +bk ".csv" '" ' "REPLACE;RUN;"; END; RUN; %INCLUDE "&tempdir/_export_.sas";RUN; %MEND exportlib; Hmisc/NEWS0000644000176200001440000012704014370711567012033 0ustar liggesusersChanges in version 4.8-0 (2023-02-08) * rendHTML: new function to render HTML, sensing if knitr is running and if so using knitr::asis_output, otherwise htmltools * contents, describe: added the use of rendHTML so that HTML can be rendered from the console, and made it so that if options(prType='html') is in effect the html method will be invoked automatically when printing Changes in version 4.7-2 (2022-11-18) * grType: changed to use find.package() instead of the slow installed.packages() (thanks: Uwe Ligges) * latex.default: changed to by default omit the LaTeX comment; specify options(omitlatexcom=FALSE) to get original behavior * ggplot.transcan: corrected color/color assignments; thanks: David Norris * restoreHattrib: fixed to ignore attributes for variables that are not in obj * popower: fixed URL for Live Free or Dichotomize * getRs: improved to use rstudioapi to load the file into a script editor window * mChoice: added ignoreNA argument and made new default behavior ignoreNA=TRUE unlike the prior versions; made value (appearing with semicolons) null instead of 0 when no choice was selected, which makes option add.none finally work * mChoice: added condition argument to intersect all values instead of unioning them * mChoicelike: new function like inmChoice but allowing matching on pieces of labels and optionally ignoring the case * nmChoice: new function to compute the number of choices that were made per observation * describe: fixed length > 1 problem with check for timeDate class * Merge: better error message if unexpected variable from non-id-variable overlap * all.is.numeric: corrected to return TRUE is the only non-NA values after converting to numeric were originally NA, addedd what='nonnum' argument Changes in version 4.7-1 (2022-08-13) * plot.describe: added bvspace argument to control spacing between categorical variables * getRs: changed default put argument to 'source', changed RStudio code * html.contents.data.frame: changed long space to   to render in more browser * .q: new quoting function, extends Cs * html.contents.data.frame, html.describe: added to html * plot.curveRep: added method='data' so can set up for ggplot2 * combplotp, dotchart3, dotchartpl, plot.describe, plotlyM, scat1d: look at options(plotlyauto=TRUE) and if set override height and width to NULL to that plotly will use auto sizing * dataframeReduce: return information about dropped variables as attribute 'info' * plotCorrM: added hjust=1 if 
xangle is not zero * Merge: changed to rely on merge.data.table in addition to merge.data.frame * formatCons used by summaryM and summary.formula: respect outer.size for html * options(localHfiles=TRUE) makes `getRs` and `getHdata` read from local file directories instead of from github or our web server * various .Rd files: got rid of \var{} to pass new CRAN checks * fit.mult.impute: fixed bug when using mice. Thanks: Michał Krassowski https://github.com/harrelfe/Hmisc/issues/152 * fit.mult.impute: fixed bug with deparse(substitute(fitter)) when fitter is an anonymous function. Thanks: Michał Krassowski https://github.com/harrelfe/Hmisc/issues/157 * plot.summaryM: fixed bug not fetching group label from correct place. Thanks: Thomas tkpmep@gmail.com * bpplt: fixed par(mai). Thanks: tkpmep@gmail.com * plot.summaryM: similar correction Changes in version 4.7-0 (2022-04-18) * html.contents.data.frame: properly closed html <a> * simPOcuts: new function to demonstrate variation in odds ratios due to random chance * R2Measures: new function to compute various pseudo R^2 measures * putHcap: added new capabilities around the subsub argument * print.summary.formula.response: added markdown argument * knitrSet: added rudimentary quarto support * knitrSet: sense figure labels of the form fig-... used by Quarto, and generate correct cross-reference * na.pattern: greatly simplified, and added data.table support Changes in version 4.6-0 (2021-10-05) * package: improved author formatting in DESCRIPTION * html: markupSpecs$html$session: added citations for any Harrell packages that are loaded, respecting current output format in effect with knitr * soprobMarkovOrdm: new function to compute state occupancy probabilities from proportional odds model fits * plotCorrM: new function to graph correlation matrices and gap time relationships using ggplot2 * ggplotlyr: new function to use plotly::ggplotly to render ggplot2 graphs but intercepting hover text to remove extraneous labels * Fixed Heiberger email address * propsTrans: removed zero frequency combinations * combplotp: fixed bug in case regarding recognition of positives * propsPO: fixed making y factor * soprobMarkovOrdm: added as.data.frame(generated data) * estSeqMarkovOrd: extended timecriterion function to allow user to return the event/censoring time and indicator, and to allow groupContrast to compute using a variance formula * estSeqMarkovOrd: trapped errors better, returning attribute failures, and added maxest and maxvest argument to declare large parameter or variance estimates as failed iterations; changed from vgam to vglm and sped up computations by using previous coefficient estimates as starting values * session in markupSpecs: added citations for several other packages * soprobMarkovOrdm: extended to work with VGAM package * describe: fix error with .
(thanks to Cole Beck; https://github.com/harrelfe/Hmisc/pull/144) * many: tests for presence of suggested packages using requireNamespace (thanks for major editing work by Duncan Murdoch at https://github.com/harrelfe/Hmisc/pull/143 motivated by https://stat.ethz.ch/pipermail/r-package-devel/2021q2/007101.html) * mdb.get: fix for Windows by changing from system to system2 (thanks to Rainer Hurling at https://github.com/harrelfe/Hmisc/pull/135) * summaryM: fixed error with prN=TRUE with latex (thanks Matt Shotwell in https://github.com/harrelfe/Hmisc/pull/109) * many: fix partial argument matching warnings length -> length.out in rep (thanks Bill Denney in https://github.com/harrelfe/Hmisc/pull/128) * rcspline.restate: was dropping + for exactly zero coefficients (thanks https://github.com/harrelfe/Hmisc/pull/118) * mdb.get: remove brackets from table names (thanks https://github.com/harrelfe/Hmisc/pull/123) * latex: added new argument comment so that the generated comment can be suppressed (thanks Giuseppe Ragusa https://github.com/harrelfe/Hmisc/pull/33) * sas.get: added new argument for variable case (thanks Tyler Hunt https://github.com/harrelfe/Hmisc/pull/8) * format.df: changed to use system option OutDec when cdot is not specified (thanks https://github.com/harrelfe/Hmisc/issues/142) * C code: changed calls to warning/error routines * formatdescribeSingle: changed for character value to make 'NA' work Changes in version 4.5-0 (2021-02-27) * approxExtrap: changed x and y to as.numeric. Thanks: juha.heikkinen@luke.fi * upData: fix column subsetting for data.tables * dotchartpl: intercepted missing Diff * simMarkovOrd, soprobMarkovOrd, intMarkovOrd, estSeqMarkovOrd: new functions for Markov proportional odds model simulation and calculation of state occupancy probabilities * ggfreqScatter: added by argument * VGAM package added to Suggests in DESCRIPTION * html markupSpecs mdchunk: added caption argument, allowed for vectors * propsTrans: added labels argument for plotly, added numerators and denominators in tooltips, fixed bug where factor levels were reversed if odds.ratio specified Changes in version 4.4-2 (2020-11-25) * rcorr: captured loss of precision leading to square root of a negative number. Thanks: Ann Voss <avoss@novanthealth.org> * summaryS: sapply was doubling names * pairUpDiff: created for dotchartpl - function to pair up grouped observations for sorting by descending differences and computing approximate confidence intervals for the difference given individual confidence limits * dotchartpl: added new arguments and functionality to handle quantities other than proportions (e.g., hazards) and streamlined code using pairUpDiff * propsPO: added tooltip text for gpplot that will be transmitted to ggplotly; reversed order of categories so lowest category put at bottom of bar * dotchartpl: suppressed confidence intervals when the number of groups is not 2; fixed bug where hover text confidence intervals were repeats of the last computed interval instead of properly using all the intervals; added dec argument * added estSeqSim and gbayesSeqSim functions * ggfreqScatter: stopped varying alpha and just varied color, as alpha collided with the color scheme * histSpike: added minimal=TRUE and bins arguments Changes in version 4.4-1 (2020-08-07) * popower: added approximate S.E. 
of log(OR) in results * propsPO: new function for exploring proportional odds * propsTrans: new function for showing distributions of successive state transitions * changed acepack to suggests * multEventChart: new function for multi-state event charts based on code written by Lucy D'Agostino McGowan * getHdata: changed to use hbiostat.org/data/repo * markupSpecs$markdown$tof: new function to render a table of figures with short captions * knitrSet: added capfile argument to store figure tags and short captions * getLatestSource: changed to use GitHub and hbiostat.org/R/packagename/dir.txt * histboxpM: added width argument * upData: handled zero-length subsets (Thanks: Will Gray) * upData: made faster for large datasets * ffCompress: removed from package and put in Github Rscripts * Changed maintainer email Changes in version 4.4-0 (2020-03-22) * combplotp: new function for attribute plots with plotly * summaryP: made to work with new default stringsAsFactors=FALSE * plotlyM: fixed bug where need to unlist if only one graph produced * plotlyM: added ECDF support through fitter='ecdf' * keepHattrib, restoreHattrib: new functions for saving Hmisc attributes that can be restored later, e.g., after data.table processes a data frame Changes in version 4.3-1 (2020-02-07) * Depend on survival >= 3.1-6 * pomodm: checked that x is sorted * markupSpecs: added ord function for ordinal representation of integers * mChoice: removed unused argument sort. from help file Changes in version 4.3-0 (2019-11-07) * Corrected typos in aregImpute help file. Thanks: Mark Seeto. * describe: changed to print 5 lowest and highest values even if frequency table printed, added a line of printed output specifying any rounding done for the frequency table * vcov.fit.mult.impute: use vcov.orm if object has orm class. intercepts=mid logic was not working for this * New service function convertPdate to do automatic date conversions and handle partial dates such as YYYY and mm/YYYY with imputation, for cleanup.import * cleanup.import: new arguments autodate, autonum, fracnn * describe: formatted dates if there is only one distinct value; changed to sense date or date-times, for Gmd not format as date-time * plot.describe: treated date/time variables as numeric * cleanup.import: refined autonum considerNA * ggfreqScatter: added stick=TRUE argument * markupSpecs: changed math and similar functions to use ... argument instead of x * summaryDp: new function for plotly dotcharts stratifying separately on a series of variables * dotchartpl: added height argument * html.describe: fixed but where markupSpecs$html utility function last arguments were not named in calls Changes in version 4.2-0 (2019-01-25) * putHcap: new function * inst/tests/example.Rmd: changed to html document, replaced putHfig calls with new putHcap * knitrSet: new lang='blogdown' option * fit.mult.impute: suppressed warning for orm if there were no missing Ys (thanks: Lucy D'Agostino McGowan) * knitrSet: made more functional for blogdown * knitrSet: removed duplicate setting of out.width * fit.mult.impute: added step-by-step instructions for imputing missing baseline variables when analyzing serial data * histboxp: changed to display quantiles using line segments, and centered segments vertically. 
added connect argument with default set to connect quantiles with a line * wtd.quantile: removed observations with missing weights * print.summaryM, latex.summaryM: if no statistical results, do nothing and return * naclus: checking missings in character variables to also use is.na * dotchartpl: backsolve from Wilson confidence interval when prop=0 or 1; added nobigtracename argument; fixed trace names * putHfig: added file=FALSE to return character vector instead of writing * histboxp: added showlegend so that can be called in a loop * histboxpM: new function to call histboxp for multiple variables in a data frame * htmlSN: re-wrote for speed, and added argument pretty= * plot.summaryS: added plotly method when options(grType='plotly') * plotp: moved generic from rms to Hmisc * plotlyM: new function to make it easy to use plotly to produce multiple graphics of certain types, with specifications coming from a data frame * plotp.summaryS: new plotly method for summaryS * mbarclpl, medvpl: new auxiliary functions for plotp.summaryS * dhistboxp: new function * markupSpecs$httml: added new function mdchunk to intersperse markdown with R object output * markupSpecs$html$varlabel: if html is TRUE run label and units through htmlTranslate * dotchartpl: removed stratified by groupname from hover text * plot.summaryS: removed transhtml argument * markupSpecs$html cap-type functions: called htmlTranslate(...,greek=TRUE) on long and short captions * formatTestStats: ran P-values with < through latexTranslate to make latex.summary.formula.reverse and latex.summaryM work correctly * hidingTOC: new function by Thomas Dupont for various features for floating table of contents with RMarkdown html reports * histbackback: use pretty() to label breaks * cut2: added formatfun argument, by github.com/harrelfe/Hmisc/commit/c8816d589b2e95eed15b96a886558a92bb143abd * latex.default: fixed midfules for booktabs with ctable: https://github.com/harrelfe/Hmisc/pull/100 * formatTestStats: trapped logical length mismatch * latex.default: checks options(omitlatexcom) and if TRUE suppresses emitting LaTeX comments; keeps Rmarkdown/pandoc from trying to escape LaTeX comment characters, putting the comments in the final output Changes in version 4.1-1 (2018-01-03) * describe: quit rounding values when <= 20 distinct values no matter how far apart any two values are spaced. https://stackoverflow.com/questions/47679551 * summaryM: fixed to handle character variables properly, by converting them to factors. Thanks: Matt Shotwell * Fortran/ratfor routines hoeffd, jacklins, largrec, rcorr, wclosest: fixed errors where some scalars were single precision, got rid of some warnings Changes in version 4.1-0 (2017-12-19) * latex.default: insert spaces between multiple entries in bottom index in latex tables (thanks: ecortens https://github.com/harrelfe/Hmisc/pull/73) * latex.summary.formula.reverse,latex.summaryM: fix double replacement of ampersand (Thanks: Evan Cortens https://github.com/harrelfe/Hmisc/pull/72) * fit.mult.impute: verified dfmi formula, added comment * dotchartpl: added refgroup, sortdiff, conf.int arguments to allow inclusion of difference in two proportions across levels of the group variable * plot.summaryP: for plotly, added refgroup to pass to dotchartpl so that differences in proportions can be handled; also passed ... 
arguments * dotchartpl: added minkeep argument (used by hreport eReport) * markupSpecs$html: added function uncover for hiding chunks; fixed typo in definition of larger function * knitrSet: when lang='markdown' sets up hook option uncover * knitrSet: added aliases ho for out.height, wo for out.width, added knitrSet arguments ho, wo * wtd.var: replaced with fixed version by Benjamin Tyner <btyner@gmail.com> * inst/tests/example.Rmd: added example with simple caption and font size change for caption * pomodm: new function to work with popower/posamsize to assist translating odds ratios to means and medians for proportional odds model * histboxp: added wmax, mult arguments to tweak margin set-aside * describe: fixed bug in html.describe when there is only one variable * simRegOrd: new function to simulate covariate-adjusted ordinal model two-sample test power * html: use htmlSpecialType for functions calling htmlSpecial, htmlGreek * htmlSpecialType: new function to check options('htmlSpecialType') and use as a default '&' for special symbols * htmlSpecial: added new characters, cleaned up code * impute.transcan: if original variable was logical, convert 0/1 imputations to FALSE/TRUE; design matrix column names were problematic with fit.mult.impute in this case * impute.transcan: converted character variables on input dataset to factor so can replace NAs with imputed codes * htmlTranslate: fixed bug when unicode was in effect: characters were shifted; fixed &# code for pound sign * errbar: properly passed arguments main and sub to plot * html markupSpecs: added sectionNumberDepth function * wtd.quantile: remove zero weight observations early * latexCheckOptions: new function from Richard Heiberger * htmlSN: used new HTML specials mechanism for times symbol * summaryM: added escape.html=FALSE to htmlTable calls * combine, combine<-: removed (were deprecated a long time ago) * removed inc-dec.s which defined unused inc<- and dec<- for increment and decrement * NAMESPACE: removed exportPattern("^([^.]|\\..+\\.)") and explicitly exported everything, stopping export of methods * round.POSIXt,trunc.POSIXt: changed to roundPOSIXt, truncPOSIXt to avoid conflicts with R base * asNumericMatrix, matrix2dataFrame: changed to respect original storage.mode.
Thanks: Andre Mikulec Changes in version 4.0-3 (2017-04-30) * bpplot: added plotopts argument * describe, histboxp: improved logic to still slightly round numeric variables with fewer than 100 distinct values, in case some values are really close, when computing frequency distribution (for spike histograms in plot.describe) * all.is.numeric: fixed to handle case where all values are " "; makes describe work * latex.describe.single: changed to always issue vbox, to improve vertical spacing * histboxp: removed zero line so wouldn't collide with data * ggfreqScatter: added smoother to example * latex.summaryM: more transparent naming of quantiles preserving full precision * cut2,describe,dotchart3,ecdf,latex,list.tree,bpplt,panel.plsmo,pstamp,summary.formula,summaryM,transace,t.test.cluster,wtd.Ecdf,Dotplot,zoom: fixed on.exit to properly restore options() to original state * [.pBlock: fixed old behavior of only retaining the pBlock class if the result is a matrix (thanks: Michael Lawrence - lawremi) * addMarginal: added nested argument * putHfig: now throw error if lcap is given with expcoll because of interaction with divs in markdown sub-subheading * plotmathTranslate: improved to put any non-legal expression inside paste() * wtd.var: was returning Inf for fractional weights with method='unbiased'; changed to use stats::cov.wt for both methods - https://github.com/harrelfe/Hmisc/issues/69 * plsmo, summaryS: new example in help files * plot.summaryS: separated xlim and ylim to fix bug where xlim was ignored * plsmo: stop on error if no. observations not large enough for m with method='intervals' * Fortran calls: registered all Fortran functions, removed package name and quotes in first argument in .Fortran(), added F_ to entry point name in those calls, changed useDynLib call in NAMESPACE, added \alias{} for all F_ entries Changes in version 4.0-2 (2016-12-30) * pngNeedle: fixed bug in html.describe under Windows by not specifying /tmp as the temporary file directory - https://github.com/harrelfe/Hmisc/issues/63; used tempdir() instead of tempfile() * html.describe.single: reuse the same temporary file for intermediate png output for spike histograms * htmlGreek, htmlSpecial: new internal functions * htmlTranslate: added unicode argument, default FALSE, use htmlGreek * tobase64image: new internal function. Thanks: Dirk Eddelbuettel * Changed IMPORTS: base64 to base64enc * spss.get, stata.get: made use of foreign package files' ability to use URLs as the file argument * sasxport.get, getZip: allowed for https (thanks: Kurt Hornik) * getRs: removed unused where argument * latex: revert changes that removed fix to column duplication in cellTexCmds * latex: added continued argument * plot.describe: fixed bug in not using formula notation for plotly * histSpikeg: added plotly support, stop curtailing x range when xlim omitted, use approxExtrap to show data density outside of prediction range for x * histSpikep: deleted function * dotchartpl: added lower, upper * describe.vector: changed structure(NULL, ...) to structure(list(), ...)
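A minimal sketch of the newly registered Fortran interface in use, through the exported rcorr() wrapper (simulated data; r and n are the documented components of rcorr's result):
    library(Hmisc)
    set.seed(1)
    x <- matrix(rnorm(40), ncol = 4)
    x[2, 3] <- NA                      # missing values are handled pairwise
    out <- rcorr(x, type = 'spearman')
    out$r                              # correlation matrix (dmat in rcorr.f)
    out$n                              # pairwise counts (npair in rcorr.f)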
Changes in version 4.0-1 (2016-12-04) * gbayes: new test file, fixed bug in posterior mean when mix=0,1 in gbayesMixPost * ggfreqScatter: added prfreq and cuts arguments * latex.summary.formula.reverse, latex.summaryM, html.summaryM: fixed bug with backslash added before % for html, improved typesetting language logic * latex.describe: fixed bug that prevented spike histogram from appearing * ggplot.summaryP: moved text out of aes in geom_point, left in main aes * plot.describe, plot.summaryP: put text aes in ggplot(), increased ggplotly height to allow popups for top continuous variables in plot.describe * dotchartp, dotchartpl, plot.describe: moved height and width from plotly::layout to plot_ly because of changes in plotly 4.5.6 * bppltp: added data= to plot::add_* because plotly was evaluating using the wrong copy of x,y, etc. * plot.summary.formula.reverse, plot.summaryM: removed subtitles argument since not used * upData: fixed bug when converting existing variable to factor; was not recognizing new storage model and tried to run floor() * latex.summary.formula.reverse: fixed bug where outer quantiles and frequencies were not being rendered * latex.default: put \par in front of insert.bottom if table.env is FALSE, so that text is not put to the side of the table * histboxp: new function for plotly stratified spike histograms with box plot statistics also depicted * format.df, latex.default: added arguments already.math.row.names, already.math.col.names that if TRUE prevent any math mode changes * latex.default: fixed problem with duplication of columns in the cellT...exCmds array (thanks: Thomas Dupont) * tests/summaryM-customtest: new test (see https://github.com/harrelfe/Hmisc/issues/61) * describe: improved logic for deciding when and how to show frequency table, changed condense argument * htmlVerbatim: added propts and omit1b arguments * latexNeedle, pngNeedle: added x argument * latex.describe: allowed for unequally-spaced x in spike histograms * html.describe: added spike histograms * base64: new IMPORTS in DESCRIPTION * sedit, latexTranslate: changed 1 : length(text) to seq_along(text). Thanks: Greg Snow * ggfreqScatter: better formatting of axis tick mark labels * latexVerbatim: implemented file='' Changes in version 4.0-0 (2016-10-31) * summaryP: fixed exclude1 logic - was not excluding enough levels (inconsistent use of variable names vs. 
labels) * latexTranslate: any NAs in first argument changed to "" before conversion * minor.tick: added arguments to pass through (thanks: vhorvath) * tests/latexpng.r: test conversion of LaTeX table to png * latexTabular: made it properly call latexTranslate separately for each column, convert argument to matrix or data frame if a vector * tests/latexTabular.r: new test * latexDotchart: added call to latexTranslate to escape special characters for LaTeX such as underscore and pound sign * ggfreqScatter: new function * grType: new non-exported service function to sense if plotly is in effect * plot.describe: new function to make plotly graph for describe objects * describe: changed output: 3 decimal places for Info, new format for frequency table, separated logic for 10 extreme values from logic for frequency table, significant changes to print.describe * dotchartp: new version of dotchart3 for plotly charts * summaryD: modified to use dotchartp if options(grType='plotly') is in effect * nFm: added argument html * label.default, label.Surv, labelPlotmath: added html argument to implement HTML markup in place of plotmath (for plotly) * now imports htmlTable, viridis * html.contents.data.frame: return invisibly if file='' * htmlVerbatim: new function * html.contents.data.frame: improved html * html.data.frame: changed column headers from h4 to <strong> * ggfreqScatter: added html argument * knitrSet: was ignoring default figure h and w * html.summaryM: new function using new version of latex.summaryM * markupSpecs: new list object defining markup elements for latex and html, used by summaryM * show.html, print.html: removed; conflicted with htmltools/rmarkdown * label: required exact match on attribute name "label" to not retrieve the "labels" attribute (thanks: Ivan Puzek) * htmlSN: new function to convert floating point to scientific format usint html * upData, cleanup.import: fix NA becoming new factor levels (thanks: Beau Bruce) * htmlTranslate: new function * plotlySave: new function * histSpikep: new function * upData: new argument html * plot.describe: added ggplot2 graphics, improved continuous display using ggplotly * labelPlotmath: cleaned up by using markupSpecs * capitalize: fixed bug - did not cap. 
first letter if other letters after it were upper case (thanks: dchiu911) * html.summaryM: added brmsd argument to put mean, SD on separate line * Save: changed default back to use gzip for speed * knitrSet: used new knitr way to set aliases for option names * latexTabular: made translate argument apply to body of table also; implemented multi-line header * html.data.frame: added several arguments * describe: added html method * html markupSpecs object: added bibliographic database utility functions * bppltp: new service function for extended box plots for plotly * plot.summaryM: new plotly method * prType: new service function used to detect user settings for html, latex for print methods * html.contents.data.frame: removed file and append arguments and output an html character vector instead * prList: new function * GiniMd: moved to Hmisc from rms * describe: added GMD (Gini's mean difference) and relabeled unique to distinct * latex.summaryM: added Needspace{2.7in} before 2nd and later strata * ggplot.summaryP: added point labels for use with plotly as hover text * latex.summaryP: fixed bad linebreak at strata boundaries * dotchartpl: new plotly dot chart function especially for summaryP * plot.summaryP: new plotly method using dotchartpl when options(grType='plotly') * putHfig: new function to render graphics with captions in HTML reports * pngNeedle: new function, like latexNeedle but useful with HTML reports * html.data.frame: added width and caption arguments * plotlyParm: list object with plotly helper functions * summaryD, dotchartp: added symbol and col arguments * upData: improved efficiency, added labelled class to variables without it but having labels (e.g., data imported using haven package) * gbayesMixPost: mixed error in posterior odds of the mixing parameter that resulted in incorrect posterior probabilities when the variance of the statistic was appreciable; added option to compute posterior mean Changes in version 3.17-4 (2016-05-02) * latex.summaryP: correctly ordered rows after running reshape because groups was present * rcspline.plot: took confidence limits into account when computing default ylim * bootkm: new test in tests * [.ynbind: Made it return an unclassed vector if the user was subsetting to a single column. Upcoming R release fails in table() otherwise. Thanks: Michael Lawrence <lawrence.michael@gene.com> * Merge: added function (moving from greport package) * data.table: imports Changes in version 3.17-3 (2016-04-03) * latex.default: added blank lines after textbf with insert.top unless center='center' (was unless 'none') * plot.summaryS: fixed bug were limits were not aligned with columns (thanks: Liping Du) * mgp.axis: respected par('tcl') but only as extreme as -0.4 * mgp.axis: added cex.axis, cex.lab arguments * aregImpute: added reformM function for permuting order of variables. 
Thanks: Yong Hao Pua * latex: fixed rownamesTexCmd error Changes in version 3.17-2 (2016-02-13) * consolidate: A simpler version of assignment form of consolidate, avoids eval/match.call and works with the byte-compiler (thanks: Tomas Kalibera) * relevel.labelled: new function (thanks: Max Gordon) * fit.mult.impute: remove contrasts attribute for imputations done by mice * formatCons: fixed bug when stats contains one row (thanks: Cole Beck) * Hmisc.h: removed reference to S.h * fit.mult.impute.s: moved source code to separate file, away from transcan.s * fit.mult.impute: added warning if fitter is lm, which does not use imputation variances for sigma or standard errors * spss.get: added reencode argument (Thanks: Andreas Osowski) Changes in version 3.17-1 (2015-12-17) * sasxport.get: added lowernames argument with default equal to the previous behavior (convert variable names to lower case) * biVar, rcorrp.cens, spower, transace: changed 1 - pnorm(z) to pnorm(-z) * latex.summaryM: added arguments prob, prN, and legend.bottom * print.summaryM: added arguments prob and prN * Save: changed to use compress='xz' by default, and added compress argument * drawPlot: fixed bug in defining Points and Curve functions; added col argument to Points and Curve; removed helper functions for Points and Curve from Key.s; returned object invisibly; added type='loess' to Curve() * upFirst: new function * mdb.get: added mdbexportArgs argument which among other things allows -b strip to be omitted * html: for hevea added definition of tabularnewline as \\, changed method to default to hevea * summaryM: fixed calculation of N for mChoice variables * stat-plsmo.r: re-written by Hadley Wickham (thanks!) to work with new ggplot2 * ggplot.summaryP, ggplot.transcan: added mapping and environment arguments to satisfy rules about generics Changes in version 3.17-0 (2015-09-20) * format.df (used by latex.default): added space after textless, textgreater * label: changed default for units to value of plot * getRs: replaced where argument with guser, grepo, gdir, dir to allow easy fetching of updated functions from Hmisc etc. * Separated sas.get source code from other .get functions and from upData/cleanup.import by putting into 3 separate files. Moved stata.get into misc.get.s * upData: for Stat/Transfer exported R workspaces, change variables into factors to incorporate value labels when present; added subset argument and reporting of number of observations pre and post subsetting * latex.default: added comma after botcap directive for ctable.
Thanks: Paul Trowbridge * Hmisc-internal.Rd: removed alias{[.terms} * latex.default: for longtable when no caption is given, subtract one from table counter * latex.summaryM: quit ignoring insert.bottom if it is a character string (thanks: JoAnn Alvarez) * minor.tick: revised version by Earl Belllinger that fixes problem reported in https://github.com/harrelfe/Hmisc/issues/28 * several functions: used new names when assigning temporary functions * NAMESPACE: add imports to base functions to avoid new R CMD CHECK warnings * ffCompress: new function * knitrSet: changed fig.path default to '' instead of NULL to work with knitr 1.11 * html.latex: added argument rmarkdown * htmltools: added to suggests in DESCRIPTION * tests: new test script latex-html.Rmd for latex -> html under Rmarkdown/knitr/Rstudio, new test for cut2 * plsmo, panel.plsmo: added method='intervals', mobs, ifun arguments Changes in version 3.16-0 (2015-04-25) * html.contents.data.frame: corrected html space character to add semicolon * ggplot.summaryP: added size of points according to denominators * colorFacet: new function * labelPlotmath: added chexpr argument (used by rms::ggplot.Predict) * rcsplineFunction: added type='integral' * summaryP: fixed bug with sort=FALSE using mfreq when shouldn't * summaryP: stored levels(val) in original levels order * summaryM: removed observations added by addMarginal when computing N in left column of tables * html.latex: added method for htlatex, added where argument, cleaned up code, implemented file='' for knitr when using html/Rmarkdown * summaryM, summary.formula: changed calls to translate to gsub() * summaryP: corrected but in exclude1 logic, moved exclude1 to methods that operate on summaryP objects and out of summaryP itself * addMarginal: respect original levels, add argument margloc * added latticeExtra:: in front of function calls * numeric.string, all.is.numeric: replaced options(warn=-1) with suppressWarnings() (thanks: Yihui) * arrGrob, print.arrGrob: new functions * wtd.var: added maximum likelihood method, fixed unbiased method, improved documentation (all provided by Benjamin Tyner) * Changed all any(duplicated()) to anyDuplicated(); thanks Benjamin Tyler * getRs: new function to interact with https://github.com/harrelfe/rscripts * knitrSet: new function to setup knitr with nice defaults for books etc. * rcorr: fixed sensing of NAs and diagonal elements of n matrix; thanks: Keith Jewell, Campden BRI Group; similar for hoeffd Changes in version 3.15-0 (2015-02-15) * dvi.latex: For Windows switched from system() to shell() so that cd command works. Thanks: Rich Heiberger * histSpike: added test * histSpikeg: new function for ggplot2 * added ggplot2 in DESCRIPTION, NAMESPACE * largest.empty: clarified help file for use with ggplot2, changed how sensed xlim and ylim accordingly * stat_plsmo: new geom for ggplot2 * transcan: added ggplot method * transace: removed y-axis labels * residuals.lrm: remove y-axis labels * histSpikeg: added lowess and span arguments * format.df: Andreas Kiermeier: The "..." argument of format.df is no longer ignored, but included in the definition of formt() and passed to format(). The help file has also been updated and the example modified to show an example of big.mark. 
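A minimal sketch of the format.df pass-through just described (hypothetical data frame; big.mark simply travels via '...' to base format()):
    library(Hmisc)
    d <- data.frame(n = c(1234567, 89012))
    format.df(d, big.mark = ',')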
* getHdata: removed S-Plus specific stuff, updated url to DataSets * summaryP: added ggplot method * NAMESPACE: many additions to register all S3 methods * format.sep: renamed to formatSep Changes in version 3.14-6 (2014-11-16) * wtd.rank: fixed bug in no weights case (thanks: DPlat) * latex.summaryM, latex.summary.formula.reverse: added npct='slash' to present numerators and denominators horizontally instead of vertically * plsmo: put derivation of ylab earlier so will use original attributes * NAMESPACE: exported more S3 methods for label * contents: added arguments id, range, values * print.contents.data.frame, html.contents.data.frame: added maxlevels argument * curveRep: added option to have the color of frequencies displayed match line colors. Thanks: jstat10 * curveRep: changed call to strwidth to use units instead of unit argument * contents.data.frame: corrected omission in help file * wtd.var: corrected denominator. Thanks: Shan Huang * latex.default: changed to use colheads=FALSE to suppress column headings. Thanks: Michael Rose * mdb.get: added system option -b strip to mdb-export to skip binary output * describe help file: added note that correct terminology for "unique" should have been "distinct" Changes in version 3.14-5 (2014-09-11) * latex.summaryM: fixed bug in caption with test=TRUE. Thanks: Yonghao Pua * csv.get: uses data.table package's fread in place of read.csv if data.table is in effect * combined.levels: sensed all NA vector, now return non-factor numeric instead * dataframeReduce: handle all-NA factor variable * subplot: replaced with latest version from TeachingDemos package by Greg Snow * latexTabular: fixed error in example in help file; result is not a file * latex: another test added in tests/latex.s * summaryP: removed observations with a right-hand-side variable missing * latex.summaryP: fixed bug with wrong column labels due to reshape reordering columns coming from factor levels alphabetically instead of by original levels * format.df: added % & <= >= to list of characters handled, the last two by going into math mode * latex.summaryP: use blank if denominator 0, instead of NaN * summary.formula: fixed problem with deparse formula. Thanks: Andreas Kiermeier * describe: added relative information measure for numeric variables - a measure of how continuous the variable is * wtd.table: detect duplications using duplicated() instead of diff(x) to handle Inf. Thanks: Benjamin Tyner * DESCRIPTION, NAMESPACE: multiple function changes to work in R-devel Changes in version 3.14-4 (2014-04-13) * rcspline.eval: stop new logic for ignoring outer values when there are many ties when there are also many ties on interior values. Added new logic to use interior unique values of x when the number of unique x is small. 
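A minimal sketch of rcspline.eval knot selection as discussed in the item above (simulated, heavily tied data; nk and knots.only are documented arguments):
    library(Hmisc)
    set.seed(2)
    x <- round(rexp(200), 1)                     # many tied values
    rcspline.eval(x, nk = 4, knots.only = TRUE)  # knots actually used
    dim(rcspline.eval(x, nk = 4))                # 200 x 2: nk - 2 nonlinear terms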
* latexBuild: generalized with insert argument * latex.default: modified to use mods of latexBuild, fixed bug with caption.loc='bottom' (thanks: YacineH) * latex.default: fixed bug where comma did not appear after caption={} for ctable (thanks: Johannes Hofrichter) * tests: fit.mult.impute.bootstrap.r: added new example (thanks: Jane Cook) * fit.mult.impute: added call for fit.mult.impute in returned object, replacing call from fitter; makes update work for fit.mult.impute * summary.formula: fixed recognition of multiple left-hand-side variables to trigger call to summaryM (thanks: Kevin Thorpe) * summaryM: changed group label to '' instead of 0 for formulas like age + sex ~ 1 * Ecdf: added what argument to all functions * nobsY: return constructed id vector * addMarginal: instead of .marginal. being logical, make it contain names of variables being marginalized over * mChoice.c: fixed some inconsistencies Changes in version 3.14-3 (2014-03-02) * format.df: clarified in help file that col.just can contain specifications other than l,r,c, e.g., "p{3in}" to get paragraph formatting in a column. Thanks: Ben Bolker * latex.default: added example for the above in tests * label.Surv: got units from inputAttributes in addition, and added type argument Changes in version 3.14-2 (2014-02-26) * latex.default: improved logic using new function in Misc: latexBuild * latex.default: fixed bug with ctable=TRUE with no caption by removing default label * latex.default: improved formatting for insert.top * latex.default: added tests, fixed insert.bottom * latex.summaryM: return stat summary key as legend attribute, use this according to insert.bottom argument * latex.summary.formula.response: fixed bug related to computation of cdec. Thanks: Kevin Thorpe * latex.default: added new argument star: ctables uses this to spread over two columns when the LaTeX document is in \twocolumn mode. Thanks: David Whiting Changes in version 3.14-1 (2014-02-25) * Added latexNeedle function * Change latexTherm, latexNeedle to use user LaTeX macro \tooltipn to do the pop-up * latex.default: changed line breaks around \end{tabular} * latex.summaryM: put insert.bottom text in minipage so \tooltip will not devote wide space to it * sas.get: added defaultencoding argument and logic (Thanks: Reinhold Koch) * plot.summaryP: omit tick marks for proportion > 1.0 * format.df (used by latex): fixed na.blank logic for character var * latex: removed newlines when ending environments, added hyperref argument * latex: added center='centerline', fixed 'centering' * upData, cleanup.import, dataframeReduce: changed argument pr to print * rcspline.eval: added more evasive action in case of extreme ties Changes in version 3.14-0 (2014-01-22) * Added trans argument to varclus * Removed recode, existsFunction functions, under.unix object, survfitKM, functions used only by S-Plus: comment, mem, mulbar.chart, p.sunflowers * as.category, is.category, untangle.special: removed * Removed reference to .R. from many functions * Remove oldClass, oldUnclass, getFunction * latex.default: changed 'rotate' to 'sideways' for ctable mode. 
Thanks: Simon Zehnder <szehnder@uni-bonn.de> * gView: removed * ldBands: removed * summaryP: new function - graphical replacement for tables of proportions * ynbind: new function for combining related yes/no variables into a matrix with a label * added file argument to prn * summaryP: added autoarrange * added addMarginal and nobsY functions * pBlock: new function for blocking variables for summaryP * summaryP: changed text positioning to grid absolutes, added text.at argument * scat1d, histSpike: if grid used and y has grid units, fixed logic for frac * plsmo, panel.plsmo: added scat1d.opts argument * label.Surv, units.Surv: added, removed ::: in survival calls * summarize: added keepcolnames argument * Suppressed startup message unless options(Hverbose=TRUE) is set * summaryS: new function - multi-panel lattice xy and dot plots * summaryD: added ylab argument * dotchart3: quit letting left margin be less than pre-existing one * multLines: new function * Improved nobsY to respect subject IDs when counting number of subjects, and to return an attribute 'formula' without id variable; changed bpplotM, summaryP, summaryS to use this * Removed nobsY calculations from bpplotM, summaryP, summaryS, enhanced nobsY to allow stratification by treatment * panel.bpplot: added violin and violin.opts arguments * summaryS: added medvPanel support during-plot vertical violin plots * plot.summaryP: padded x-axis limits * latexTabular: added translate and hline arguments; moved to its own file and help page * latexTherm: added tooltip using LaTeX ocgtools package * summaryP: stopped reversing order of panels * summaryM: added table.env argument, changed how model.frame built * latex.summaryM: changed to print proportions by default, added round='auto' * character.table: added xpd=NA; thanks: Dale * summaryP: added latex method * latex.default: added insert.top argument * summaryM: added stratification (multiple tables) Changes in version 3.13-0 (2013-11-18) * Changed n location (nloc argument) in bpplotM * Improved dotchart3 to better compute string widths when there is a mixture of expressions and regular strings for auxdata/auxtitle * Changed rlegend to not take logs if log axes are in effect. Fixes Ecdf(..., log='x', label.curves=list(keys=1:3)). Thanks: Bayazid Sarker <sarkarbayazid@gmail.com> * Extended non-panel (regular) version of plsmo to handle matrix y * Likewise for summaryRc * Added xlim to bpplotM * Added latexTherm function to create LaTeX picture environments to add a series of thermometers to LaTeX text * Fixed deff to handle the case where R^2 = 1. 
Thanks: Matthieu Stigler <matthieu.stigler@gmail.com> * Added new test file for wtd.mean, wtd.quantile * New test aregImpute3.r for glm Poisson regression * Improved describe.vector to format single unique values * Took away warning about var, s.e., t, p in fit.mult.impute * Switched from subversion to github repository * Changed maintainer from Charles Dupont to Frank Harrell * Changed wtd.loess.noiter to use loess instead of stats:::simpleLoess Hmisc/COPYING0000644000176200001440000000136212243661443012357 0ustar liggesusers## Copyright (C) 2001 Frank E Harrell Jr ## ## This program is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by the ## Free Software Foundation; either version 2, or (at your option) any ## later version. ## ## These functions are distributed in the hope that they will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## The text of the GNU General Public License, version 2, is available ## as http://www.gnu.org/copyleft or by writing to the Free Software ## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ## Hmisc/R/0000755000176200001440000000000014370730127011522 5ustar liggesusersHmisc/R/ecdf.s0000644000176200001440000003254314112731327012613 0ustar liggesusersEcdf <- function(x, ...)
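  ## S3 generic: dispatches on class(x), e.g. to Ecdf.default or
  ## Ecdf.data.frame defined below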
UseMethod('Ecdf') Ecdf.default <- function(x, what=c('F','1-F','f','1-f'), weights=rep(1, length(x)), normwt=FALSE, xlab, ylab, q, pl=TRUE, add=FALSE, lty=1, col=1, group=rep(1, length(x)), label.curves=TRUE, xlim, subtitles=TRUE, datadensity=c('none','rug','hist','density'), side=1, frac=switch(datadensity, none=NA,rug=.03,hist=.1,density=.1), dens.opts=NULL, lwd=1, log='', ...) { datadensity <- match.arg(datadensity) what <- match.arg(what) colspec <- FALSE if(datadensity != 'none') { if(side %in% c(2,4)) stop('side must be 1 or 3 when datadensity is specified') if('frac' %nin% names(dens.opts)) dens.opts$frac <- frac if('side' %nin% names(dens.opts)) dens.opts$side <- side if('col' %in% names(dens.opts)) colspec <- TRUE } if(missing(xlab)) xlab <- label(x, units=TRUE, plot=TRUE, default=deparse(substitute(x))) what <- match.arg(what) if(missing(ylab)) ylab <- switch(what, 'F'='Proportion <= x', '1-F'='Proportion > x', 'f'='Frequency <= x') group <- as.factor(group) group <- group[drop=TRUE] if(length(x) != length(group)) stop('length of x != length of group') nna <- !(is.na(x) | is.na(group) | is.na(weights)) X <- x[nna] group <- group[nna] lev <- levels(group) nlev <- length(lev) curves <- vector('list',nlev) names(curves) <- lev lty <- rep(lty, length.out=nlev) col <- rep(col, length.out=nlev) lwd <- rep(lwd, length.out=nlev) if(missing(xlim)) xlim <- range(X) n <- if(normwt) length(X) else sum(weights[nna]) m <- (if(normwt) length(nna) else sum(weights, na.rm=TRUE)) - n weights <- weights[nna] for(i in 1:nlev) { s <- group == lev[i] x <- X[s] wt <- weights[s] xorig <- x z <- wtd.Ecdf(x, wt, type='i/n', normwt=normwt, na.rm=FALSE) x <- z$x; y <- z$ecdf switch(what, '1-F' = {y <- 1 - y}, 'f' = {y <- y * sum(wt)}, '1-f' = {x <- x[-1] y <- as.vector((1 - y[- length(y)]) * sum(wt)) } ) if(pl) { if(i==1 && !add) plot(x, y, xlab=xlab, ylab=ylab, xlim=xlim, type='n', log=log, ...) lines(x, y, type="s", lty=lty[i], col=col[i], lwd=lwd[i]) if(subtitles && i == 1) { pm <- paste("n:", n, " m:", m, sep="") title(sub=pm, adj=0, cex=.5) } if(!missing(q)) { if(what == '1-f') stop('what="1-f" not yet implemented with q') if(what=='f') q <- q * y[length(y)] else if(what == '1-F') q <- 1 - q q <- switch(what, 'f' = q * sum(wt), '1-F' = 1 - q, 'F' = q) a <- par("usr") for(w in q) { quant <- if(what=='1-F') min(x[y <= w]) else min(x[y >= w]) lines(c(a[1], quant), c(w, w), lty=2, col=1) lines(c(quant, quant), c(w, a[3]), lty=2, col=col[i]) } } } curves[[i]] <- list(x=x, y=y) if(datadensity!='none') { if(!colspec) dens.opts$col <- col[i] do.call(switch(datadensity, rug = 'scat1d', hist = 'histSpike', density= 'histSpike'), c(list(x=xorig, add=TRUE), if(datadensity=='density') list(type='density'), dens.opts)) } } if(nlev > 1 && (is.list(label.curves) || label.curves)) labcurve(curves, type='s', lty=lty, col.=col, opts=label.curves) invisible(structure(if(nlev==1) list(x = x, y = y) else curves, N=list(n=n, m=m))) } Ecdf.data.frame <- function(x, group=rep(1, nrows), weights=rep(1,nrows), normwt=FALSE, label.curves=TRUE, n.unique=10, na.big=FALSE, subtitles=TRUE, vnames=c("labels","names"), ...) 
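  ## Data frame method: draws one ECDF panel for each variable that is
  ## neither character nor factor and has at least n.unique distinct
  ## non-NA values, choosing a par(mfrow) layout when several panels are needed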
{ vnames <- match.arg(vnames) mf <- par('mfrow') if(length(mf) == 0) mf <- c(1, 1) g <- function(v, n.unique) { if(is.character(v) || is.factor(v)) return(FALSE) length(unique(v[!is.na(v)])) >= n.unique } use <- sapply(x, g, n.unique=n.unique) automf <- FALSE if((la <- sum(use)) > 1 & max(mf) == 1) { mf <- if(la<=4) c(2,2) else if(la<=6) c(2,3) else if(la<=9) c(3,3) else if(la<=12)c(3,4) else if(la<=16)c(4,4) else c(4,5) automf <- TRUE } oldmf <- par('mfrow') par(mfrow=mf) on.exit(par(oldmf)) nam <- names(x) nrows <- nrow(x) i <- 0 j <- 0 group <- as.factor(group) for(j in (1 : length(x))[use]) { v <- x[[j]] i <- i + 1 lab <- if(vnames == 'names') nam[j] else label(v, units=TRUE, plot=TRUE, default=nam[j]) z <- Ecdf(v, group=group, weights=weights, normwt=normwt, xlab=lab, label.curves=label.curves, subtitles=subtitles, ...) if(na.big) { m <- attr(z, 'N')$m if(m > 0) mtext(paste(m,"NAs"), line=-2, cex=1) } if(automf && interactive() && names(dev.list()) %nin% c('postscript','win.printer') && (i %% prod(mf)==0)) { cat("click left mouse button to proceed\n") locator(1) } } invisible(ceiling(sum(use) / prod(mf))) } prepanel.Ecdf <- function(x, y, fun, what, ...) { xlim <- range(x, na.rm=TRUE) l <- length(x[! is.na(x)]) ylim <- switch(what, F = c(0, 1), '1-F' = c(0, 1), f = c(0, l), '1-f' = c(0, l)) ylim <- fun(ylim) if(any(is.infinite(ylim))) ylim <- fun(c(.001, .999)) list(xlim=xlim, ylim=ylim, dx=diff(xlim), dy=diff(ylim)) } panel.Ecdf <- function(x, y, subscripts, groups=NULL, q=NULL, type='s', method=c('i/n','(i-1)/(n-1)','i/(n+1)'), fun, what = c('F', '1-F', 'f', '1-f'), label.curves=TRUE, lwd = plot.line$lwd, lty = plot.line$lty, pch = plot.symbol$pch, cex = plot.symbol$cex, font= plot.symbol$font, col = NULL, ...) { method <- match.arg(method) what <- match.arg(what) if(length(groups)) groups <- as.factor(groups) type <- 's' # lattice histogram sets to 'percent' g <- unclass(groups)[subscripts] ng <- if(length(groups)) max(g, na.rm=TRUE) else 1 plot.symbol <- trellis.par.get( if(ng > 1) "superpose.symbol" else "plot.symbol") plot.line <- trellis.par.get( if(ng > 1) "superpose.line" else "plot.line") qrefs <- function(x, q, col, fun, llines, grid) { quant <- quantile(x, probs=q, na.rm=TRUE) a <- parGrid(grid)$usr for(i in 1 : length(q)) { llines(c(a[1], quant[i]), fun(c(q[i], q[i])), lty=2, col=1) llines(c(quant[i], quant[i]), fun(c(q[i], a[3])), lty=2, col=col) } } ppanel <- function(x, y, type, cex, pch, font, lwd, lty, col, q, qrefs, ecdf.type, fun=fun, what, datadensity=c('none','rug','hist','density'), side=1, frac=switch(datadensity, none=NA, rug=.03, hist=.1, density=.1), dens.opts=NULL, llines, ...) { ## y ignored z <- wtd.Ecdf(x, type=ecdf.type, na.rm=FALSE) zx <- z$x y <- z$ecdf switch(what, '1-F' = {y <- 1 - y}, 'f' = {y <- y * length(x)}, '1-f' = {zx <- zx[-1] y <- as.vector((1 - y[- length(y)]) * length(x)) } ) ## For some reason S-Plus will not plot anything the following way ## when lwd is a variable ##llines(z$x, fun(z$ecdf), lwd = lwd, lty = lty, col = col, ## type = type, ...) 
do.call('llines', list(zx, fun(y), lwd = lwd, lty = lty, col = col, type = type, ...)) if(length(q)) qrefs(x, q, col, fun=fun, llines=llines, grid=TRUE) datadensity <- match.arg(datadensity) if(datadensity != 'none') { if(side %in% c(2,4)) stop('side must be 1 or 3 when datadensity is specified') if('frac' %nin% names(dens.opts)) dens.opts$frac <- frac if('side' %nin% names(dens.opts)) dens.opts$side <- side if('col' %nin% names(dens.opts)) dens.opts$col <- col if('lwd' %nin% names(dens.opts)) dens.opts$lwd <- lwd do.call(switch(datadensity, rug ='scat1d', hist='histSpike', density='histSpike'), c(list(x=x, add=TRUE, grid=TRUE), if(datadensity == 'density') list(type='density'), dens.opts)) } } pspanel <- function(x, subscripts, groups, type, lwd, lty, pch, cex, font, col, q, qrefs, ecdf.type, fun, what, llines, ...) { ## y ignored lev <- levels(groups) groups <- as.numeric(groups)[subscripts] N <- seq(along = groups) curves <- list() for(i in 1:length(lev)) { which <- N[groups == i] ## sort in x j <- which # no sorting if(any(j)) { z <- wtd.Ecdf(x[j], type=ecdf.type, na.rm=FALSE) zx <- z$x y <- z$ecdf switch(what, '1-F' = {y <- 1 - y}, 'f' = {y <- y * length(x[j])}, '1-f' = {zx <- zx[-1] y <- as.vector((1 - y[- length(y)]) * length(x[j])) } ) do.call('llines', list(zx, fun(y), col = col[i], lwd = lwd[i], lty = lty[i], type = type, ...)) if(length(q)) qrefs(x[j], q, col[i], fun=fun, llines=llines, grid=TRUE) curves[[lev[i]]] <- list(x=zx, y=fun(y)) } } curves } lty <- rep(lty, length = ng) lwd <- rep(lwd, length = ng) pch <- rep(pch, length = ng) cex <- rep(cex, length = ng) font <- rep(font,length = ng) if(!length(col)) col <- plot.line$col col <- rep(col, length = ng) if(ng > 1) { levnum <- sort(unique(g)) curves <- pspanel(x, subscripts, groups, lwd=lwd, lty=lty, pch=pch, cex=cex, font=font, col=col, type=type, q=q, qrefs=qrefs, ecdf.type=method, fun=fun, what=what, llines=llines) if(!(is.logical(label.curves) && !label.curves)) { lc <- if(is.logical(label.curves)) list(lwd=lwd, cex=cex[1]) else c(list(lwd=lwd, cex=cex[1]), label.curves) labcurve(curves, lty=lty[levnum], lwd=lwd[levnum], col.=col[levnum], opts=lc, grid=TRUE, ...) } } else ppanel(x, lwd=lwd, lty=lty, pch=pch, cex=cex, font=font, col=col, type=type, q=q, qrefs=qrefs, ecdf.type=method, fun=fun, what=what, llines=llines, ...) if(ng > 1) { ##set up for key() if points plotted .Key <- function(x=0, y=1, lev, col, lty, lwd, ...) 
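## .Key, registered via .setKey below, lets the user redraw the group
## legend after the lattice plot is rendered, e.g. Key() or Key(x, y);
## with no coordinates it defaults to the upper left corner (0, 1).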
  {
    oldpar <- par('usr', 'xpd')
    par(usr=c(0,1,0,1), xpd=NA)
    ## Even though par('usr') shows 0,1,0,1 after lattice draws
    ## its plot, it still needs resetting
    on.exit(par(oldpar))
    if(is.list(x)) {
      y <- x[[2]]; x <- x[[1]]
    }
    if(!length(x)) x <- 0
    if(!length(y)) y <- 1   ## because of formals()
    rlegend(x, y, legend=lev, lty=lty, lwd=lwd, col=col)
    invisible()
  }
    formals(.Key) <- list(x=NULL, y=NULL, lev=levels(groups),
                          col=col, lty=lty, lwd=lwd, ...=NULL)
    .setKey(.Key)
  }
}

Ecdf.formula <- function(x, data = sys.frame(sys.parent()), groups = NULL,
                         prepanel=prepanel.Ecdf, panel=panel.Ecdf, ...,
                         xlab, ylab, fun=function(x)x,
                         what=c('F', '1-F', 'f', '1-f'), subset=TRUE)
{
  what <- match.arg(what)
  vars <- all.vars(x)
  xname <- vars[1]
  if(missing(xlab))
    xlab <- label(eval(parse(text=xname), data), units=TRUE, plot=TRUE,
                  default=xname, grid=TRUE)
  if(missing(ylab))
    ylab <- if(missing(fun)) paste(switch(what,
                   'F'   = 'Proportion <=',
                   '1-F' = 'Proportion >=',
                   'f'   = 'Number <=',
                   '1-f' = 'Number >='), xname) else ''
  subset <- eval(substitute(subset), data)
  do.call("histogram",
          c(list(x, data=data, prepanel=prepanel, panel=panel,
                 ylab=ylab, xlab=xlab, fun=fun, what=what),
            if(!missing(groups)) list(groups=eval(substitute(groups), data)),
            if(!missing(subset)) list(subset=subset),
            list(...)))
}
Hmisc/R/capitalize.s
capitalize <- function(string) {
  capped <- grep("^[A-Z]", string, invert = TRUE)
  substr(string[capped], 1, 1) <- toupper(substr(string[capped], 1, 1))
  return(string)
}
Hmisc/R/nobsY.s
nobsY <- function(formula, group=NULL, data=NULL, subset=NULL,
                  na.action=na.retain, matrixna=c('all', 'any')) {
  matrixna <- match.arg(matrixna)
  forig <- formula
  formula <- Formula(formula)
  environment(formula) <- new.env(parent = environment(formula))
  en <- environment(formula)
  assign(envir = en, 'id',         function(x) x)
  assign(envir = en, 'pending',    function(x) x)
  assign(envir = en, 'randomized', function(x) x)
  assign(envir = en, 'cond',
         function(x, label, condition) rep(1, length(condition)))
  marg <- length(data) && '.marginal.'
%in% names(data) if(marg) formula <- update(formula, .~. + .marginal.) mf <- if(length(subset)) model.frame(formula, data=data, subset=subset, na.action=na.action) else model.frame(formula, data=data, na.action=na.action) Y <- model.part(formula, data=mf, lhs=1) X <- model.part(formula, data=mf, rhs=1) ## Get id variable if present so can count unique subjects rhs <- terms(formula, rhs=1, specials='id') sr <- attr(rhs, 'specials') ## specials counts from lhs variables wid <- sr$id if(length(wid)) { xid <- X[[wid - ncol(Y)]] if(length(wid) > 1) { xid$sep <- '.' xid <- do.call('paste', xid) } ## Remove id() from formula forig <- as.character(forig) if(ncol(X) == 1) ## id() is the only right-hand term forig <- as.formula(paste(forig[2], ' ~ 1')) else { forig[3] <- sub(' \\+ id(.*)', '', forig[3]) forig <- as.formula(paste(forig[2], forig[3], sep=' ~ ')) } } else xid <- 1 : nrow(Y) idv <- xid group <- if(length(group) && group %in% names(X)) X[[group]] if(marg) { xm <- X$.marginal. if(length(group)) group <- group[xm == ''] Y <- Y [xm == '',, drop=FALSE] xid <- xid[xm == ''] } nY <- ncol(Y) nobs <- rep(NA, nY) ylab <- sapply(Y, label) ylab <- ifelse(ylab == '', names(Y), ylab) names(nobs) <- ylab nobsg <- if(length(group)) { glev <- if(is.factor(group)) levels(group) else sort(unique(group[! is.na(group)])) matrix(NA, ncol=nY, nrow=length(glev), dimnames=list(glev, ylab)) } if(nY > 0) for(i in 1 : nY) { y <- Y[[i]] ## is.na.Surv reduces to vector but need to keep as matrix notna <- if(is.matrix(y)) { numna <- rowSums(is.na(unclass(y))) if(matrixna == 'any') numna == 0 else numna < ncol(y) } else ! is.na(y) nobs[i] <- length(unique(xid[notna])) if(length(group)) nobsg[, i] <- tapply(xid[notna], group[notna], function(x) length(unique(x))) } structure(list(nobs=nobs, nobsg=nobsg, id=idv, formula=forig)) } addMarginal <- function(data, ..., label='All', margloc=c('last', 'first'), nested) { nested <- as.character(substitute(nested)) if(length(nested) && nested == '') nested <- NULL vars <- as.character(sys.call())[- (1 : 2)] vars <- intersect(vars, names(data)) data$.marginal. <- '' margloc <- match.arg(margloc) if(length(nested) && (nested %nin% names(data))) stop(paste('Variable', nested, 'is not in data')) labs <- sapply(data, function(x) { la <- attr(x, 'label') if(! length(la)) la <- '' la } ) un <- sapply(data, function(x) { u <- attr(x, 'units') if(! length(u)) u <- '' u } ) levs <- vector('list', length(vars)) names(levs) <- vars for(v in setdiff(vars, nested)) { d <- data d$.marginal. <- ifelse(d$.marginal. 
                                 == '', v, paste(d$.marginal., v, sep=','))
    levs[[v]] <- levels(as.factor(d[[v]]))
    levs[[v]] <- if(margloc == 'last') c(levs[[v]], label) else c(label, levs[[v]])
    d[[v]] <- label
    if(length(nested)) {
      levs[[nested]] <- levels(as.factor(d[[nested]]))
      levs[[nested]] <- if(margloc == 'last') c(levs[[nested]], label) else c(label, levs[[nested]])
      d[[nested]] <- label
    }
    data <- if(margloc == 'last') rbind(data, d) else rbind(d, data)
  }

  for(v in vars) data[[v]] <- factor(data[[v]], levs[[v]])

  ## Restore any Hmisc attributes
  if(any(labs != '') || any(un != ''))
    for(i in 1 : length(data)) {
      if(labs[i] != '') {
        attr(data[[i]], 'label') <- labs[i]
        class(data[[i]]) <- c('labelled', class(data[[i]]))
      }
      if(un[i] != '') attr(data[[i]], 'units') <- un[i]
    }

  data
}
Hmisc/R/sas.get.s
## $Id$
sas.get <- function(libraryName, member, variables = character(0),
                    ifs = character(0),
                    format.library = libraryName, id,
                    dates. = c("sas","yymmdd","yearfrac","yearfrac2"),
                    keep.log = TRUE, log.file = "_temp_.log",
                    macro = sas.get.macro,
                    data.frame.out = existsFunction("data.frame"),
                    clean.up = FALSE, quiet = FALSE, temp = tempfile("SaS"),
                    formats=TRUE, recode=formats, special.miss=FALSE,
                    sasprog="sas", as.is=.5, check.unique.id=TRUE,
                    force.single=FALSE, pos, uncompress=FALSE,
                    defaultencoding="latin1", var.case="lower")
{
  if(force.single) stop('force.single does not work under R')
  dates. <- match.arg(dates.)

  fexists <- function(name) {
    w <- file.exists(name)
    attr(w, 'which') <- name[w]
    w
  }

  file.is.dir <- function(name) {
    isdir <- file.info(name)$isdir
    isdir && !is.na(isdir)
  }

  file.is.readable <- function(name) file.access(name,4)==0

  fileShow <- function(x) file.show(x)

  if(recode) formats <- TRUE

  if(missing(formats) || formats) {
    ## ***** Next line begins mod from Mike Kattan edits 11 Sep 97
    ## Redone FEH 22Oct00
    no.format <- all(!fexists(file.path(format.library,
                       c('formats.sc2','formats.sct','formats.sct01',
                         'formats.sas7bcat'))))
    if(no.format) {
      if((!missing(formats) && formats) || (!missing(recode) && recode))
        warning(paste(paste(format.library,
                            "/formats.sc? or formats.sas7bcat", sep = ""),
                      " not found. Formatting ignored. \n"))
      formats <- recode <- FALSE
    }
    ## ***** End Mike Kattan edits 11 Sep 97
  }

  ## 5 Changes here from Claudie Berger <claudie@osteo1.ri.mgh.mcgill.ca> 19feb00
  ## Allows work on sas v7.
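  ## sas.get drives a SAS batch job through several temporary files
  ## (paths derive from temp; suffixes per the assignments just below):
  ##   sasin   (temp.3.sas) - the generated SAS program itself
  ##   sasout1 (temp.1.sas) - data dictionary: name, type, length,
  ##                          format, label, n for each variable
  ##   sasout2 (temp.2.sas) - the data, one delimited ASCII record per row
  ##   sasout3 (temp.4.sas) - an S expression holding format values/labels
  ##   sasout4 (temp.5.sas) - locations of special missing values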
sasin <- paste(temp, ".3.sas", sep = "") sasout1 <- paste(temp, ".1.sas", sep = "") sasout2 <- paste(temp, ".2.sas", sep = "") sasout3 <- paste(temp, ".4.sas", sep = "") sasout4 <- paste(temp, ".5.sas", sep = "") nvariables <- length(variables) if(nvariables>0) { if(any(jdup <- duplicated(variables))) stop(paste("duplicate variables requested: ", variables[jdup])) } varstring <- paste(variables, collapse = "\n ") ifs <- paste("'",paste(ifs, collapse = ";\n "),"'",sep="") if(length(sasin) != 1) stop("Illegal temporary file name") temp.files <- c(sasin, sasout1, sasout2, sasout3, sasout4) if(!keep.log) temp.files <- c(temp.files, log.file) if(clean.up) on.exit(unlink(temp.files)) ##on.exit(sys(paste("rm -f", paste(temp.files, collapse = " ")))) ## 4oct03 if(missing(member)) stop("SAS member name is required") if(missing(libraryName)) stop("SAS library name is required") ## Encoding added by Reinhold Koch 24Jan14 <reinhold.koch@roche.com> cat("%LET DEFAULTE=", defaultencoding, ";\n", sep="", file=sasin) cat(macro, sep="\n", file=sasin, append=TRUE) sasds.suffix <- c('sd2','sd7','ssd01','ssd02','ssd03','ssd04','sas7bdat') ## 22Oct00 if(libraryName == "") libraryName <- "." if(!file.is.dir(libraryName)) stop(paste(sep = "", "library, \"", libraryName, "\", is not a directory")) unix.file <- file.path(libraryName, paste(member, sasds.suffix, sep=".")) if(uncompress) { if(any(fe <- fexists(paste(unix.file,".gz",sep="")))) system(paste("gunzip ", attr(fe,'which'),'.gz',sep='')) else if(any(fe <- fexists(paste(unix.file,".Z",sep="")))) system(paste("uncompress ",attr(fe,'which'),'.Z',sep='')) } if(!any(fe <- fexists(unix.file))) { stop(paste(sep = "", "Unix file, \"", paste(unix.file,collapse=' '), "\", does not exist")) } else { file.name <- attr(fe,'which') if(!file.is.readable(file.name)) { stop(paste(sep = "", "You do not have read permission for Unix file, \"", file.name, "\"")) # 22Oct00 } } cat("libname temp '", libraryName, "';\n", file = sasin, append = TRUE, sep = "") ## format.library should contain formats.sct containing user defined ## formats used by this dataset. It must be present. cat("libname library '", format.library, "';\n", file = sasin, append = TRUE, sep = "") cat("%sas_get(temp.", member, ",\n", " ", sasout1, ",\n", " ", sasout2, ",\n", " ", sasout3, ",\n", " ", sasout4, ",\n", " dates=", dates., ",\n", " vars=", varstring, ",\n", " ifs=", ifs, ",\n", " formats=", as.integer(formats), "\n,", " specmiss=", as.integer(special.miss), ");\n", file = sasin, append = TRUE, sep = "") status <- system(paste(shQuote(sasprog), shQuote(sasin), "-log", shQuote(log.file)), intern=FALSE) ## 24nov03 added output=F if(status != 0) { if(!quiet && fexists(log.file)) fileShow(log.file) ## 4oct03 stop(paste("SAS job failed with status", status)) } ## ## Read in the variable information ## if(!(fexists(sasout1) && fexists(sasout2))) { if(!quiet) fileShow(log.file) ## 4oct03 stop("SAS output files not found") } vars <- scan(sasout1, list(name = "", type = 0, length = 0, format = "", label = "", n = 0), multi.line = FALSE, sep = "\022", flush=TRUE, comment.char='', quote='') ## Thanks Don MacQueen for scan fix for R nvar <- length(vars$name) if(nvar == 0) { if(!quiet) fileShow(log.file) ## 4oct03 stop("First SAS output is empty") } nrow <- vars$n[1] #n is the same for each variable ## Read the data in ## We try to be clever about the variable type. If SAS is character ## use char of course. If is numeric and length >4, use double. If ## numeric and length <4, use single. 
We could also use the format to ## choose further, if it consists of a number followed by a "." ## can we safely assume integer. ## type <- ifelse(vars$type == 2, "character(nrow)", ifelse(force.single, ##28Mar01 "single(nrow)", "double(nrow)")) ##BILL: I corrected the macro so the following isn't needed: ## get rid of trailing blank on names ## vars$name <- unix("sed 's/ $//'", vars$name) inlist <- paste("\"", vars$name, "\"=", type, sep = "", collapse = ", ") inlist <- parse(text = paste("list(", inlist, ")")) ## Inlist would now be the size of the final data structure, if I had ## evaluated it. ## Read the data ds <- scan(sasout2, eval(inlist), sep = "\022", multi.line = FALSE, flush=TRUE, comment.char='', quote='') if(length(ds) < nvariables) { m <- variables[is.na(match(variables, names(ds)))] if(length(m) > 0) { warning(paste(length(m), "requested variables did not exist:", paste("\"", m, "\"", sep = "", collapse = " "), "\n\t(use sas.contents())")) } } format <- vars$format format[format=='$'] <- ' ' # 1Mar00 label <- vars$label name <- vars$name esasout3 <- formats && fexists(sasout3) #added formats && 1/20/93 if(recode && !esasout3) recode <- FALSE FORMATS <- NULL if(formats && esasout3) { FORMATS <- dget(sasout3) if(length(FORMATS)==0) { FORMATS <- NULL; recode <- FALSE } } smiss <- NULL if(special.miss && fexists(sasout4)) smiss <- scan(sasout4, list(name="", code="", obs=integer(1)), multi.line=FALSE, flush=TRUE, sep="\022", comment.char='', quote='') sasdateform <- c("date","mmddyy","yymmdd","ddmmyy","yyq","monyy", "julian","qtr","weekdate","weekdatx","weekday","month") dateform <- list(as.name("ddmmmyy"),"m/d/y","y/m/d","d/m/y",as.name("ddmmmyy"), "mon year",as.name("ddmmmyy"),"mon",as.name("ddmmmyy"), as.name("ddmmmyy"), as.name("ddmmmyy"),"m") sastimeform <- c("hhmm","hour","mmss","time") timeform <- c("h:m","h","m:s","h:m:s") sasdatetimeform <- c("datetime","tod") datetimeform <- list(list(as.name("ddmmmyy"),"h:m:s"), c("m/d/y"," ")) z <- "%02d%b%Y" dateform4 <- c(z,"%02m/%02d/%Y","%Y/%02m/%02d","%02d/%02m/%Y", z,"%02m %Y", z,"%02m", z, z, z,"%02m") timeform4 <- c("%02H:%02M","%02H","%02M:%02S","%02H:%02M:%02S") datetimeform4 <- c("%02d%b%Y %02h:%02m:%02s","%02m/%02d/%Y") ## Don MacQueen days.to.adj <- as.numeric(difftime(ISOdate(1970,1,1,0,0,0) , ISOdate(1960,1,1,0,0,0), 'days')) secs.to.adj <- days.to.adj*24*60*60 for(i in 1:nvar) { atr <- list() dsi <- ds[[i]] fname <- format[i] rec <- FALSE if(fname!=" ") { ff <- fname if(dates.=="sas" & (m <- match(fname,sasdateform,0)) >0) { ##look for partial dates dd <- dsi-floor(dsi) ddn <- !is.na(dd) if(any(ddn) && any(dd[ddn]!=0)) { ll <- 1:length(dd) atr$partial.date <- list(month=ll[dd==.5],day=ll[dd==.25],both=ll[dd==.75]) atr$imputed <- ll[dd!=0] dsi <- floor(dsi) } dsi <- importConvertDateTime(dsi, 'date', 'sas', form=dateform[m]) if(length(atr$imputed)) attr(dsi,'class') <- c("impute",attr(dsi,'class')) ff <- NULL } else { if((m <- match(fname,sastimeform,0)) >0) { dsi <- importConvertDateTime(dsi, 'time', 'sas', form=timeform[m]) ff <- NULL } else if((m <- match(fname,sasdatetimeform,0))>0) { dsi <- importConvertDateTime(dsi, 'datetime', 'sas', form=datetimeform[m]) ff <- NULL } } atr$format <- ff if(recode & length(g <- FORMATS[[fname]])) { labs <- g$labels if(!is.logical(recode)) { labs <- if(recode==1) paste(g$values,":",labs,sep="") else paste(labs,"(",g$values,")",sep="") } dsi <- factor(dsi, g$values, labs) atr$sas.codes <- g$values rec <- TRUE } } if(data.frame.out && !rec && vars$type[i]==2 && ((is.logical(as.is) 
&& !as.is) || (is.numeric(as.is) && length(unique(dsi)) < as.is*length(dsi)))) dsi <- factor(dsi, exclude="") #exclude added 5Mar93 ## For data frames, char. var usually factors if(label[i]!=" ") label(dsi) <- label[i] #atr$label <- label[i] if(length(smiss$name)) { j <- smiss$name==name[i] if(any(j)) { atr$special.miss <- list(codes=smiss$code[j],obs=smiss$obs[j]) attr(dsi,'class') <- c("special.miss",attr(dsi,'class')) } } if(!is.null(atr)) attributes(dsi) <- c(attributes(dsi),atr) if(missing(pos)) ds[[i]] <- dsi else assign(name[i], dsi, pos=pos) } if(!missing(pos)) return(structure(pos, class="pos")) atr <- list() if(missing(id)) { if(data.frame.out) atr$row.names <- as.character(1:nrow) } else { idname <- id jj <- match(idname, names(ds), 0) if(any(jj==0)) stop(paste("id variable(s) not in dataset:", paste(idname[jj==0],collapse=" "))) if(length(idname)==1) { id <- ds[[idname]] #Need since not use data.frame } else { id <- as.character(ds[[idname[1]]]) for(jj in 2:length(idname)) id <- paste(id, as.character(ds[[idname[jj]]])) } if(check.unique.id) { dup <- duplicated(id) if(any(dup)) warning(paste("duplicate IDs:", paste(id[dup], collapse=" "))) } if(data.frame.out) atr$row.names <- as.character(id) else atr$id <- id } if(var.case=="lower"){ names(ds)=tolower(names(ds)) } if(var.case=="upper"){ names(ds)=toupper(names(ds)) } if(!is.null(FORMATS)) atr$formats <- FORMATS if(data.frame.out) atr$class <- "data.frame" attributes(ds) <- c(attributes(ds),atr) ds } importConvertDateTime <- function(x, type=c('date','time','datetime'), input=c('sas','spss','dataload'), form) { type <- match.arg(type) input <- match.arg(input) if(input != 'sas' && type != 'date') stop('only date variables are support for spss, dataload') adjdays <- c(sas=3653, spss=141428, dataload=135080)[input] ## 1970-1-1 minus 1960-1-1, 1582-10-14, or 1600-3-1 origin <- c(sas='1960-01-01', spss='1582-10-14', dataload='1600-03-01')[input] if(input=='spss') x <- x/86400 switch(type, date = structure(x - adjdays, class='Date'), time = { ## Don MacQueen 3Apr02 z <- structure(x, class=c('POSIXt','POSIXct')) f <- format(z, tz='GMT') z <- as.POSIXct(format(z, tz='GMT'), tz='') structure(z, class=c('timePOSIXt','POSIXt','POSIXct'))}, datetime = as.POSIXct(x, origin=origin, tz='GMT')) # chron((x - adjdays*86400)/86400, # out.format=c(dates='day mon year', times='h:m:s'))}) } ## Don MacQueen 3Apr02 ## slightly modified copy of format.POSIXct() from R base format.timePOSIXt <- function (x, format = "%H:%M:%S", tz = "", usetz = FALSE, ...) { if (!inherits(x, c("timePOSIXt","POSIXct"))) stop("wrong class") class(x) <- class(x)[-1] structure(format.POSIXlt(as.POSIXlt(x, tz), format, usetz, ...), names = names(x)) } print.timePOSIXt <- function(x, ...) print(format(x, ...)) ##if(!.R.) 
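## A quick sanity check of importConvertDateTime (illustration only, never
## run when this file is sourced): SAS counts days from 1960-01-01 and
## R's Date class from 1970-01-01, a difference of 3653 days.
if(FALSE) {
  importConvertDateTime(0,    type='date', input='sas')  # as.Date('1960-01-01')
  importConvertDateTime(3653, type='date', input='sas')  # as.Date('1970-01-01')
}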
{ ## Output format routine needed by chron for usual SAS date format ddmmmyy <- function(x) { if (!requireNamespace("chron", quietly = TRUE)) stop("This function requires the 'chron' package.") y <- chron::month.day.year(trunc(unclass(x)), attr(x,"origin")) yr <- y$year m <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct", "Nov","Dec")[y$month] ifelse(yr<1900 | yr>=2000, paste(y$day,m,yr,sep=""), paste(y$day,m,yr-1900,sep="")) } ## Functions to handle special.miss class is.special.miss <- function(x, code) { sm <- attr(x, "special.miss") if(!length(sm)) return(rep(FALSE, length(x))) if(missing(code)) { z <- rep(FALSE, length(x)) z[sm$obs] <- TRUE } else { z <- rep(FALSE, length(x)) z[sm$obs[sm$codes==code]] <- TRUE } z } "[.special.miss" <- function(x, ..., drop=FALSE) { ats <- attributes(x) ats$dimnames <- NULL ats$dim <- NULL ats$names <- NULL attr(x,'class') <- NULL y <- x[..., drop = drop] if(length(y) == 0) return(y) k <- seq(along=x) names(k) <- names(x) k <- k[...] attributes(y) <- c(attributes(y), ats) smiss <- attr(y, "special.miss") codes <- rep("ZZ",length(x)) codes[smiss$obs] <- smiss$codes codes <- codes[...] which <- codes!="ZZ" if(sum(which)) attr(y,"special.miss") <- list(obs=seq(along=k)[codes!="ZZ"],codes=codes[codes!="ZZ"]) else { attr(y,"special.miss") <- NULL attr(y,'class') <- attr(y,'class')[attr(y,'class') != "special.miss"] if(length(attr(y,'class'))==0) attr(y,'class') <- NULL } y } format.special.miss <- function(x, ...) { w <- if(is.factor(x)) as.character(x) else { cl <- attr(x,'class'); cl <- cl[cl!="special.miss"] if(length(cl)) { attr(x,'class') <- cl; format(x, ...) } else format.default(x, ...) } sm <- attr(x, "special.miss") names(w) <- names(x) if(!length(sm)) return(w) w[sm$obs] <- sm$codes attr(w,"label") <- attr(w,"special.miss") <- attr(w,"class") <- NULL w } print.special.miss <- function(x, ...) { sm <- attr(x, "special.miss") if(!length(sm)) { print.default(x) return(invisible()) } w <- format.special.miss(x) print.default(w, quote=FALSE) invisible() } sas.codes <- function(object) attr(object, "sas.codes") code.levels <- function(object) { if(length(cod <- attr(object,"sas.codes"))) levels(object) <- paste(cod,":",levels(object),sep="") object } as.data.frame.special.miss <- function(x, row.names = NULL, optional = FALSE, ...) { nrows <- length(x) if(is.null(row.names)) { ## the next line is not needed for the 1993 version of data.class and is ## included for compatibility with 1992 version if(length(row.names <- names(x)) == nrows && !anyDuplicated(row.names)) { } else if(optional) row.names <- character(nrows) else row.names <- as.character(1:nrows) } value <- list(x) if(!optional) names(value) <- deparse(substitute(x))[[1]] structure(value, row.names=row.names, class='data.frame') } ## val{nval}=compress(value)||"" was =value 23mar04 sas.get.macro <- c("/* Macro sas_get (modified by F. 
Harrell 30Jan90, Bill Dunlap Dec90, FH Mar92,", "\t\t\tFH Apr95 (extend LENGTH smiss))", " Sets up for conversion of SAS dataset to S dataset.", " Arguments:", "\tdataset - name of SAS dataset", "\ttemp1\t- Name of temporary dataset to contain data dictionar (unquoted)", "\t\t default=/tmp/file.1", "\ttemp2\t- Name of temporary dataset to contain ASCII version of SAS", "\t\t dataset (unquoted)", "\t\t default=/tmp/file.2", "\ttemp3 - Name of temporary dataset to contain ASCII file with S", "\t\t program to store format values and labels", "\ttemp4 - Name of temporary dataset to contain ASCII file with", "\t\t locations of special missing values", "\tdates\t- SAS to store date variables in SAS format ( # days from 1/1/60)", "\t\t (default)", "\t\t- YEARFRAC to store as days from 1/1/1900, divided by 365.25", "\t\t- YEARFRAC2 to store as year + fraction of current year", "\t\t- YYMMDD to store as numeric YYMMDD", "\tvars - list of variable in dataset that you want returned to S", " (unquoted, separate variable names with spaces) If empty,", " then return all variables.", " ifs - sequence of SAS subsetting if statements, (unquoted,", " separated by semicolons).", "\tformats - 0 (default) - do not create file on temp3 containing S", "\t\t statements to store format values and labels, 1 do create", "\tspecmiss- 0 (default). Set to 1 to write a data file on temp4 with", "\t\t the fields: variable name, special missing value code,", "\t\t observation number", "\tdefencod - default encoding of dataset if it does not specify", " */", "%macro sas_get(dataset, temp1, temp2, temp3, temp4, dates=SAS, vars=, ifs=, ", "\tformats=0, specmiss=0, defencod=&DEFAULTE);", "OPTIONS NOFMTERR;", "%LET DSID=%SYSFUNC(open(&dataset,i));", "%LET ENCODE=%SCAN(%SYSFUNC(ATTRC(&DSID,ENCODING)),1);", "%IF &ENCODE=Default %THEN %LET dataset=&dataset(encoding=&defencod);", "%IF %QUOTE(&temp1)= %THEN %LET temp1=/tmp/file.1;", "%IF %QUOTE(&temp2)= %THEN %LET temp2=/tmp/file.2;", "%IF %QUOTE(&temp3)= %THEN %LET temp3=/tmp/file.3;", "%IF %QUOTE(&temp4)= %THEN %LET temp4=/tmp/file.4;", ## Next line had %QUOTE(&ifs),1,\"'\" 31oct02 "%LET dates=%UPCASE(&dates);", "%LET ifs=%SCAN(%QUOTE(&ifs),1,'');", "%LET _s_=_sav_;", "/* BILL: Can these 2 subsets be combined into one pass of the data? -Frank*/", "/* Subset by observation first */", "%IF %QUOTE(&ifs)^= %THEN %DO;", " data _osub_ ;", " set &dataset ;", " &ifs ;", " %LET dataset=_osub_ ;", " %END;", "/* Then subset by variable */", "%IF &vars^= %THEN %DO;", " data _vsub_ ;", " set &dataset ;", " keep &vars ;", " %LET dataset=_vsub_ ;", " %END;", "proc contents data=&dataset out=&_s_(KEEP=name type length label format nobs ", " varnum) noprint; ", "%IF &formats=1 %THEN %DO;", " PROC FORMAT LIBRARY=LIBRARY CNTLOUT=f(KEEP=fmtname type start end label);", " DATA f; SET f; RETAIN n 0; n+1; IF type=\"C\" THEN fmtname=\"$\"||fmtname;", " PROC SORT DATA=f OUT=f(DROP=n); BY fmtname n; ", " *Sort by n instead of start for numerics so 13 sorts after 2;", " *Dont consider formats containing ANY range of values;", " *Dont consider formats that dont have at least one non-missing (if", " numeric) starting value. This gets rid of formats that are used", " only to label special missing values;", " DATA f2; SET f; BY fmtname; RETAIN anyrange 0 anynmiss 0;", " IF FIRST.fmtname THEN DO;anyrange=0;anynmiss=0;END;", " IF start^=end THEN anyrange=1;", " IF TYPE=\"C\" THEN anynmiss=1; ", " ELSE IF (start+0)>. 
THEN anynmiss=1;", " IF LAST.fmtname & anynmiss & ^anyrange THEN OUTPUT; KEEP fmtname;", " DATA f; MERGE f f2(IN=in2); BY fmtname; IF in2;", " IF TYPE=\"N\" THEN DO; IF (start+0)>.; *S cannot handle special missings;", " END;", " RENAME fmtname=format start=value; DROP end;", " PROC SORT DATA=&_s_(KEEP=format) OUT=sform; BY format;", " DATA sform; SET sform; BY format; IF LAST.format;", " DATA f; MERGE sform(IN=in1) f(IN=in2); BY format; ", " IF in1 & in2;", " *This keeps formats ever used by any variable;", " DATA _NULL_; SET f END=_eof_; BY format;", " ARRAY val{*} $ 16 val1-val500; ARRAY lab{*} $ 40 lab1-lab500; ", " RETAIN done 0 nform 0 nval 0 val1-val500 \" \" lab1-lab500 \" \" bk -1; ", " FILE \"&temp3\" LRECL=4096;", " IF FIRST.format THEN DO;", " IF ^done THEN PUT 'list(' @@; done=1;", " nform=nform+1; nval=0;", " format=TRANSLATE(format,\".abcdefghijklmnopqrstuvwxyz\",", " \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");", " IF nform=1 THEN PUT '\"' format +bk '\"=list(' @@;", " ELSE PUT ', \"' format +bk '\"=list(' @@;", " END;", " nval=nval+1; ", " IF nval>500 THEN DO; ERROR \">500 format values not allowed\";ABORT ABEND;", " END;", ' val{nval}=compress(value)||""; lab{nval}=label; ', " IF LAST.format THEN DO;", " PUT \"values=c(\" @@; ", " DO i=1 TO nval; IF i>1 THEN PUT \",\" @@;", " IF type=\"N\" THEN PUT val{i} +bk @@;", " ELSE PUT '\"' val{i} +bk '\"' @@;", " END;", " PUT \"),labels=c(\" @@;", " DO i=1 TO nval; IF i>1 THEN PUT \",\" @@;", " PUT '\"' lab{i} +bk '\"' @@;", " END;", " PUT \"))\";", " END;", " IF _eof_ THEN PUT \")\";", " %END;", "PROC SORT DATA=&_s_;BY varnum;", "data _null_;", " set &_s_ end=eof;", " FILE \"&temp1\"; RETAIN _bk_ -1;", " if _n_ = 1 then do;", "%IF &specmiss=0 %THEN %LET ofile=_NULL_; ", "%ELSE %LET ofile=smiss(KEEP=vname val obs);", " put \"data &ofile; set &dataset end=eof;\";", " put ' file \"&temp2\" RECFM=D LRECL=4096;';", " put \" retain __delim 18 _bk_ -1 obs 0; LENGTH _xx_ $ 20 obs 5;obs+1; \";", "%IF &specmiss=1 %THEN %DO;", " put \"LENGTH vname $ 8 val $ 1;\"; %END;", " end;", " IF type=2 THEN DO;", " PUT 'FORMAT ' name ';' @;", " PUT 'IF ' name '=\" \" THEN PUT __delim IB1. @;';", "/* $char added F.H. 24Mar92, dropped +_bk_ before __delim */", "/* $CHAR. removed FEH 2Aug92, added null FORMAT above, added back +_bk_ */", " PUT 'ELSE PUT ' name '+_bk_ __delim IB1. @;';", " END;", " ELSE DO; ", " PUT 'IF ' name '<=.Z THEN _xx_=\"NA\";' @;", " PUT 'ELSE _xx_=LEFT(PUT(' @;", " format=UPCASE(format);", " IF format=\"DATE\"|format=\"MMDDYY\"|format=\"YYMMDD\"|", "format=\"DDMMYY\"|format=\"YYQ\"|format=\"MONYY\"|format=\"JULIAN\" THEN DO;", " %IF &dates=SAS %THEN", " PUT name \",BEST18.)\";", " %ELSE %IF &dates=YYMMDD %THEN", " PUT name \",YYMMDD6.)\";", " %ELSE %IF &dates=YEARFRAC %THEN", " PUT \"(\" name \"-MDY(1,1,1900))/365.25,7.3)\";", " %ELSE %IF &dates=YEARFRAC2 %THEN %DO;", " PUT \"YEAR(\" name \")-1900+(\" name \"-MDY(1,1,YEAR(\" name \")))/\" @;", " PUT \"(MDY(12,31,YEAR(\" name \"))-MDY(1,1,YEAR(\" name \"))+1),7.3)\";", " %END;", " ;", " END;\t", " ELSE DO;PUT name \",BEST18.)\" @;END;", " PUT '); PUT _xx_ +_bk_ __delim IB1. 
@;'; *Added +_bk_ 2Aug92;", "%IF &specmiss=1 %THEN %DO;", " put 'IF .A<=' name '<=.Z THEN DO;", " vname=\"' name +_bk_ '\"; val=put(' name ',1.); OUTPUT; END;';", " %END;", " END;", "if eof then PUT 'PUT; RUN;';", "run;", "%include \"&temp1\";", "data _null_; set &_s_;", " retain __delim 18 _bk_ -1; ", " file \"&temp1\" LRECL=4096;", " format=TRANSLATE(format,\".abcdefghijklmnopqrstuvwxyz\",", " \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");", " put name +_bk_ __delim IB1. type +_bk_ __delim IB1. length +_bk_ __delim IB1.", " format +_bk_ __delim IB1. label +_bk_ __delim IB1. nobs +_bk_ __delim IB1.;", "run;", "%IF &specmiss=1 %THEN %DO;", " PROC SORT DATA=smiss OUT=smiss;BY vname val obs;", " DATA _NULL_; SET smiss;FILE \"&temp4\" RECFM=D LRECL=30;", " RETAIN _bk_ -1 __delim 18;", " vname=TRANSLATE(vname,\".abcdefghijklmnopqrstuvwxyz\",", "\t\t \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");", " PUT vname +_bk_ __delim IB1. val +_bk_ __delim IB1. obs +_bk_ __delim IB1.;", " RUN;", " %END;", "%mend sas_get;") sasxport.get <- function(file, lowernames=TRUE,force.single=TRUE, method=c('read.xport','dataload','csv'), formats=NULL, allow=NULL, out=NULL, keep=NULL, drop=NULL, as.is=0.5, FUN=NULL) { method <- match.arg(method) if(length(out) && method!='csv') stop('out only applies to method="csv"') rootsoftware <- if(method=='dataload')'dataload' else 'sas' sasdateform <- toupper(c("date","mmddyy","yymmdd","ddmmyy","yyq","monyy", "julian","qtr","weekdate","weekdatx","weekday","month")) sastimeform <- toupper(c("hhmm","hour","mmss","time")) sasdatetimeform <- toupper(c("datetime","tod")) ## Note: unlike read.spss, read.dta, SAS xport reading functions do not ## support URLs. And thanks to Kurt Hornik for https if(grepl("^https?://", tolower(file))) { tf <- tempfile() download.file(file, tf, mode='wb', quiet=TRUE) file <- tf } dsinfo <- if(method == 'csv') lookupSASContents(file) else lookup.xport(file) whichds <- if(length(keep)) keep else setdiff(names(dsinfo), c(drop,'_CONTENTS_','_contents_')) ds <- switch(method, read.xport= read.xport(file), dataload = read.xportDataload(file, whichds), csv = if(!length(out)) readSAScsv(file, dsinfo, whichds)) if(method=='read.xport' && (length(keep) | length(drop))) ds <- ds[whichds] ## PROC FORMAT CNTLOUT= dataset present? 
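  ## Heuristic used just below: a member is treated as a PROC FORMAT
  ## CNTLOUT= dataset when it contains all of the variables FMTNAME,
  ## START, END, MIN, MAX, FUZZ; only single-value formats (START == END)
  ## survive later when value labels are applied.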
fds <- NULL if(!length(formats)) { fds <- sapply(dsinfo, function(x) all(c('FMTNAME','START','END','MIN','MAX','FUZZ') %in% x$name)) fds <- names(fds)[fds] if(length(fds) > 1) { warning('transport file contains more than one PROC FORMAT CNTLOUT= dataset; using only the first') fds <- fds[1] } } finfo <- NULL if(length(formats) || length(fds)) { finfo <- if(length(formats)) formats else if(length(out)) readSAScsv(file, dsinfo, fds) else ds[[fds]] ## Remove leading $ from char format names ## fmtname <- sub('^\\$','',as.character(finfo$FMTNAME)) fmtname <- as.character(finfo$FMTNAME) finfo <- split(finfo[c('START','END','LABEL')], fmtname) finfo <- lapply(finfo, function(f) { rb <- function(a) { # remove leading + trailing blanks a <- sub('[[:space:]]+$', '', as.character(a)) sub('^[[:space:]]+', '', a) } st <- rb(f$START) en <- rb(f$END) lab <- rb(f$LABEL) ##j <- is.na(st) | is.na(en) ## st %in% c('','.','NA') | en %in% c('','.','NA') j <- is.na(st) | is.na(en) | st == '' | en == '' if(any(j)) { warning('NA in code in FORMAT definition; removed') st <- st[!j]; en <- en[!j]; lab <- lab[!j] } if(!all(st==en)) return(NULL) list(value = all.is.numeric(st, 'vector'), label = lab) }) } ## Number of non-format datasets nods <- length(whichds) nds <- nods - (length(formats) == 0 && length(finfo) > 0) which.regular <- setdiff(whichds, fds) dsn <- tolower(which.regular) if((nds > 1) && !length(out)) { res <- vector('list', nds) names(res) <- gsub('_','.',dsn) } if(length(FUN)) { funout <- vector('list', length(dsn)) names(funout) <- gsub('_','.',dsn) } possiblyConvertChar <- if(method=='read.xport') (is.logical(as.is) && as.is) || (is.numeric(as.is) && as.is < 1) else (is.logical(as.is) && !as.is) || (is.numeric(as.is) && as.is > 0) ## reverse logic because read.xport always converts characters to factors j <- 0 for(k in which.regular) { j <- j + 1 cat('Processing SAS dataset', k, '\t ') w <- if(length(out)) readSAScsv(file, dsinfo, k) else if(nods==1) ds else ds[[k]] cat('.') if(!length(w)) { cat('Empty dataset', k, 'ignored\n') next } chcase <- if(lowernames) tolower else function(x) x nam <- chcase(makeNames(names(w), allow=allow)) names(w) <- nam dinfo <- dsinfo[[k]] fmt <- sub('^\\$','',dinfo$format) lab <- dinfo$label ndinfo <- chcase(makeNames(dinfo$name, allow=allow)) names(lab) <- names(fmt) <- ndinfo for(i in 1:length(w)) { changed <- FALSE x <- w[[i]] fi <- fmt[nam[i]]; names(fi) <- NULL if(fi != '' && length(finfo) && (fi %in% names(finfo))) { f <- finfo[[fi]] if(length(f)) { ## may be NULL because had a range in format x <- factor(x, f$value, f$label) attr(x, 'format') <- fi changed <- TRUE } } if(is.numeric(x)) { if(fi %in% sasdateform) { x <- importConvertDateTime(x, 'date', rootsoftware) changed <- TRUE } else if(fi %in% sastimeform) { x <- importConvertDateTime(x, 'time', rootsoftware) changed <- TRUE } else if(fi %in% sasdatetimeform) { x <- importConvertDateTime(x, 'datetime', rootsoftware) changed <- TRUE } else if(force.single) { if(all(is.na(x))) { storage.mode(x) <- 'integer' changed <- TRUE } else if(max(abs(x),na.rm=TRUE) <= (2^31-1) && all(floor(x) == x, na.rm=TRUE)) { storage.mode(x) <- 'integer' changed <- TRUE } } } else if(method=='read.xport' && possiblyConvertChar && is.factor(x)) { if((is.logical(as.is) && as.is) || (is.numeric(as.is) && length(unique(x)) >= as.is*length(x))) { x <- as.character(x) changed <- TRUE } } else if(possiblyConvertChar && is.character(x)) { if((is.logical(as.is) && !as.is) || (is.numeric(as.is) && length(unique(x)) < as.is*length(x))) { x <- 
                 factor(x, exclude='')
          changed <- TRUE
        }
      }
      lz <- lab[nam[i]]
      if(lz != '') {
        names(lz) <- NULL
        label(x) <- lz
        changed <- TRUE
      }
      if(changed) w[[i]] <- x
    }
    cat('.\n')
    if(length(out)) {
      nam <- gsub('_','.',dsn[j])
      assign(nam, w)
      ## ugly, but a way to get actual data frame name into first
      ## argument of save( )
      eval(parse(text=paste('save(',nam,', file="',
                            paste(out, '/', nam,'.rda',sep=''),
                            '", compress=TRUE)',sep='')))
      if(length(FUN) && length(w)) funout[[nam]] <- FUN(w)
      remove(nam)
    } else if(nds > 1) res[[j]] <- w
  }
  if(length(out)) {
    names(dsinfo) <- gsub('_','.',tolower(names(dsinfo)))
    if(length(FUN)) attr(dsinfo, 'FUN') <- funout
    invisible(dsinfo)
  } else if(nds > 1) res else w
}

## Use dataload program to create a structure like read.xport does
read.xportDataload <- function(file, dsnames) {
  outf <- substring(tempfile(tmpdir=''),2)
  file.copy(file, paste(tempdir(),outf,sep='/'))
  curwd <- getwd()
  on.exit(setwd(curwd))
  setwd(tempdir())
  n <- length(dsnames)
  w <- vector('list', n); names(w) <- dsnames
  for(a in dsnames) {
    status <- system(paste('dataload', outf, 'zzzz.rda', a), intern=FALSE)
    if(status==0) {
      load('zzzz.rda')
      names(zzzz) <- makeNames(names(zzzz))
      w[[a]] <- zzzz
    }
  }
  w
}

utils::globalVariables(c("NOBS", "memname", "memlabel"))

## Read _contents_.csv and store it like lookup.xport output
lookupSASContents <- function(sasdir) {
  w <- read.csv(paste(sasdir,'_contents_.csv',sep='/'), as.is=TRUE)
  z <- tapply(w$NOBS, w$MEMNAME, function(x)x[1])
  if(any(z == 0)) {
    cat('\nDatasets with 0 observations ignored:\n')
    print(names(z)[z == 0], quote=FALSE)
    w <- subset(w, NOBS > 0)
  }
  w$TYPE <- ifelse(w$TYPE==1, 'numeric', 'character')
  names(w) <- tolower(names(w))
  unclass(split(subset(w,select=-c(memname,memlabel)), w$memname))
}

## Read all SAS csv export files and store in a list
readSAScsv <- function(sasdir, dsinfo, dsnames=names(dsinfo)) {
  sasnobs <- sapply(dsinfo, function(x)x$nobs[1])
  multi <- length(dsnames) > 1
  if(multi) {
    w <- vector('list', length(dsnames))
    names(w) <- dsnames
  }
  for(a in dsnames) {
    z <- read.csv(paste(sasdir,'/',a,'.csv', sep=''),
                  as.is=TRUE, blank.lines.skip=FALSE, comment.char="")
    importedLength <- length(z[[1]])
    if(importedLength != sasnobs[a])
      cat('\nError: NOBS reported by SAS (',sasnobs[a],') for dataset ',
          a,' is not the same as imported length (', importedLength,
          ')\n', sep='')
    if(multi) w[[a]] <- z
  }
  if(multi) w else z
}

sasdsLabels <- function(file) {
  w <- scan(file, sep='\n', what='', quiet=TRUE)
  i <- grep('Data Set Name:', w)
  if(!length(i)) return(NULL)
  n <- tolower(sub('.*\\.([A-Z0-9\\_]*)[[:space:]]+.*','\\1',w[i]))
  w <- gsub('\t','',w)
  labs <- ifelse(nchar(w[i-1])==0,w[i-2],w[i-1])
  names(labs) <- n
  labs
}
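## Hedged usage sketch for the importers above (hypothetical file and
## member names; wrapped in if(FALSE) so nothing runs on source):
if(FALSE) {
  ## all datasets in a SAS transport file, names lower-cased, and any
  ## PROC FORMAT CNTLOUT= value labels applied
  w <- sasxport.get('mydata.xpt')
  ## one SAS dataset from the current directory, two variables only
  d <- sas.get('.', member='mydata', variables=c('age', 'sex'))
}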
Hmisc/R/Misc.s
if(!exists("NROW", mode='function')) {
  NROW <- function(x)
    if (is.array(x) || is.data.frame(x)) nrow(x) else length(x)
}

if(!exists("NCOL", mode='function')) {
  NCOL <- function(x)
    if (is.array(x) && length(dim(x)) > 1 || is.data.frame(x)) ncol(x) else as.integer(1)
}

prn <- function(x, txt, file='')
{
  calltext <- as.character(sys.call())[2]
  if(file != '') sink(file, append=TRUE)

  if(!missing(txt)) {
    if(nchar(txt) + nchar(calltext) +3 > .Options$width)
      calltext <- paste('\n\n ',calltext,sep='')
    else
      txt <- paste(txt, ' ', sep='')
    cat('\n', txt, calltext, '\n\n', sep='')
  }
  else cat('\n',calltext,'\n\n',sep='')
  print(x)
  if(file != '') sink()
  invisible()
}

formatSep <- function(x, digits, ...)
{
  y <- character(length(x))
  for(i in 1:length(x))
    y[i] <- if(missing(digits)) format(x[i], ...)
            else format(x[i],digits=digits, ...)
  names(y) <- names(x)
  y
}

nomiss <- function(x)
{
  if(is.data.frame(x)) na.exclude(x)
  else if(is.matrix(x)) x[!is.na(x %*% rep(1,ncol(x))),]
  else x[!is.na(x)]
}

fillin <- function(v, p)
{
  v.f <- ifelse(is.na(v),p,v)
  if(length(p)==1)
    label(v.f) <- paste(label(v),"with",sum(is.na(v)),
                        "NAs replaced with",format(p))
  else
    label(v.f) <- paste(label(v),"with",sum(is.na(v)),"NAs replaced")
  v.f
}

spearman <- function(x, y)
{
  x <- as.numeric(x)
  y <- as.numeric(y)  ## 17Jul97
  notna <- !is.na(x+y)  ## exclude NAs
  if(sum(notna) < 3) c(rho=NA)
  else c(rho=cor(rank(x[notna]), rank(y[notna])))
}

plotCorrPrecision <- function(rho=c(0,0.5), n=seq(10,400,length.out=100),
                              conf.int=0.95, offset=.025, ...)
{
  ## Thanks to Xin Wang for computations
  curves <- vector('list', length(rho))
  names(curves) <- paste('r',format(rho),sep='=')
  zcrit <- qnorm(1-(1-conf.int)/2)
  for(i in 1:length(rho)) {
    r <- rho[i]
    z <- .5*log((1+r)/(1-r))
    lo <- z - zcrit/sqrt(n-3)
    hi <- z + zcrit/sqrt(n-3)
    rlo <- (exp(2*lo)-1)/(exp(2*lo)+1)
    rhi <- (exp(2*hi)-1)/(exp(2*hi)+1)
    precision <- pmax(rhi-r, r-rlo)
    curves[[i]] <- list(N=n, Precision=precision)
  }
  labcurve(curves, pl=TRUE, xrestrict=quantile(n,c(.25,1)), offset=offset, ...)
  invisible()
}

trap.rule <- function(x,y) sum(diff(x)*(y[-1]+y[-length(y)]))/2

uncbind <- function(x, prefix="", suffix="")
{
  nn <- dimnames(x)[[2]]
  warning("You are using uncbind. That was a really bad idea. If you had any variables in the global environment named ",
          paste(prefix, nn, suffix, sep="", collapse=", "),
          " they are now overwritten.\n\nYou are now warned.", immediate.
= TRUE, ) for(i in 1:ncol(x)) assign(paste(prefix,nn[i],suffix,sep=""), x[,i], pos=parent.env()) invisible() } ## Function to pick off ordinates of a step-function at user-chosen abscissas stepfun.eval <- function(x, y, xout, type=c("left","right")) { s <- !is.na(x+y) type <- match.arg(type) approx(x[s], y[s], xout=xout, method="constant", f=if(type=="left")0 else 1)$y } km.quick <- function(S, times, q) { S <- S[!is.na(S),] n <- nrow(S) stratvar <- factor(rep(1,nrow(S))) f <- survfitKM(stratvar, S, se.fit=FALSE, conf.type='none') tt <- c(0, f$time) ss <- c(1, f$surv) if(missing(times)) min(tt[ss <= q]) else approx(tt, ss, xout=times, method='constant', f=0)$y } oPar <- function() { ## Saves existing state of par() and makes changes suitable ## for restoring at the end of a high-level graphics functions oldpar <- par() oldpar$fin <- NULL oldpar$new <- FALSE invisible(oldpar) } setParNro <- function(pars) { ## Sets non-read-only par parameters from the input list i <- names(pars) %nin% c('cin','cra','csi','cxy','din','xlog','ylog','gamma','page') invisible(par(pars[i])) } mgp.axis.labels <- function(value, type=c('xy','x','y','x and y')) { type <- match.arg(type) if(missing(value)) { value <- .Options$mgp.axis.labels pr <- par(c('mgp', 'las')) mgp <- pr$mgp if(! length(value)) value <- c(.7, .7) return(switch(type, xy = value, x = c(mgp[1], value[1], mgp[3]), y = c(mgp[1], value[2], mgp[3]), 'x and y' = list(x = c(mgp[1], value[1], mgp[3]), y = c(mgp[1], value[2], mgp[3])))) } if(value[1]=='default') value <- c(.7,.7) ##c(.6, if(par('las')==1) 1.3 else .6) options(mgp.axis.labels=value, TEMPORARY=FALSE) invisible() } mgp.axis <- function(side, at=NULL, ..., mgp=mgp.axis.labels(type=if(side==1 | side==3)'x' else 'y'), axistitle=NULL, cex.axis=par('cex.axis'), cex.lab=par('cex.lab')) { ## Version of axis() that uses appropriate mgp from mgp.axis.labels and ## gets around bug in axis(2, ...) that causes it to assume las=1 mfrow <- par('mfrow') tcl <- max(par('tcl'), -0.4) nr <- mfrow[1]; nc <- mfrow[2] w <- list(side=side) w <- c(w, list(...)) w$cex.axis <- cex.axis if(length(at)) w$at <- at if(side == 1 || side == 3) { w$mgp <- mgp / nr w$tcl <- tcl / nr if(side==1 && length(axistitle)) title(xlab=axistitle, mgp = mgp / min(2.25, nr), cex.lab=cex.lab) } else { w$mgp <- mgp / nc w$tcl <- tcl / nc las <- par('las') w$srt <- 90 * (las == 0) w$adj <- if(las == 0) 0.5 else 1 if(side == 2 && length(axistitle)) title(ylab=axistitle, mgp=mgp / min(2.25, nc), cex.lab=cex.lab) } do.call('axis', w) invisible() } trellis.strip.blank <- function() { s.b <- trellis.par.get("strip.background") s.b$col <- 0 trellis.par.set("strip.background", s.b) s.s <- trellis.par.get("strip.shingle") s.s$col <- 0 trellis.par.set("strip.shingle", s.s) invisible() } lm.fit.qr.bare <- function(x, y, tolerance = NULL, intercept=TRUE, xpxi=FALSE, singzero=FALSE) { if(!length(tolerance)) tolerance <- 1e-7 if(intercept) x <- cbind(Intercept=1, x) else x <- as.matrix(x) z <- lm.fit(x, y, tol=tolerance) coef <- z$coefficients if(singzero && any(isna <- is.na(coef))) coef[isna] <- 0. 
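  ## R^2 is computed directly below as 1 - SSE/SST, where
  ## SSE = sum(residuals^2) and SST = sum((y - mean(y))^2)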
res <- z$residuals sse <- sum(res^2) sst <- sum((y - mean(y))^2) res <- list(coefficients = coef, residuals = res, rsquared = 1 - sse / sst, fitted.values = z$fitted.values) if(xpxi) { p <- 1L : z$rank res$xpxi <- chol2inv(z$qr$qr[p, p, drop=FALSE]) } res } all.is.numeric <- function(x, what=c('test','vector','nonnum'), extras=c('.','NA')) { what <- match.arg(what) x <- sub('[[:space:]]+$', '', x) x <- sub('^[[:space:]]+', '', x) xs <- x[x %nin% c('', extras)] if(! length(xs) || all(is.na(x))) return(switch(what, test = FALSE, vector=x, nonnum=x[0])) isnon <- suppressWarnings(! is.na(xs) & is.na(as.numeric(xs))) isnum <- ! any(isnon) # suppressWarnings below handles extras present in x switch(what, test = isnum, vector = if(isnum) suppressWarnings(as.numeric(x)) else x, nonnum = xs[isnon]) } Lag <- function(x, shift=1) { ## Lags vector x shift observations, padding with NAs or blank strings ## preserving attributes of x xLen <- length(x) if(shift == 0) return(x) # Create base vector use character to generate "" for mode "character" # Coerce base vector to be type of x ret <- as.vector(character(xLen), mode=storage.mode(x)) # set resp attributes equal to x attributes attrib <- attributes(x) if(length(attrib$label)) attrib$label <- paste(attrib$label, 'lagged', shift, 'observations') if(abs(shift) < xLen) { if(shift > 0) ret[-(1:shift)] <- x[1:(xLen - shift)] else ret[1:(xLen+shift)] <- x[(1-shift):xLen] } attributes(ret) <- attrib return(ret) } xySortNoDupNoNA <- function(x, y) { if(is.list(x)) { y <- x[[2]]; x <- x[[1]] } s <- !is.na(x + y) if(any(s)) { x <- x[s]; y <- y[s] } i <- order(x) x <- x[i] y <- y[i] i <- !duplicated(x) list(x=x[i], y=y[i]) } outerText <- function(string, y, cex=par('cex'), ...) { usr <- par('usr'); plt <- par('plt') pos <- usr[2] + (usr[2] - usr[1])/(plt[2] - plt[1]) * (1 - plt[2]) axis(2, at=y, labels=string, tick=FALSE, las=1, pos=pos, cex.axis=cex, xpd=NA) } ## if(missing(space)) space <- max(nchar(string))*.5 ## mtext(string, side=side, las=1, at=y, adj=adj, cex=cex, line=space) # This method does not survive shrinking the graphics window # Right justifies (if adj=1) a vector of strings against the right margin # (side=4) or against the y-axis (side=2) #outerText <- # function(string, y, side=4, cex=par('cex'), adj=1, ...) { # if(side %nin% c(2,4)) stop('only works for side=2 or 4') # x <- if(side==4) grconvertX(1, from='nfc', to='user') else # par('usr')[1] # text(x, y, paste(string,''), cex=cex, adj=adj, xpd=NA) #} ## Old method [dropped because does not scale upon resizing device] ## Use text() to put test strings in left or right margins ## Temporarily sets par(xpd=NA) if using R ## For adj=1 side=4, setAside is a character string used to determine ## the space to set aside for all strings ## space is the number of extra characters to leave to the left of ## the string(s) (adj=0) or to the right (adj=1) if(FALSE) outerText <- function(string, y, setAside=string[1], side=4, space=1, adj=1, cex=par('cex')) { usr <- par('usr') xpd <- par('xpd') if(!is.na(xpd)) { on.exit(par(xpd=xpd)) par(xpd=NA) } ie <- is.expression(string) ## 1sep02 if(ie) adj <- 0 ## adj=1 not work well for expressions in R if(side!=4) stop('only side=4 implemented') if(adj==0) text(usr[2], y, if(ie) string else paste(space,string,sep=''), adj=0) else { usr.space.needed <- strwidth(setAside, units='user', cex=cex) text(usr[2]+0.5*strwidth(space, units='user', cex=cex)+usr.space.needed, y, string, adj=1, cex=cex) # was usr[2]- 18jul02;added 0* 25jul02 ## was 0*strwidth(space,...) 
31jan03 } invisible() } if(FALSE) { expandUsrCoord <- function() { ## Expands usr coordinates of current plot to entire figure region ## so that out of range plots may be plotted pr <- par() usr <- pr$usr p <- pr$plt invisible(pr) } } ## Author: Patrick Connolly <P.Connolly@hortresearch.co.nz> ## HortResearch ## Mt Albert ## Auckland, New Zealand print.char.matrix <- function (x, file = "", col.name.align = "cen", col.txt.align = "right", cell.align = "cen", hsep = "|", vsep = "-", csep = "+", row.names = TRUE, col.names = FALSE, append = FALSE, top.border = TRUE, left.border = TRUE, ...) { ### To print a data frame or matrix to a text file or screen ### and having names line up with stacked cells ### ### First, add row names as first column (might be removed later) ndimn <- names(dimnames(x)) ## FEH rownames <- dimnames(x)[[1]] x <- cbind(rownames, x) names(dimnames(x)) <- ndimn ## FEH cnam <- dimnames(x)[[2]] ## FEH if(length(ndimn)) cnam[1] <- ndimn[1] ## FEH ##dimnames(x)[[1]] <- seq(nrow(x)) 25Mar02 for R FEH dimnames(x) <- list(as.character(seq(nrow(x))), cnam) names(dimnames(x)) <- ndimn ## 26Mar02 FEH ### Set up some padding functions: ### pad.left <- function(z, pads) { ## Pads spaces to left of text padding <- paste(rep(" ", pads), collapse = "") paste(padding, z, sep = "") } pad.mid <- function(z, pads) { ## Centres text in available space padding.right <- paste(rep(" ", pads%/%2), collapse = "") padding.left <- paste(rep(" ", pads - pads%/%2), collapse = "") paste(padding.left, z, padding.right, sep = "") } pad.right <- function(z, pads) { ## Pads spaces to right of text padding <- paste(rep(" ", pads), collapse = "") paste(z, padding, sep = "") } ## (Padding happens on the opposite side to alignment) pad.types <- c("left", "mid", "right") names(pad.types) <- c("right", "cen", "left") pad.name <- pad.types[col.name.align] pad.txt <- pad.types[col.txt.align] pad.cell <- pad.types[cell.align] ## Padding character columns ## Need columns with uniform number of characters pad.char.col.right <- function(y) { ## For aligning text to LHS of column col.width <- nchar(y) biggest <- max(col.width) smallest <- min(col.width) padding <- biggest - col.width out <- NULL for (i in seq(y)) out[i] <- pad.right(y[i], pads = padding[i]) out } pad.char.col.left <- function(y) { ## For aligning text to RHS of column col.width <- nchar(y) biggest <- max(col.width) smallest <- min(col.width) padding <- biggest - col.width out <- NULL for (i in seq(y)) out[i] <- pad.left(y[i], pads = padding[i]) out } pad.char.col.mid <- function(y) { ## For aligning text to centre of column col.width <- nchar(y) biggest <- max(col.width) smallest <- min(col.width) padding <- biggest - col.width out <- NULL for (i in seq(y)) out[i] <- pad.mid(y[i], pads = padding[i]) out } ## which functions to use this time. pad.name.fn <- get(paste("pad.", pad.name, sep = "")) pad.txt.fn <- get(paste("pad.char.col.", pad.txt, sep = "")) pad.cell.fn <- get(paste("pad.", pad.cell, sep = "")) ## Remove troublesome factors x <- as.data.frame(x) fac.col <- names(x)[sapply(x, is.factor)] for (i in fac.col) x[, i] <- I(as.character(x[, i])) ## ARE ANY LINE BREAKS IN ANY COLUMNS? 
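  ## Cells containing embedded "\n" are expanded here into stacked
  ## sub-rows: each logical row becomes max(# of lines) physical rows,
  ## with shorter cells padded by blanks so columns stay aligned.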
break.list <- list() for (i in seq(nrow(x))) { x.i <- unlist(x[i, ]) rows.i <- sapply(strsplit(unlist(x[i, ]), "\n"), length) rows.i[rows.i < 1] <- 1 break.list[[i]] <- rows.i } break.row <- sapply(break.list, function(x) any(x > 1)) names(break.row) <- seq(nrow(x)) xx <- x if (any(break.row)) { ## add in extra row/s xx <- NULL reprow <- lapply(break.list, unique) for (k in seq(nrow(x))) { x.k <- unlist(x[k, ]) x.k[x.k == ""] <- " " if (break.row[k]) { l.k <- strsplit(x.k, "\n") add.blanks <- max(break.list[[k]]) - break.list[[k]] names(l.k) <- names(add.blanks) <- seq(length(l.k)) if (any(add.blanks > 0)) { for (kk in names(add.blanks[add.blanks > 0])) l.k[[kk]] <- c(l.k[[kk]], rep(" ", add.blanks[kk])) } l.k.df <- as.data.frame(l.k) names(l.k.df) <- names(x) xx <- rbind(xx, as.matrix(l.k.df)) } else xx <- rbind(xx, x.k) } row.names(xx) <- paste(rep(row.names(x), sapply(reprow, max)), unlist(reprow), sep = ".") ## Make an index for the rows to be printed rn <- row.names(xx) rnb <- strsplit(rn, "\\.") rpref <- as.numeric(factor(sapply(rnb, function(z) z[1]))) ## was codes( ) 10oct03 } else rpref <- seq(nrow(x)) x <- as.data.frame(xx) ## Character columns need different treatment from numeric columns char.cols <- sapply(x, is.character) if (any(char.cols)) x[char.cols] <- sapply(x[char.cols], pad.txt.fn) ## Change numeric columns into character if (any(!char.cols)) x[!char.cols] <- sapply(x[!char.cols], format) ## now all character columns each of which is uniform element width ## ## Lining up names with their columns ## Sometimes the names of columns are wider than the columns they name, ## sometimes vice versa. names.width <- nchar(names(x)) if (!col.names) names.width <- rep(0, length(names.width)) cell.width <- sapply(x, function(y) max(nchar(as.character(y)))) ## (the width of the characters in the cells as distinct ## from their names) name.pads <- cell.width - names.width cell.pads <- -name.pads name.pads[name.pads < 0] <- 0 cell.pads[cell.pads < 0] <- 0 pad.names <- name.pads > 0 pad.cells <- cell.pads > 0 ## Pad out the column names if necessary: if (any(pad.names)) { stretch.names <- names(x)[pad.names] for (i in stretch.names) { names(x)[names(x) == i] <- pad.name.fn(i, name.pads[i]) } } ## likewise for the cells and columns if (any(pad.cells)) { stretch.cells <- names(x)[pad.cells] for (j in stretch.cells) x[, j] <- pad.cell.fn(x[, j], cell.pads[j]) } ## Remove row names if not required if (!row.names) x <- x[-1] ## Put the column names on top of matrix if (col.names) mat2 <- rbind(names(x), as.matrix(x)) else mat2 <- as.matrix(x) mat.names.width <- nchar(mat2[1, ]) ## character string to separate rows space.h <- "" for (k in seq(along=mat.names.width)) { ## added along= FEH 26Mar02 space.h <- c(space.h, rep(vsep, mat.names.width[k]), csep) } line.sep <- paste(c(ifelse(left.border, csep, ""), space.h), collapse = "") if (col.names) rpref <- c(0, rpref, 0) else rpref <- c(rpref, 0) ## print to screen or file if(top.border && line.sep !='') { write(line.sep, file = file, append = append) append <- TRUE } for (i in 1:nrow(mat2)) { if (left.border) write(paste(paste(c("", mat2[i, ]), collapse = hsep), hsep, sep = ""), file = file, append = append) else write(paste(paste(mat2[i, ], collapse = hsep), hsep, sep = ""), file = file, append = append) append <- TRUE ## print separator if row prefix is not same as next one if (rpref[i] != rpref[i + 1] && line.sep != '') write(line.sep, file = file, append = TRUE) } } unPaste <- function(str, sep='/') { w <- strsplit(str, sep) w <- 
matrix(unlist(w), ncol=length(str)) nr <- nrow(w) ans <- vector('list', nr) for(j in 1:nr) ans[[j]] <- w[j,] ans } get2rowHeads <- function(str) { w <- strsplit(str, '\n') ## strsplit returns character(0) when element="" 23may03 list(sapply(w, function(x)if(length(x)) x[[1]] else ''), sapply(w, function(x)if(length(x) > 1)x[[2]] else '')) } ## Note: can't say f[vector of names] <- list(...) to update args ## In R you have to put ALL arguments in list(...) so sometimes we set ## unneeded ones to NULL. Ignore this assignment in S ## Two lists of functions, one for primitives for S+ or R (either Trellis ## or low-level), one for R grid ## Note: rect is only defined in R, not S+ ordGridFun <- function(grid) { if(!grid) list(lines = function(...) lines(...), points = function(..., size=NULL) { if(length(size)) warning('size not implemented yet') points(...) }, text = function(...) text(...), segments = function(...) segments(...), arrows = function(..., open, size) arrows(..., length=size*.8), rect = function(...) rect(...), polygon = function(x, y=NULL, ..., type=c('l','s')) { type <- match.arg(type) if(!length(y)) { y <- x$y x <- x$x } j <- !is.na(x+y) x <- x[j] y <- y[j] if(type=='s') polygon(makeSteps(x, y), ..., border=NA) else polygon(x, y, ..., border=NA) }, abline = function(...) abline(...), unit = function(x, units='native') { if(units!='native') stop('units="native" is only units implemented outside of grid') x }, axis = function(...) axis(...)) else { list(lines = function(x, y, ...) { if(is.list(x)) { y <- x[[2]]; x <- x[[1]] } llines(if(is.unit(x)) convertX(x, 'native', valueOnly=TRUE) else x, if(is.unit(y)) convertY(y, 'native', valueOnly=TRUE) else y, ...) }, points = function(x, y, ...) { if(is.list(x)) { y <- x[[2]]; x <- x[[1]] } lpoints(if(is.unit(x)) convertX(x, 'native', valueOnly=TRUE) else x, if(is.unit(y)) convertY(y, 'native', valueOnly=TRUE) else y, ...) }, text = function(x, y, ...) { if(is.list(x)) { y <- x[[2]]; x <- x[[1]] } ltext(if(is.unit(x)) convertX(x, 'native', valueOnly=TRUE) else x, if(is.unit(y)) convertY(y, 'native', valueOnly=TRUE) else y, ...) }, segments = function(x0, y0, x1, y1, ...) { grid.segments(x0, y0, x1, y1, default.units='native', gp=gpar(...)) }, arrows = function(...) larrows(...), rect = function(xleft, ybottom, xright, ytop, density, angle, border, xpd, ...) { grid.rect(xleft, ybottom, width=xright-xleft, height=ytop-ybottom, just='left', default.units='native', gp=gpar(...)) }, polygon = function(x, y=NULL, col=par('col'), type=c('l','s'), ...) { type <- match.arg(type) if(!length(y)) { y <- x$y x <- x$x } j <- !is.na(x+y) x <- x[j] y <- y[j] if(type=='s') grid.polygon(makeSteps(x, y), default.units='native', gp=gpar(fill=col, col='transparent', ...)) else grid.polygon(x, y, default.units='native', gp=gpar(fill=col,col='transparent',...)) }, abline=function(...) panel.abline(...), unit = function(x, units='native', ...) unit(x, units=units, ...), axis = function(side=1, at=NULL, labels, ticks=TRUE, distn, line, pos, outer, ...) 
{ if(!length(at))stop('not implemented for at= unspecified') if(side > 2) stop('not implemented for side=3 or 4') ## ticks=ticks removed from grid.?axis FEH 30Aug09 if(side==1) grid.xaxis(at=at, label=labels, gp=gpar(...)) if(side==2) grid.yaxis(at=at, label=labels, gp=gpar(...)) }) } } parGrid <- function(grid=FALSE) { pr <- par() cin <- pr$cin cex <- pr$cex lwd <- pr$lwd if(grid) { ## cvp <- current.viewport() ## usr <- c(cvp$xscale, cvp$yscale) usr <- c(convertX(unit(0:1, "npc"), "native", valueOnly=TRUE), convertY(unit(0:1, "npc"), "native", valueOnly=TRUE)) pin <- c(convertWidth(unit(1, "npc"), "inches", valueOnly=TRUE), convertHeight(unit(1, "npc"), "inches", valueOnly=TRUE)) uin <- c(convertWidth(unit(1, "native"), "inches", valueOnly=TRUE), convertHeight(unit(1, "native"), "inches", valueOnly=TRUE)) } else { usr <- pr$usr pin <- pr$pin uin <- c(pin[1]/(usr[2]-usr[1]), pin[2]/(usr[4]-usr[3])) ## 22Mar01 - R does not have par(uin) } list(usr=usr, pin=pin, uin=uin, cin=cin, cex=cex, lwd=lwd) } ## Replaces R's xinch, yinch, extending them to grid ## These convert inches to data units xInch <- function(x=1, warn.log=!grid, grid=FALSE) { if (warn.log && par("xlog")) warning("x log scale: xInch() is nonsense") pr <- parGrid(grid) x * diff(pr$usr[1:2])/pr$pin[1] } yInch <- function (y = 1, warn.log=!grid, grid=FALSE) { if (warn.log && par("ylog")) warning("y log scale: yInch is nonsense") pr <- parGrid(grid) y * diff(pr$usr[3:4])/pr$pin[2] } na.include <- function(obj) { if(inherits(obj,'data.frame')) for(i in seq(along=obj)) obj[[i]] <- na.include(obj[[i]]) else { if(length(levels(obj)) && any(is.na(obj))) obj <- factor(obj,exclude=NULL) } obj } if(FALSE) { whichClosest <- function(x, w) { ## x: vector of reference values ## w: vector of values to find closest matches in x ## Returns: subscripts in x corresponding to w i <- order(x) x <- x[i] n <- length(x) br <- c(-1e30, x[-n]+diff(x)/2,1e30) m <- length(w) i[.C("bincode", as.double(w), m, as.double(br), length(br), code = integer(m), right = TRUE, include = FALSE, NAOK = TRUE, DUP = FALSE, PACKAGE = "base")$code] } NULL } ## Just as good, ties shuffled to end ## function(x, w) round(approx(x,1:length(x),xout=w,rule=2,ties='ordered')$y) ## Remove ties= for S-Plus. Note: does not work when 2nd arg to ## approx is not uniformly spaced ## NO! 
ties='ordered' bombs in x not ordered ## Try ## approx(c(1,3,5,2,4,2,4),1:7,xout=c(1,3,5,2,4,2,4),rule=2,ties=function(x)x[1]) ## NO: only works in general if both x and y are already ordered ## The following runs the same speed as the previous S version (in R anyway) whichClosest <- function(x, w) { ## x: vector of reference values ## w: vector of values for which to lookup closest matches in x ## Returns: subscripts in x corresponding to w ## Assumes no NAs in x or w .Fortran(F_wclosest,as.double(w),as.double(x), length(w),length(x), j=integer(length(w)))$j } whichClosePW <- function(x, w, f=0.2) { lx <- length(x) lw <- length(w) .Fortran(F_wclosepw,as.double(w),as.double(x), as.double(runif(lw)),as.double(f), lw, lx, double(lx), j=integer(lw))$j } whichClosek <- function(x, w, k) { ## x: vector of reference values ## w: vector of values for which to lookup close matches in x ## Returns: subscripts in x corresponding to w ## Assumes no NAs in x or w ## First jitters x so there are no ties ## Finds the k closest matches and takes a single random pick of these k y <- diff(sort(x)) mindif <- if(all(y == 0)) 1 else min(y[y > 0]) x <- x + runif(length(x), -mindif/100, mindif/100) z <- abs(outer(w, x, "-")) s <- apply(z, 1, function(u) order(u)[1:k]) if(k == 1) return(s) apply(s, 2, function(u) sample(u, 1)) } if(FALSE) { sampWtdDist <- function(x, w) { ## x: vector of reference values ## w: vector of values to find closest matches in x ## Returns: subscripts in x corresponding to w ## 25% slower but simpler method: ## z <- abs(outer(w, x, "-")) ## s <- apply(z, 1, max) ## z <- (1 - sweep(z, 1, s, FUN='/')^3)^3 ## sums <- apply(z, 1, sum) ## z <- sweep(z, 1, sums, FUN='/') lx <- length(x) lw <- length(w) z <- matrix(abs( rep( x , lw ) - rep( w, each = lx ) ), nrow=lw, ncol=lx, byrow=TRUE) ## Thanks: Chuck Berry ## s <- pmax( abs( w - min(x) ), abs( w - max(x) ) ) # to use max dist s <- rowSums(z)/lx/3 # use 1/3 mean dist for each row tricube <- function(u) (1 - pmin(u,1)^3)^3 ## z <- (1 - (z/rep(s,length.out=lx*lw))^3)^3 # Thanks: Tim Hesterberg z <- tricube(z/s) # Thanks: Tim Hesterberg sums <- rowSums(z) z <- z/sums as.vector(rMultinom(z, 1)) } NULL } approxExtrap <- function(x, y, xout, method='linear', n=50, rule=2, f=0, ties='ordered', na.rm=FALSE) { ## Linear interpolation using approx, with linear extrapolation ## beyond the data if(is.list(x)) { y <- x[[2]]; x <- x[[1]] } ## remove duplicates and order so can do linear extrapolation if(na.rm) { d <- ! is.na(x + y) x <- x[d]; y <- y[d] } x <- as.numeric(x) # handles dates etc. y <- as.numeric(y) d <- ! 
duplicated(x) x <- x[d] y <- y[d] d <- order(x) x <- x[d] y <- y[d] w <- approx(x, y, xout=xout, method=method, n=n, rule=2, f=f, ties=ties)$y r <- range(x) d <- xout < r[1] if(any(is.na(d))) stop('NAs not allowed in xout') if(any(d)) w[d] <- (y[2]-y[1])/(x[2]-x[1])*(xout[d]-x[1])+y[1] d <- xout > r[2] n <- length(y) if(any(d)) w[d] <- (y[n]-y[n-1])/(x[n]-x[n-1])*(xout[d]-x[n-1])+y[n-1] list(x=xout, y=w) } inverseFunction <- function(x, y) { d <- diff(y) xd <- x[-1] dl <- c(NA, d[-length(d)]) ic <- which(d>=0 & dl<0 | d>0 & dl<=0 | d<=0 & dl>0 | d<0 & dl>=0) nt <- length(ic) k <- nt + 1 if(k==1) { h <- function(y, xx, yy, turns, what, coef) approx(yy, xx, xout=y, rule=2)$y formals(h) <- list(y=numeric(0), xx=x, yy=y, turns=numeric(0), what=character(0), coef=numeric(0)) return(h) } turns <- x[ic] turnse <- c(-Inf, turns, Inf) xrange <- yrange <- matrix(NA, nrow=k, ncol=2) for(j in 1:k) { l <- which(x >= turnse[j] & x <= turnse[j+1]) xrange[j,] <- x[l[c(1,length(l))]] yrange[j,] <- y[l[c(1,length(l))]] } for(j in 1:length(ic)) { l <- (ic[j]-1):(ic[j]+1) turns[j] <- approxExtrap(d[l], xd[l], xout=0, na.rm=TRUE)$y } hh <- function(y, xx, yy, turns, xrange, yrange, what, coef) { what <- match.arg(what) ## Find number of monotonic intervals containing a given y value ylo <- pmin(yrange[,1],yrange[,2]) yhi <- pmax(yrange[,1],yrange[,2]) n <- outer(y, ylo, function(a,b) a >= b) & outer(y, yhi, function(a,b) a <= b) ## Columns of n indicate whether or not y interval applies ni <- nrow(yrange) fi <- matrix(NA, nrow=length(y), ncol=ni) turnse <- c(-Inf, turns, Inf) for(i in 1:ni) { w <- n[,i] if(any(w)) { l <- xx >= turnse[i] & xx <= turnse[i+1] fi[w,i] <- approx(yy[l], xx[l], xout=y[w])$y } } noint <- !apply(n, 1, any) if(any(noint)) { ## Determine if y is closer to yy at extreme left or extreme right ## of an interval m <- length(yy) yl <- as.vector(yrange); xl <- as.vector(xrange) fi[noint,1] <- xl[whichClosest(yl, y[noint])] } if(what=='sample') apply(fi, 1, function(x) { z <- x[!is.na(x)] if(length(z)==1) z else if(length(z)==0) NA else sample(z, size=1) }) else fi } formals(hh) <- list(y=numeric(0), xx=x, yy=y, turns=turns, xrange=xrange, yrange=yrange, what=c('all', 'sample'), coef=numeric(0)) ## coef is there for compatibility with areg use hh } Names2names <- function(x) { if(is.list(x)) { } else { n <- names(attributes(x)) if(any(n=='.Names')) names(attributes(x)) <- ifelse(n=='.Names','names',n) } x } ##xedit <- function(file, header, title, delete.file=FALSE) { ## In R, use e.g. options(pager=xedit); page(x,'p') ## sys(paste('xedit -title "', title, '" ', file, ' &', ## sep='')) ## invisible() ##} if(FALSE) { gless <- function(x, ...) { ## Usage: gless(x) - uses print method for x, puts in window with ## gless using name of x as file name prefixed by ~, leaves window open nam <- substring(deparse(substitute(x)), 1, 40) file <- paste('/tmp/',nam,sep='~') #tempfile('Rpage.') sink(file) ## cat(nam,'\n' ) ## if(length(attr(x,'label')) && !inherits(x,'labelled')) ## cat(attr(x,'label'),'\n') ## cat('\n') print(x, ...) sink() sys(paste('gless --geometry=600x400 "',file,'" &',sep='')) ## gless does not have a title option invisible() } NULL } xless <- function(x, ..., title=substring(deparse(substitute(x)),1,40)) { ## Usage: xless(x) - uses print method for x, puts in persistent window with ## xless using name of x as title (unless title= is specified) file <- tempfile() sink(file) print(x, ...) 
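## all print() output has now been captured in the temporary file; the sink
## is closed next so the complete file can be handed to the xless viewer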
sink() cmd <- paste('xless -title "',title,'" -geometry "90x40" "', file,'" &',sep='') system(cmd) invisible() } pasteFit <- function(x, sep=',', width=.Options$width) { ## pastes as many elements of character vector x as will fit in a line ## of width 'width', starting new lines when needed ## result is the lines of pasted text m <- nchar(x) out <- character(0) cur <- '' n <- 0 for(i in 1:length(x)) { if(cur=='' | (m[i] + nchar(cur) <= width)) cur <- paste(cur, x[i], sep=if(cur=='')'' else sep) else { out <- c(out, cur) cur <- x[i] } } if(cur != '') out <- c(out, cur) out } ## Determine if variable is a date, time, or date/time variable in R. ## The following 2 functions are used by describe.vector ## timeUsed assumes is date/time combination variable and has no NAs testDateTime <- function(x, what=c('either','both','timeVaries')) { what <- match.arg(what) cl <- class(x) if(!length(cl)) return(FALSE) dc <- c('Date', 'POSIXt','POSIXct','dates','times','chron') dtc <- c('POSIXt','POSIXct','chron') switch(what, either = any(cl %in% dc), both = any(cl %in% dtc), timeVaries = { if('chron' %in% cl || 'Date' %in% cl) { ## chron or S+ timeDate y <- as.numeric(x) length(unique(round(y - floor(y),13))) > 1 } else length(unique(format(x,'%H%M%S'))) > 1 }) } ## Format date/time variable from either R or S+ ## x = a numeric summary of the original variable (e.g., mean) ## at = attributes of original variable formatDateTime <- function(x, at, roundDay=FALSE) { cl <- at$class w <- if(any(cl %in% c('chron','dates','times'))){ attributes(x) <- at fmt <- at$format if(roundDay) { if (!requireNamespace("chron", quietly = TRUE)) stop("'roundDay = TRUE' requires the 'chron' package.") if(length(fmt)==2 && is.character(fmt)) format(chron::dates(x), fmt[1]) else format(chron::dates(x)) } else x } else { attributes(x) <- at if(roundDay && 'Date' %nin% at$class) as.POSIXct(round(x, 'days')) else x } format(w) } ## Try to guess whether a factor or character variable is a date, and ## allow for partial dates of the form YYYY and mm/YYYY, the former ## only used if at least one observation has a month in it. If a minority ## fraction of observations fracnn or less is not convertable to a date, ## set those observations to NA ## Allows for mixing of common date forms across observations ## ## Example: ## ## x <- convertPdate(c(rep('2019-03-04',7), '2018', '03/2018', 'junk', '3/11/17','2017-01-01','2017-01-01', NA, '')) ## x ## describe(x) convertPdate <- function(x, fracnn=0.3, considerNA=NULL) { xo <- x if(is.factor(x)) x <- as.character(x) if(! is.character(x)) return(xo) x <- trimws(x) if(all(is.na(x)) || all(x =='')) return(xo) ymd <- grepl('^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}$', x) mdy <- grepl('^[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}$', x) y <- grepl('^[0-9]{4}$', x) my <- grepl('^[0-9]{1,2}/[0-9]{4}$', x) m <- sum(x %nin% c('', considerNA) & ! is.na(x)) * (1 - fracnn) dny <- ymd | mdy | my # date other than just YYYY d <- dny | (y & any(dny)) ## The variable is a date if at least m values are dates if(sum(d) < m) return(xo) special <- obs <- NULL ndnm <- ! d & ! 
is.na(x) # not date and not missing if(any(ndnm)) { special <- setdiff(x[ndnm], c('', ' ')) obs <- x %in% special x[ndnm] <- NA # values such as text comments } x <- ifelse(y, paste0(x, '-07-03'), ifelse(my, gsub('^([0-9]{1,2})/([0-9]{4})$', '\\2-\\1-15', x), ifelse(ymd, x, ifelse(mdy, gsub('^([0-9]{1,2})/([0-9]{1,2})/([0-9]{4})$', '\\3-\\1-\\2', x), NA)))) x <- as.Date(x) if(length(special)) attr(x, 'special.miss') <- list(codes=special, obs=obs) if(any(y | my)) attr(x, 'imputed') <- which(y | my) x } getHdata <- function(file, what=c('data','contents','description','all'), where='https://hbiostat.org/data/repo') { what <- match.arg(what) fn <- as.character(substitute(file)) localrepo <- .Options$localHfiles localrepo <- length(localrepo) && is.logical(localrepo) && localrepo if(localrepo) where <- '~/web/data/repo' ads <- readLines(paste0(where, '/Rcontents.txt')) a <- unlist(strsplit(ads,'.sav|.rda')) if(missing(file)) return(a) wds <- paste(substitute(file), c('rda','sav'), sep='.') if(!any(wds %in% ads)) stop(paste(paste(wds, collapse=','), 'are not on the web site.\nAvailable datasets:\n', paste(a, collapse=' '))) wds <- wds[wds %in% ads] if(what %in% c('contents','all')) { w <- paste(if(fn=='nhgh')'' else 'C',fn,'.html',sep='') browseURL(paste(where, w, sep='/')) } if(what %in% c('description','all')) { ades <- scan(paste(where,'Dcontents.txt',sep='/'),list(''), quiet=TRUE)[[1]] i <- grep(paste(fn,'\\.',sep=''),ades) if(!length(i)) warning(paste('No description file available for',fn)) else { w <- ades[i[1]] browseURL(paste(where, w, sep='/')) } } if(what %nin% c('data','all')) return(invisible()) f <- paste(where, wds, sep='/') if(length(f) > 1) warning(paste('More than one file matched; using the first:', paste(f, collapse=', '))) if(localrepo) return(invisible(load(f, .GlobalEnv))) tf <- tempfile() download.file(f, tf, mode='wb', quiet=TRUE) load(tf, .GlobalEnv) invisible() } hdquantile <- function(x, probs=seq(0, 1, 0.25), se=FALSE, na.rm=FALSE, names=TRUE, weights=FALSE) { if(na.rm) { na <- is.na(x) if(any(na)) x <- x[!na] } x <- sort(x, na.last=TRUE) n <- length(x) if(n < 2) return(rep(NA, length(probs))) m <- n + 1 ps <- probs[probs > 0 & probs < 1] qs <- 1 - ps a <- outer((0:n)/n, ps, function(x,p,m) pbeta(x, p*m, (1-p)*m), m=m) w <- a[-1,] - a[-m,] r <- drop(x %*% w) rp <- range(probs) pp <- ps if(rp[1]==0) { r <- c(x[1], r); pp <- c(0,pp) } if(rp[2]==1) { r <- c(r, x[n]); pp <- c(pp,1) } r <- r[match(pp, probs)] if(names) names(r) <- format(probs) if(weights) attr(r,'weights') <- structure(w, dimnames=list(NULL,format(ps))) if(!se) return(r) if(n < 3) stop('must have n >= 3 to get standard errors') l <- n - 1 a <- outer((0:l)/l, ps, function(x,p,m) pbeta(x, p*m, (1-p)*m), m=m) w <- a[-1,] - a[-n,] storage.mode(x) <- 'double' storage.mode(w) <- 'double' nq <- length(ps) ## Get all n leave-out-one quantile estimates S <- matrix(.Fortran(F_jacklins, x, w, as.integer(n), as.integer(nq), res=double(n*nq))$res, ncol=nq) se <- l * sqrt(diag(var(S))/n) if(rp[1]==0) se <- c(NA, se) if(rp[2]==1) se <- c(se, NA) se <- se[match(pp,probs)] if(names) names(se) <- names(r) attr(r, 'se') <- se r } sepUnitsTrans <- function(x, conversion=c(day=1, month=365.25/12, year=365.25, week=7), round=FALSE, digits=0) { if(!any(is.present(x))) return(x) target <- names(conversion[conversion==1]) if(!length(target)) stop('must specify a target unit with conversion factor=1') lab <- attr(x,'label') x <- ifelse(is.present(x),casefold(as.character(x)),'') for(w in names(conversion)) { i <- grep(w, x) 
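## e.g. with the default conversion vector an entry '3 months' is matched by
## w='month'; the gsub below strips 'months', leaving '3 ', which is then
## multiplied by 365.25/12 to give '91.3125' in the target unit (days)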
if(length(i)) x[i] <- as.character(as.numeric(gsub(paste(w,'s*',sep=''), '', x[i]))* conversion[w]) } i <- grep('[a-z]', x) if(any(i)) warning(paste('variable contains units of measurement not in', paste(names(conversion), collapse=','),':', paste(unique(x[i]),collapse=' '))) x <- as.numeric(x) if(round) x <- round(x, digits) units(x) <- target if(length(lab)) label(x) <- lab x } makeNames <- function(names, unique=FALSE, allow=NULL) { ## Runs make.names with exceptions in vector allow ## By default, R 1.9 make.names is overridden to convert _ to . as ## with S-Plus and previous versions of R. Specify allow='_' otherwise. n <- make.names(names, unique) if(!length(allow)) n <- gsub('_', '.', n) n } Load <- function(object) { nam <- deparse(substitute(object)) path <- .Options$LoadPath if(length(path)) path <- paste(path,'/',sep='') file <- paste(path, nam, '.rda', sep='') load(file, .GlobalEnv) } Save <- function(object, name=deparse(substitute(object)), compress=TRUE) { path <- .Options$LoadPath if(length(path)) path <- paste(path, '/', sep='') .FileName <- paste(path, name, '.rda', sep='') assign(name, object) if(is.logical(compress) && compress) compress <- 'gzip' eval(parse(text=paste('save(', name, ', file="', .FileName, '", compress="', compress, '")', sep=''))) } getZip <- function(url, password=NULL) { ## Allows downloading and reading a .zip file containing one file ## File may be password protected. Password will be requested unless given. ## Password is 'foo' ## url may also be a local file ## Note: to make password-protected zip file z.zip, do zip -e z myfile if(grepl("^https?://", tolower(url))) { f <- tempfile() download.file(url, f) } else f <- url cmd <- if(length(password)) paste('unzip -p -P', password) else 'unzip -p' pipe(paste(cmd, f)) } getLatestSource <- function(x=NULL, package='Hmisc', recent=NULL, avail=FALSE) { urlf <- paste0('https://hbiostat.org/R/', package, '/dir.txt') fs <- scan(urlf, what=list('', ''), sep=' ', quiet=TRUE) dates <- fs[[1]] files <- fs[[2]] url <- if(length(recent)) paste0('https://github.com/harrelfe/', package, '/commits/master/R') else paste0('https://github.com/harrelfe/', package, '/tree/master/R/') if(avail) return(data.frame(file=files, date=as.Date(dates))) if(length(recent)) x <- files[1:recent] if(length(x)==1 && x=='all') x <- files for(fun in x) { i <- which(files==fun) if(!length(i)) stop(paste('no file ', fun,' in ',package, sep='')) cat('Fetching', fun, dates[i],'\n') url <- paste0('https://raw.githubusercontent.com/harrelfe/', package, '/master/R/', fun) source(url) } } clowess <- function(x, y=NULL, iter=3, ...) { ## to get around bug in lowess with occasional wild values with iter>0 r <- range(if(length(y)) y else x$y) f <- lowess(x, y, iter=iter, ...) if(iter != 0 && any(f$y < r[1] | f$y > r[2])) f <- lowess(x, y, iter=0) f } prselect <- function(x, start=NULL, stop=NULL, i=0, j=0, pr=TRUE) { f <- function(pattern, x) { y <- grep(pattern, x) if(length(y) > 1) y <- y[1] y } lx <- length(x) k <- if(length(start)) f(start, x) else 1 if(length(k)) { k <- k + i m <- if(length(stop)) { w <- f(stop, x[k:lx]) if(length(w)) w + k - 1 + j else -1 } else lx if(m > 0) x <- if(k==1) (if(m==lx) '...' else c('...', x[-(k:m)])) else { if(m==lx) c(x[-(k:m)], '...') else c(x[1:(k-1)], '...', x[(m+1):lx]) } } else # no start specified; keep lines after stop { m <- f(stop, x) if(length(m) > 0) { m <- if(length(m)) m + j - 1 else lx x <- if(m==lx) '...' 
else c('...', x[-(1:m)]) } } if(pr) cat(x, sep='\n') invisible(x) } ## The following is taken from survival:::plot.survfit internal dostep function ## Remove code to remove duplicates in y makeSteps <- function(x, y) { if (is.na(x[1] + y[1])) { x <- x[-1] y <- y[-1] } n <- length(x) if (n > 2) { xrep <- rep(x, c(1, rep(2, n - 1))) yrep <- rep(y, c(rep(2, n - 1), 1)) list(x = xrep, y = yrep) } else if (n == 1) list(x = x, y = y) else list(x = x[c(1, 2, 2)], y = y[c(1, 1, 2)]) } latexBuild <- function(..., insert=NULL, sep='') { w <- list(...) l <- length(w) if(l %% 2 != 0) stop('# arguments must be multiple of 2') k <- l / 2 j <- 1 txt <- op <- character(0) for(i in 1 : k) { a <- w[[j]] if(length(a)) { txt <- c(txt, a) if(w[[j + 1]] != '') op <- c(op, w[[j + 1]]) } j <- j + 2 } txt <- paste(txt, collapse=sep) w <- character(0) close <- if(length(op)) { for(y in rev(op)) { if(length(insert)) for(ins in insert) if(length(ins) && ins[[1]] == y && ins[[2]] == 'before') w <- c(w, '\n', ins[[3]]) w <- c(w, if(y == '(') ')' else if(y == '{') '}' else if(y == '[') ']' else sprintf('\\end{%s}', y)) if(length(insert)) for(ins in insert) if(length(ins) && ins[[1]] == y && ins[[2]] == 'after') w <- c(w, '\n', ins[[3]]) } paste(w, collapse=sep) } structure(txt, close=close) } getRs <- function(file=NULL, guser='harrelfe', grepo='rscripts', gdir='raw/master', dir=NULL, browse=c('local', 'browser'), cats=FALSE, put=c('source', 'rstudio')) { browse <- match.arg(browse) put <- match.arg(put) localrepo <- .Options$localHfiles localrepo <- length(localrepo) && is.logical(localrepo) && localrepo if(localrepo) where <- '~/R/rscripts' else { where <- paste('https://github.com', guser, grepo, gdir, sep='/') if(length(dir)) where <- paste(where, dir, sep='/') } trim <- function(x) sub('^[[:space:]]+','',sub('[[:space:]]+$','', x)) pc <- function(s) { wr <- function(x) { n <- length(x) z <- character(n) for(i in 1 : n) z[i] <- paste(strwrap(x[i], width=15), collapse='\n') z } s <- with(s, cbind(Major = wr(Major), Minor = wr(Minor), File = wr(File), Type = wr(Type), Description = wr(Description))) print.char.matrix(s, col.names=TRUE) } read.table.HTTPS <- function(url) { res <- tryCatch(read.table(url, sep='|', quote='', header=TRUE, as.is=TRUE), error=function(e) e) if(inherits(res, "simpleError")) { if(res$message == "https:// URLs are not supported") { res$message <- paste(res$message, "Try installing R version >= 3.2.0", sep="\n\n") } stop(res) } res } download.file.HTTPS <- function(url, file, method='libcurl', quiet=TRUE, extra='--no-check-certificate') { res <- tryCatch(download.file(url, file, method, quiet=quiet, extra=extra), error=function(e) e) if(inherits(res, "simpleError")) { if(res$message == "download.file(method = \"libcurl\") is not supported on this platform") { warning(paste(res$message, "Try installing R version >= 3.2.0", "Attempting method=\"wget\"", sep="\n\n")) return(download.file.HTTPS(url, file, method='wget')) } if(res$message == "https:// URLs are not supported") { res$message <- paste(res$message, "Try installing R version >= 3.2.0", sep="\n\n") } stop(res) } invisible(res) } if(! 
length(file)) {
  s <- read.table.HTTPS(paste(where, 'contents.md', sep='/'))
  s <- s[-1,]
  names(s) <- c('Major', 'Minor', 'File', 'Type', 'Description')
  sd <- s; n <- nrow(s)   # sd = s with dittoed items duplicated
  for(x in c('Major', 'Minor')) {
    u <- v <- gsub('\\*\\*', '', trim(s[[x]]))
    for(i in 2 : n) if(u[i] == '"') u[i] <- u[i - 1]
    v <- gsub('"', '', v)
    s[[x]] <- v; sd[[x]] <- u
  }
  s$File <- trim(gsub('\\[(.*)\\].*', '\\1', s$File))
  d <- trim(gsub('\\[.*\\]\\(.*\\)', '', s$Description))
  s$Description <- gsub('\\[report\\].*', '', d)
  if(is.logical(cats)) {
    if(cats) {
      ## List all major and minor categories
      maj <- sort(unique(sd$Major))
      min <- setdiff(sort(unique(sd$Minor)), '')
      cat('\nMajor categories:\n', maj, '\nMinor categories:\n', min, '', sep='\n')
      return(invisible(list(Major=maj, Minor=min)))
    }
  } else {
    ## list all scripts whose "first hit" major category contains cats
    i <- grepl(tolower(cats), tolower(sd$Major))
    if(! any(i)) cat('No scripts with', cats, 'in major category\n')
    else pc(s[i, ])
    return(invisible(s[i, ]))
  }
  if(browse == 'local') pc(s)
  else browseURL(if(localrepo) '~/R/rscripts/contents.md'
                 else 'https://github.com/harrelfe/rscripts/blob/master/contents.md')
  return(invisible(s))
}
if(put == 'source') return(invisible(source(paste(where, file, sep='/'))))
if(localrepo) file.copy(paste(where, file, sep='/'), file)
else download.file.HTTPS(paste(where, file, sep='/'), file)
if(requireNamespace('rstudioapi', quietly=TRUE) && rstudioapi::isAvailable())
  rstudioapi::navigateToFile(file)
else file.edit(file)
invisible() }
knitrSet <- function(basename = NULL, w=if(! bd) 4, h=if(! bd) 3, wo=NULL, ho=NULL, fig.path = if(length(basename)) basename else '', fig.align = if(! bd) 'center', fig.show = 'hold', fig.pos = if(! bd) 'htbp', fig.lp = if(! bd) paste('fig', basename, sep=':'), dev = switch(lang, latex='pdf', markdown='png', blogdown=NULL, quarto=NULL), tidy=FALSE, error=FALSE, messages=c('messages.txt', 'console'), width=61, decinline=5, size=NULL, cache=FALSE, echo=TRUE, results='markup', capfile=NULL, lang=c('latex','markdown','blogdown','quarto'))
{
  if(! requireNamespace('knitr')) stop('knitr package not available')
  messages <- match.arg(messages)
  lang <- match.arg(lang)
  options(knitrSet.lang = lang)
  bd <- lang %in% c('blogdown', 'quarto')
  ## Specify e.g. dev=c('pdf','png') or dev=c('pdf','postscript')
  ## to produce two graphics files for each plot
  ## But: dev='CairoPNG' is preferred for png
  if(length(basename)) basename <- paste(basename, '-', sep='')
  ## Default width fills Sweavel boxes when font size is \small and svmono.cls
  ## is in effect (use 65 without svmono)
  if(lang == 'latex') knitr::render_listings()
  if(messages != 'console') {
    unlink(messages)  # Start fresh with each run
    hook_log = function(x, options) cat(x, file=messages, append=TRUE)
    knitr::knit_hooks$set(warning = hook_log, message = hook_log)
  }
  else knitr::opts_chunk$set(message=FALSE, warning=FALSE)
  if(length(size)) knitr::opts_chunk$set(size = size)
  ## For htmlcap see http://stackoverflow.com/questions/15010732
  ## Causes collisions in html and plotly output; Original (no better)
  ## enclosed in <p class="caption"> ... </p>
  # if(lang == 'markdown')
  #   knitr::knit_hooks$set(htmlcap = function(before, options, envir) {
  #     if(!
before) options$htmlcap # htmltools::HTML(paste0('<br><div style="font-size: 75%;">', # options$htmlcap, "</div><br>")) # }) if(length(decinline)) { rnd <- function(x, dec) if(!is.numeric(x)) x else round(x, dec) formals(rnd) <- list(x=NULL, dec=decinline) knitr::knit_hooks$set(inline = rnd) } knitr::knit_hooks$set(par=function(before, options, envir) if(before && options$fig.show != 'none') { p <- c('bty','mfrow','ps','bot','top','left','rt','lwd', 'mgp','las','tcl','axes','xpd') pars <- knitr::opts_current$get(p) pars <- pars[! is.na(names(pars))] ## knitr 1.6 started returning NULLs for unspecified pars i <- sapply(pars, function(x) length(x) > 0) .spar. <- function(mar=if(!axes) c(2.25+bot-.45*multi,2*(las==1)+2+left,.5+top+.25*multi, .5+rt) else c(3.25+bot-.45*multi,2*(las==1)+3.5+left,.5+top+.25*multi, .5+rt), lwd = if(multi)1 else 1.75, mgp = if(!axes) mgp=c(.75, .1, 0) else if(multi) c(1.5, .365, 0) else c(2.4-.4, 0.475, 0), tcl = if(multi)-0.25 else -0.4, xpd=FALSE, las=1, bot=0, left=0, top=0, rt=0, ps=if(multi) 14 else 12, mfrow=NULL, axes=TRUE, cex.lab=1.15, cex.axis=1, ...) { multi <- length(mfrow) > 0 par(mar=mar, lwd=lwd, mgp=mgp, tcl=tcl, ps=ps, xpd=xpd, cex.lab=cex.lab, cex.axis=cex.axis, las=las, ...) if(multi) par(mfrow=mfrow) } if(any(i)) do.call(.spar., pars[i]) else .spar.() }) knitr::opts_knit$set(width=width) if(length(capfile)) { options(FigCapFile=capfile) cf <- function(before, options, envir) { if(before) return() lang <- getOption('knitrSet.lang') label <- knitr::opts_current$get('label') prefx <- if(lang == 'quarto') '' else options$fig.lp figname <- paste0(prefx, label) ## Quarto uses a chunk figure label convention fig-... ## and figures are referenced by @fig-... figref <- if(grepl('^fig-', figname)) paste0('@', figname) else paste0('\\@ref(', figname, ')') cap <- options$fig.cap scap <- options$fig.scap if(length(cap) && is.call(cap)) cap <- eval(cap) if(length(scap) && is.call(scap)) scap <- eval(scap) if( ! length(scap) || scap == '') scap <- cap if(length(scap) && scap != '') cat(label, figref, paste0('"', scap, '"\n'), sep=',', append=TRUE, file=getOption('FigCapFile')) } knitr::knit_hooks$set(capfileFun=cf) } ## May want to see https://stackoverflow.com/questions/37116632/r-markdown-html-number-figures ## aliases=c(h='fig.height', w='fig.width', cap='fig.cap', scap='fig.scap')) ## eval.after = c('fig.cap','fig.scap'), ## error=error) #, keep.source=keep.source (TRUE)) ## See if need to remove dev=dev from below because of plotly graphics w <- list(fig.path=fig.path, fig.align=fig.align, fig.width=w, fig.height=h, out.width=wo,out.height=ho, fig.show=fig.show, fig.lp=fig.lp, fig.pos=fig.pos, dev=dev, par=TRUE, capfileFun=length(capfile) > 0, tidy=tidy, cache=cache, echo=echo, error=error, comment='', results=results) if(bd) w$fig.path <- NULL w <- w[sapply(w, function(x) length(x) > 0)] ## knitr doesn't like null fig.align etc. 
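## Illustrative setup-chunk call (argument values here are hypothetical):
##   knitrSet(lang='markdown', basename='fig/report', w=5, h=4)
## This yields fig.path='fig/report-' (note the '-' appended to basename
## above), centers figures, and routes messages/warnings to messages.txt
## unless messages='console' is given.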
do.call(knitr::opts_chunk$set, w) if(lang %in% c('markdown', 'blogdown')) knitr::knit_hooks$set(uncover=markupSpecs$html$uncover) hook_chunk = knitr::knit_hooks$get('chunk') ## centering will not allow too-wide figures to go into left margin if(lang == 'latex') knitr::knit_hooks$set(chunk = function(x, options) { res = hook_chunk(x, options) if (options$fig.align != 'center') return(res) gsub('\\{\\\\centering (\\\\includegraphics.+)\n\n\\}', '\\\\centerline{\\1}', res) }) knitr::set_alias(w = 'fig.width', h = 'fig.height', wo = 'out.width', ho = 'out.height', cap = 'fig.cap', scap ='fig.scap') } ## see http://yihui.name/knitr/options#package_options ## Use caption package options to control caption font size grType <- function() { if(! length(find.package('plotly', quiet=TRUE))) return('base') if(length(g <- .Options$grType) && g == 'plotly') 'plotly' else 'base' } prType <- function() { g <- .Options$prType if(! length(g)) 'plain' else g } htmlSpecialType <- function() { if(length(g <- .Options$htmlSpecialType) && g == '&') '&' else 'unicode' } ## Save a plotly graphic with name foo.png where foo is the name of the ## current chunk ## http://stackoverflow.com/questions/33959635/exporting-png-files-from-plotly-in-r plotlySave <- function(x, ...) { if (!requireNamespace("plotly")) stop("This function requires the 'plotly' package.") chunkname <- knitr::opts_current$get("label") path <- knitr::opts_chunk$get('fig.path') if(is.list(x) & ! inherits(x, 'plotly_hash')) { for(w in names(x)) { file <- paste0(path, chunkname, '-', w, '.png') plotly::plotly_IMAGE(x[[w]], format='png', out_file=file, ...) } } else { file <- paste0(path, chunkname, '.png') plotly::plotly_IMAGE(x, format='png', out_file=file, ...) } invisible() } ## Miscellaneous functions helpful for plotly specifications plotlyParm = list( ## Needed height in pixels for a plotly dot chart given the number of ## rows in the chart heightDotchart = function(rows, per=25, low=200, high=800) min(high, max(low, per * rows)), ## Given a vector of row labels that appear to the left on a dot chart, ## compute the needed chart height taking label line breaks into account ## Since plotly devotes the same vertical space to each category, ## just need to find the maximum number of breaks present heightDotchartb = function(x, per=40, low=c(200, 200, 250, 300, 375)[min(nx, 5)], high=1700) { x <- if(is.factor(x)) levels(x) else sort(as.character(x)) nx <- length(x) m <- sapply(strsplit(x, '<br>'), length) # If no two categories in a row are at the max # lines, # reduce max by 1 mx <- max(m) lm <- length(m) mlag <- if(lm == 1) 0 else c(0, m[1:(lm - 1)]) if(! any(m == mx & mlag == mx)) mx <- mx - 1 z <- 1 + (if(mx > 1) 0.5 * (mx - 1) else 0) min(high, max(low, per * length(x) * z)) }, ## Colors for unordered categories colUnorder = function(n=5, col=colorspace::rainbow_hcl) { if(! is.function(col)) rep(col, length.out=n) else col(n) }, ## Colors for ordered levels colOrdered = function(n=5, col=viridis::viridis) { if(! 
is.function(col)) rep(col, length.out=n) else col(n) }, ## Margin to leave enough room for long labels on left or right as ## in dotcharts lrmargin = function(x, wmax=190, mult=7) { if(is.character(x)) x <- max(nchar(x)) min(wmax, max(70, x * mult)) } ) ## Function written by Dirk Eddelbuettel: tobase64image <- function (file, Rd = FALSE, alt = "image") { input <- normalizePath(file, mustWork = TRUE) buf <- readBin(input, raw(), file.info(input)$size) base64 <- base64enc::base64encode(buf) sprintf("%s<img src=\"data:image/png;base64,\n%s\" alt=\"%s\" />%s", if (Rd) "\\out{" else "", base64, alt, if (Rd) "}" else "") } plotp <- function(data, ...) UseMethod("plotp") keepHattrib <- function(obj) { g <- function(x) { a <- attributes(x) i <- intersect(names(a), c('label', 'units')) if(length(i)) a[i] } if(! is.list(obj)) list(.single.variable.=g(obj)) else sapply(obj, g) } restoreHattrib <- function(obj, attribs) { nam <- names(obj) for(n in names(attribs)) { a <- attribs[[n]] if(length(a)) { sv <- n == '.single.variable.' if(sv || n %in% nam) { x <- if(sv) obj else obj[[n]] if(length(a$label)) label(x) <- a$label if(length(a$units)) units(x) <- a$units if(sv) return(x) obj[[n]] <- x } } } obj } if(FALSE) { Hglossary <- list(Gmd=list('Gini\'s mean difference', 'a measure of dispersion defined as the mean absolute difference over all possible pairs of different observations. It is more robust than the standard deviation.', 'https://www.researchgate.net/publication/5182211_Gini\'s_Mean_Difference_A_Superior_Measure_of_Variability_for_Non-Normal_Distributions'), Info=list('Information index', 'a measure of the information content in a numeric variable relative to the information in a continuous numeric variable with no ties. The lowest value of Info occurs in a very imbalanced binary variable. Info comes from the approximate formula for the variance of a log odds ratio for a proportional odds model/Wilcoxon test, due to Whitehead (1993). Info is the ratio of the variance if there no ties in the data to the variance for the frequency distribution of observed values.', 'http://hbiostat.org/bib/r2.html') ) rHglossary <- function(x, html=TRUE, collapse=TRUE) { nams <- names(Hglossary) i <- which(tolower(nams) == tolower(x)) if(! 
length(i)) stop(paste(x, 'is not defined in Hglossary'))
  w <- Hglossary[[i]]
  sname <- nams[i]
  lname <- w[[1]]
  def   <- w[[2]]
  href  <- w[[3]]
  if(html) {
    lname <- paste0('<a href="', href[1], '">', lname, '</a>')
    if(length(href) > 1) def <- paste0(def, ' <a href="', href[2], '">More information</a>')
  }
  if(collapse) paste0('<details><summary>', sname, '</summary>', lname, ': ', def, '</details>')
  else paste0(sname, ': ', lname, ', ', def)
}
}
## Function to render HTML
## Converts argument to one character string with \n delimiters
## If knitr is currently running, runs this string through
## knitr::asis_output
## Otherwise, makes it browsable HTML using htmltools so that
## an RStudio Viewer or a new browser window will display the result
## See https://github.com/quarto-dev/quarto-cli/discussions/4248 which
## explains that you need to enclose the text to keep from fooling
## Pandoc's reader
rendHTML <- function(x, html=TRUE) {
  x <- paste(x, collapse='\n')
  if(length(getOption('knitr.in.progress'))) {
    if(html) x <- paste0('```{=html}\n', x, '\n```')
    return(knitr::asis_output(x))
  }
  print(htmltools::browsable(htmltools::HTML(x)))
}
## ===== Hmisc/R/substi.s =====
##Substitute y when element of x is missing
##also return an attribute "substi.source"=vector of var names and NAs
substi <- function(x,y,pr=TRUE)
{
  if(length(x)!=length(y)) stop("lengths of x and y are different")
  nf <- is.factor(x) + is.factor(y)
  if(nf==1) stop("both x and y must be factor variables if either is")
  isna <- is.na(x)
  vnames <- sys.call()[c(2,3)]
  if(pr) {
    cat("Variables:",vnames,"\n")
    cat("Used first variable:",sum(!is.na(x)),"\n")
    cat("Used second variable:",sum(is.na(x) & !is.na(y)),"\n")
  }
  if(nf) {
    levs <- unique(c(levels(x),levels(y)))
    x <- as.character(x)
    y <- as.character(y)
    x[isna] <- y[isna]
    x <- factor(x,levs)
    y <- factor(y,levs)
  }
  else x[isna] <- y[isna]
  ss <- ifelse(isna & is.na(y),NA,ifelse(isna,2,1))
  attr(ss,"names") <- NULL
  ss <- factor(ss,labels=vnames)
  if(pr) cat("Obs:",sum(!is.na(x))," Obs missing:",sum(is.na(x)),"\n")
  attr(x,"substi.source") <- ss
  attr(x,'class') <- c("substi",attr(x,'class'))
  x
}
substi.source <- function(x) attr(x,"substi.source")
"[.substi" <- function(x, ...)
{
  ss <- attr(x,"substi.source")
  ats <- attributes(x)
  ats$dimnames <- ats$dim <- ats$names <- ats$substi.source <- attr(x,'class') <- NULL
  x <- (x)[...]
  attributes(x) <- ats
  attr(x,"substi.source") <- ss[...]
  x
}
print.substi <- function(x, ...)
{
  i <- unclass(attr(x, "substi.source"))
  if(!length(i)) {
    print.default(x)
    return(invisible())
  }
  if(is.factor(x)) w <- as.character(x)
  else w <- format(x)
  names(w) <- names(x)
  w[i==2] <- paste(w[i==2], "*", sep = "")
  attr(w, "label") <- attr(w, "substi.source") <- attr(w, "class") <- NULL
  print.default(w, quote = FALSE)
  invisible()
}
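## Minimal illustration of substi and its methods (hypothetical data,
## wrapped in if(FALSE) so it is never run when this file is sourced):
if(FALSE) {
  age1 <- c(21, NA, 33)
  age2 <- c(20, 45, 31)
  a <- substi(age1, age2)   # element 2 is filled in from age2
  a                         # print.substi marks substituted values with *
  substi.source(a)          # factor telling which variable supplied each value
}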
as.data.frame.substi <- function(x, row.names = NULL, optional = FALSE, ...)
{
  nrows <- length(x)
  if(!length(row.names)) {
    ## the next line is not needed for the 1993 version of data.class and is
    ## included for compatibility with 1992 version
    if(length(row.names <- names(x)) == nrows && !any(duplicated(row.names))) {
    }
    else if(optional) row.names <- character(nrows)
    else row.names <- as.character(1:nrows)
  }
  value <- list(x)
  if(!optional) names(value) <- deparse(substitute(x))[[1]]
  structure(value, row.names=row.names, class='data.frame')
}
## ===== Hmisc/R/AFirst.lib.s =====
## $Id$
.noGenerics <- TRUE  # faster loading as new methods not used
.onAttach <- function(libname, pkgname, ...) {
  verbose <- .Options$Hverbose
  if(length(verbose) && verbose)
    packageStartupMessage("Hmisc library by Frank E Harrell Jr\n\n",
                          "Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview\n",
                          "to see overall documentation.\n")
  invisible()
}
## ===== Hmisc/R/redun.s =====
redun <- function(formula, data=NULL, subset=NULL, r2=.9, type=c('ordinary','adjusted'), nk=3, tlinear=TRUE, allcat=FALSE, minfreq=0, iterms=FALSE, pc=FALSE, pr=FALSE, ...)
{
  acall <- match.call()
  type <- match.arg(type)
  if(!inherits(formula,'formula')) stop('formula must be a formula')
  a <- as.character(formula)
  if(length(a)==2 && a[1]=='~' && a[2]=='.' && length(list(...)))
    data <- dataframeReduce(data, ...)
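## When formula is ~. and extra arguments are supplied, dataframeReduce is
## first used to drop problem variables (e.g. ones with a high fraction of
## NAs or too many levels) so that only usable variables enter the analysis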
Terms <- terms(formula, specials='I', data=data) m <- list(formula=formula, data=data, subset=subset, na.action=na.delete) data <- do.call('model.frame', m) nam <- names(data) linear <- nam[attr(Terms,'specials')$I] p <- length(data) n <- nrow(data) at <- attributes(data) na.action <- at$na.action if(pr) cat(n, 'observations used in analysis\n') cat.levels <- vector('list',p) names(cat.levels) <- nam vtype <- rep('s', p); names(vtype) <- nam enough <- rep(TRUE, p) for(i in 1:p) { xi <- data[[i]] ni <- nam[i] iscat <- FALSE if(is.character(xi)) { xi <- as.factor(xi) lev <- levels(xi) iscat <- TRUE } else if(is.factor(xi)) { lev <- levels(xi) iscat <- TRUE } if(iscat) { data[[i]] <- as.integer(xi) cat.levels[[ni]] <- lev vtype[ni] <- 'c' if(minfreq > 0 && sum(table(xi) >= minfreq) < 2) enough[i] <- FALSE } else { u <- unique(xi) if(length(u) == 1) { warning(paste(ni,'is constant')) enough[i] <- FALSE } if(minfreq > 0 && length(u)==2 && sum(table(xi) >= minfreq) < 2) enough[i] <- FALSE if(nk==0 || length(u) < 3 || ni %in% linear) vtype[ni] <- 'l' } } toofew <- nam[!enough] if(length(toofew)) { p <- sum(enough) nam <- nam[enough] cat.levels <- cat.levels[enough] vtype <- vtype[enough] data <- data[enough] } xdf <- ifelse(vtype=='l', 1, nk-1) j <- vtype=='c' if(any(j)) for(i in which(j)) xdf[i] <- length(cat.levels[[i]]) - 1 names(xdf) <- nam orig.df <- sum(xdf) X <- matrix(NA, nrow=n, ncol=orig.df) st <- en <- integer(p) start <- 1 for(i in 1:p) { xi <- data[[i]] x <- aregTran(xi, vtype[i], nk) st[i] <- start nc <- ncol(x) xdf[i]<- nc end <- start + nc - 1 en[i] <- end if(end > orig.df) stop('program logic error') X[,start:end] <- x start <- end + 1 } if(end < orig.df) X <- X[, 1:end, drop=FALSE] ## if couldn't derive the requested number of knots in splines fcan <- function(ix, iy, X, st, en, vtype, tlinear, type, allcat, r2, minfreq) { ## Get all subscripts for variables in the right hand side k <- rep(FALSE, ncol(X)) for(i in ix) k[st[i]:en[i]] <- TRUE ytype <- if(tlinear && vtype[iy]=='s')'l' else vtype[iy] Y <- if(ytype=='l') X[,st[iy],drop=FALSE] else X[,st[iy]:en[iy],drop=FALSE] d <- dim(Y); n <- d[1]; ny <- d[2] f <- cancor(X[,k,drop=FALSE], Y) R2 <- f$cor[1]^2 if(type=='adjusted') { dof <- sum(k) + ny - 1 R2 <- max(0, 1 - (1 - R2)*(n-1)/(n-dof-1)) } ## If variable to possibly remove is categorical with more than 2 ## categories (more than one dummy variable) make sure ALL frequent ## categories are redundant (not just the linear combination of ## dummies) if allcat is TRUE. Do this by substituting for R^2 the ## minimum R^2 over predicting each dummy variable. 
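## For example, a 3-level factor contributes two dummy variables; with
## allcat=TRUE the factor is declared redundant only when each sufficiently
## frequent dummy (per minfreq) is itself predicted with R^2 > r2, not just
## their best single linear combination.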
if(R2 > r2 && allcat && ytype=='c' && (en[iy] > st[iy])) { for(j in st[iy]:en[iy]) { y <- X[,j,drop=FALSE] if(sum(y) >= minfreq && n-sum(y) >= minfreq) { f <- cancor(X[,k,drop=FALSE], y) R2c <- f$cor[1]^2 if(type=='adjusted') { dof <- sum(k) R2c <- max(0, 1 - (1 - R2c)*(n-1)/(n-dof-1)) } R2 <- min(R2, R2c, na.rm=TRUE) } } } R2 } if(iterms) { nc <- ncol(X) nm <- NULL for(i in 1:p) { m <- nam[i] np <- en[i] - st[i] + 1 if(np > 1) for(j in 1:(np-1)) m <- c(m, paste(nam[i], paste(rep("'", j), collapse=''), sep='')) nm <- c(nm, m) if(pc) X[,st[i]:en[i]] <- prcomp(X[,st[i]:en[i]], scale=TRUE)$x } colnames(X) <- nm p <- nc nam <- nm st <- en <- 1:nc vtype <- rep('l', nc) } In <- 1:p; Out <- integer(0) r2r <- numeric(0) r2l <- list() for(i in 1:p) { if(pr) cat('Step',i,'of a maximum of', p, '\r') ## For each variable currently on the right hand side ("In") ## find out how well it can be predicted from all the other "In" variables if(length(In) < 2) break Rsq <- In*0 l <- 0 for(j in In) { l <- l + 1 k <- setdiff(In, j) Rsq[l] <- fcan(k, j, X, st, en, vtype, tlinear, type, allcat, r2, minfreq) } if(i==1) {Rsq1 <- Rsq; names(Rsq1) <- nam[In]} if(max(Rsq) < r2) break removed <- In[which.max(Rsq)] r2removed <- max(Rsq) ## Check that all variables already removed can be predicted ## adequately if new variable 'removed' is removed k <- setdiff(In, removed) r2later <- NULL if(length(Out)) { r2later <- Out*0 names(r2later) <- nam[Out] l <- 0 for(j in Out) { l <- l+1 r2later[l] <- fcan(k, j, X, st, en, vtype, tlinear, type, allcat, r2, minfreq) } if(min(r2later) < r2) break } Out <- c(Out, removed) In <- setdiff(In, Out) r2r <- c(r2r, r2removed) if(length(r2later)) r2l[[i]] <- r2later } if(length(r2r)) names(r2r) <- nam[Out] if(length(r2l)) names(r2l) <- nam[Out] if(pr) cat('\n') structure(list(call=acall, formula=formula, In=nam[In], Out=nam[Out], toofew=toofew, rsquared=r2r, r2later=r2l, rsq1=Rsq1, n=n, p=p, na.action=na.action, vtype=vtype, tlinear=tlinear, allcat=allcat, minfreq=minfreq, nk=nk, df=xdf, cat.levels=cat.levels, r2=r2, type=type), class='redun') } print.redun <- function(x, digits=3, long=TRUE, ...) 
{ cat("\nRedundancy Analysis\n\n") dput(x$call) cat("\n") cat('n:',x$n,'\tp:',x$p, '\tnk:',x$nk,'\n') cat('\nNumber of NAs:\t', length(x$na.action$omit), '\n') a <- x$na.action if(length(a)) naprint(a) if(x$tlinear) cat('\nTransformation of target variables forced to be linear\n') if(x$allcat) cat('\nAll levels of a categorical variable had to be redundant before the\nvariable was declared redundant\n') if(x$minfreq > 0) cat('\nMinimum category frequency required for retention of a binary or\ncategorical variable:', x$minfreq, '\n') if(length(x$toofew)) { cat('\nBinary or categorical variables removed because of inadequate frequencies:\n\n') cat(x$toofew, '\n') } cat('\nR-squared cutoff:', x$r2, '\tType:', x$type,'\n') if(long) { cat('\nR^2 with which each variable can be predicted from all other variables:\n\n') print(round(x$rsq1, digits)) if(x$allcat) cat('\n(For categorical variables the minimum R^2 for any sufficiently\nfrequent dummy variable is displayed)\n\n') } if(!length(x$Out)) { cat('\nNo redundant variables\n\n') return(invisible()) } cat('\nRendundant variables:\n\n') cat(x$Out) cat('\n\nPredicted from variables:\n\n') cat(x$In, '\n\n') w <- x$r2later vardel <- names(x$rsquared) if(!long) { print(data.frame('Variable Deleted'=vardel, 'R^2'=round(x$rsquared,digits), row.names=NULL, check.names=FALSE)) return(invisible()) } later <- rep('', length(vardel)) i <- 0 for(v in vardel) { i <- i + 1 for(z in w) { if(length(z) && v %in% names(z)) later[i] <- paste(later[i], round(z[v], digits), sep=' ') } } print(data.frame('Variable Deleted'=vardel, 'R^2'=round(x$rsquared,digits), 'R^2 after later deletions'=later, row.names=NULL, check.names=FALSE)) invisible() } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/ynbind.s������������������������������������������������������������������������������������0000644�0001762�0000144�00000004211�13070455033�013163� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ynbind <- function(..., label=deparse(substitute(...)), asna=c('unknown', 'unspecified'), sort=TRUE) { w <- list(...) k <- length(w) if(! k) stop('no variables to process') nam <- as.character(sys.call())[-1] nam <- nam[1 : k] lab <- nam W <- matrix(NA, nrow=length(w[[1]]), ncol=k, dimnames=list(NULL, nam)) for(j in 1 : k) { x <- w[[j]] na <- is.na(x) la <- label(x) if(la != '') lab[j] <- la if(is.numeric(x) && all(x %in% 0 : 1)) x <- x == 1 if(! 
## ===== Hmisc/R/ynbind.s =====
ynbind <- function(..., label=deparse(substitute(...)), asna=c('unknown', 'unspecified'), sort=TRUE) {
  w <- list(...)
  k <- length(w)
  if(! k) stop('no variables to process')
  nam <- as.character(sys.call())[-1]
  nam <- nam[1 : k]
  lab <- nam
  W <- matrix(NA, nrow=length(w[[1]]), ncol=k, dimnames=list(NULL, nam))
  for(j in 1 : k) {
    x <- w[[j]]
    na <- is.na(x)
    la <- label(x)
    if(la != '') lab[j] <- la
    if(is.numeric(x) && all(x %in% 0 : 1)) x <- x == 1
    if(! is.logical(x)) {
      x <- tolower(as.character(x))
      if(length(asna)) {
        i <- x %in% asna
        if(any(i)) na[i] <- TRUE
      }
      x <- x %in% c('y', 'yes', 'present')
      if(any(na)) x[na] <- NA
    }
    W[, j] <- x
  }
  ## Sort columns in ascending order of overall proportion
  prop <- apply(W, 2, mean, na.rm=TRUE)
  if(sort) {
    i <- order(prop)
    W <- W[, i, drop=FALSE]
    lab <- lab[i]
  }
  structure(W, label=label, labels=lab, class=c('ynbind', 'matrix'))
}
'[.ynbind' <- function(x, rows=1:d[1], cols=1:d[2], ...) {
  d <- dim(x)
  at <- attributes(x)[c('label', 'labels')]
  x <- NextMethod('[')
  at$labels <- at$labels[cols]
  attributes(x) <- c(attributes(x), at)
  if(is.matrix(x)) class(x) <- 'ynbind'
  x
}
pBlock <- function(..., subset=NULL, label=deparse(substitute(...))) {
  w <- list(...)
  k <- length(w)
  if(! k) stop('no variables to process')
  nam <- as.character(sys.call())[-1]
  nam <- nam[1 : k]
  lab <- nam
  W <- matrix(NA, nrow=length(w[[1]]), ncol=k, dimnames=list(NULL, nam))
  for(j in 1 : k) {
    x <- w[[j]]
    na <- is.na(x)
    la <- label(x)
    if(la != '') lab[j] <- la
    W[, j] <- if(is.factor(x)) as.character(x) else x
  }
  if(length(subset)) {
    if(is.logical(subset) && (length(subset) != nrow(W)))
      stop('length of subset does not match length of analysis variables')
    subset <- if(is.logical(subset)) ! subset else - subset
    W[subset, ] <- NA
  }
  structure(W, label=label, labels=lab, class=c('pBlock', 'matrix'))
}
'[.pBlock' <- function(x, rows=1:d[1], cols=1:d[2], ...) {
  d <- dim(x)
  at <- attributes(x)[c('label', 'labels')]
  x <- NextMethod('[')
  if (is.matrix(x)) {
    at$labels <- at$labels[cols]
    attributes(x) <- c(attributes(x), at)
    class(x) <- 'pBlock'
  }
  x
}
## ===== Hmisc/R/rcspline.plot.s =====
rcspline.plot <- function(x, y, model=c("logistic","cox","ols"), xrange, event, nk=5, knots=NULL, show=c("xbeta", "prob"), adj=NULL, xlab, ylab, ylim, plim=c(0,1), plotcl=TRUE, showknots=TRUE, add=FALSE, subset, lty=1, noprint=FALSE, m, smooth=FALSE, bass=1, main="auto", statloc)
{
  model <- match.arg(model)
  show <- match.arg(show)
  if(! missing(event)) model<-"cox"
  if(model == "cox" & missing(event)) stop('event must be given for model="cox"')
  if(show == "prob" & ! missing(adj)) stop('show="prob" cannot be used with adj')
  if(show == "prob" & model != "logistic")
    stop('show="prob" can only be used with model="logistic"')
  if(length(x) != length(y)) stop('x and y must have the same length')
  if(! missing(event) && length(event) != length(y))
    stop('y and event must have the same length')
  if(! missing(adj)) {
    if(! is.matrix(adj)) adj <- as.matrix(adj)
    if(dim(adj)[1] != length(x)) stop('x and adj must have the same length')
  }
  if(missing(xlab)) xlab <- label(x)
  if(missing(ylab)) ylab <- label(y)
  isna <- is.na(x) | is.na(y)
  if(!
missing(event)) isna <- isna | is.na(event) nadj <- 0 if(! missing(adj)) { nadj <- ncol(adj) isna <- isna | apply(is.na(adj), 1, sum) > 0 } if(! missing(subset)) isna <- isna | (! subset) x <- x[! isna] y <- y[! isna] if(! missing(event)) event <- event[! isna] if(! missing(adj)) adj <- adj[! isna, ] n <- length(x) if(n<6) stop('fewer than 6 non-missing observations') if(missing(xrange)) { frac<-10./max(n, 200) xrange<-quantile(x, c(frac, 1.-frac)) } if(missing(knots)) xx <- rcspline.eval(x, nk=nk) else xx <- rcspline.eval(x, knots) knots <- attr(xx, "knots") nk <- length(knots) df1 <- nk-2 if(model == "logistic") { if (!requireNamespace("rms", quietly = TRUE)) stop("The 'logistic' model requires the 'rms' package.") b <- rms::lrm.fit(cbind(x, xx, adj), y) beta <- b$coef cov <- b$var model.lr <- b$stats["Model L.R."] offset <- 1 #to skip over intercept parameter ylabl <- if(show == "prob") "Probability" else "log Odds" sampled <- paste("Logistic Regression Model, n=", n," d=", sum(y), sep="") } if(model == "cox") { if(! existsFunction('coxph.fit')) coxph.fit <- getFromNamespace('coxph.fit', 'survival') ##11mar04 ## added coxph.control around iter.max, eps 11mar04 lllin <- coxph.fit(cbind(x, adj), cbind(y, event), strata=NULL, offset=NULL, init=NULL, control=coxph.control(iter.max=10, eps=.0001), method="efron", rownames=NULL)$loglik[2] b <- coxph.fit(cbind(x, xx, adj), cbind(y, event), strata=NULL, offset=NULL, init=NULL, control=coxph.control(iter.max=10, eps=.0001), method="efron", rownames=NULL) beta <- b$coef if(! noprint) { print(beta); print(b$loglik) } beta <- b$coef cov <- b$var model.lr<-2*(b$loglik[2]-b$loglik[1]) offset <- 0 ylabl <- "log Relative Hazard" sampled <- paste("Cox Regression Model, n=",n," events=",sum(event), sep="") } if(model == "logistic"|model == "cox") { model.df <- nk - 1 + nadj model.aic <- model.lr-2.*model.df v <- solve(cov[(1 + offset) : (nk + offset - 1), (1 + offset) : (nk + offset - 1)]) assoc.chi <- beta[(1 + offset) : (nk + offset - 1)] %*% v %*% beta[(1 + offset) : (nk + offset - 1)] assoc.df <- nk - 1 #attr(v,"rank") assoc.p <- 1.-pchisq(assoc.chi, nk - 1) v <- solve(cov[(2 + offset) : (nk + offset - 1), (2 + offset) : (nk + offset - 1)]) linear.chi <- beta[(2 + offset) : (nk + offset - 1)] %*% v %*% beta[(2 + offset) : (nk + offset - 1)] linear.df <- nk - 2 #attr(v,"rank") linear.p <- 1. - pchisq(linear.chi, linear.df) if(nadj > 0) { ntot <- offset + nk - 1 + nadj v <- solve(cov[(nk + offset) : ntot, (nk + offset) : ntot]) adj.chi <- beta[(nk + offset) : ntot] %*% v %*% beta[(nk + offset) : ntot] adj.df <- ncol(v) #attr(v,"rank") adj.p <- 1. - pchisq(adj.chi, adj.df) } else { adj.chi <- 0 adj.p <- 0 } } ## Evaluate xbeta for expanded x at desired range xe <- seq(xrange[1], xrange[2], length=600) if(model == "cox") xx <- rcspline.eval(xe, knots, inclx=TRUE) else xx<- cbind(rep(1, length(xe)), rcspline.eval(xe, knots, inclx=TRUE)) xbeta <- xx %*% beta[1 : (nk - 1 + offset)] var <- drop(((xx %*% cov[1 : (nk - 1 + offset), 1 : (nk - 1 + offset)])*xx) %*% rep(1, ncol(xx))) lower <- xbeta - 1.96*sqrt(var) upper <- xbeta + 1.96*sqrt(var) if(show == "prob") { xbeta <- 1./(1. + exp(-xbeta)) lower <- 1./(1. + exp(-lower)) upper <- 1./(1. + exp(-upper)) } xlim <- range(pretty(xe)) if(missing(ylim)) ylim <- range(pretty(c(xbeta, if(plotcl) lower, if(plotcl) upper))) if(main == "auto") { if(show == "xbeta") main <- "Estimated Spline Transformation" else main <- "Spline Estimate of Prob{Y=1}" } if(! interactive() & missing(statloc)) statloc<-"ll" if(! 
add) { oldmar<-par("mar") if(! missing(statloc) && statloc[1] == "ll") oldmar[1]<- 11 oldpar <- par(err= - 1, mar=oldmar) plot(xe, xbeta, type="n", main=main, xlab=xlab, ylab=ylabl, xlim=xlim, ylim=ylim) lines(xe, xbeta, lty=lty) ltext<-function(z, line, label, cex=.8, adj=0) { zz<-z zz$y<-z$y-(line - 1)*1.2*cex*par("csi")*(par("usr")[4]-par("usr")[3])/ (par("fin")[2]) #was 1.85 text(zz, label, cex=cex, adj=adj) } sl<-0 if(missing(statloc)) { cat("Click left mouse button at upper left corner for statistics\n") z<-locator(1) statloc<-"l" } else if(statloc[1] != "none") { if(statloc[1] == "ll") { z<-list(x=par("usr")[1], y=par("usr")[3]) sl<-3 } else z<-list(x=statloc[1], y=statloc[2]) } if(statloc[1] != "none" & (model == "logistic" | model == "cox")) { rnd <- function(x, r=2) as.single(round(x, r)) ltext(z, 1 + sl, sampled) ltext(z, 2 + sl, " Statistic X2 df") chistats<-format(as.single(round(c(model.lr, model.aic, assoc.chi, linear.chi, adj.chi), 2))) pvals<-format(as.single(round(c(assoc.p, linear.p, adj.p), 4))) ltext(z, 3 + sl, paste("Model L.R. ", chistats[1], model.df, " AIC=", chistats[2])) ltext(z, 4 + sl, paste("Association Wald ", chistats[3], assoc.df, " p= ", pvals[1])) ltext(z, 5 + sl, paste("Linearity Wald ", chistats[4], linear.df, " p= ", pvals[2])) if(nadj > 0)ltext(z, 6 + sl, paste("Adjustment Wald " , chistats[5], adj.df, " p= ", pvals[3]))} } else lines(xe, xbeta, lty=lty) if(plotcl) { prn(cbind(xe, lower, upper)) lines(xe, lower, lty=2) lines(xe, upper, lty=2) } if(showknots) { bot.arrow <- par("usr")[3] top.arrow <- bot.arrow + .05 * (par("usr")[4]-par("usr")[3]) for(i in 1 : nk) arrows(knots[i], top.arrow, knots[i], bot.arrow, length=.1) } if(model == "logistic" & nadj == 0) { if(smooth) { z<-supsmu(x, y, bass=bass) if(show == "xbeta") z$y <- logb(z$y/(1.-z$y)) points(z, cex=.4) } if(! missing(m)) { z<-groupn(x, y, m=m) if(show == "xbeta") z$y <- logb(z$y/(1.-z$y)) points(z, pch=2, mkh=.05)} } if(! 
add) par(oldpar)

  invisible(list(knots=knots, x=xe, xbeta=xbeta, lower=lower, upper=upper))
}

Hmisc/R/hoeffd.s

hoeffd <- function(x, y) {
  phoeffd <- function(d, n) {
    d <- as.matrix(d); n <- as.matrix(n)
    b <- d + 1 / 36 / n
    z <- .5 * (pi ^ 4) * n * b
    zz <- as.vector(z)
    zz[is.na(zz)] <- 1e50   # so approx won't bark

    tabvals <- c(5297,4918,4565,4236,3930,
                 3648,3387,3146,2924,2719,2530,2355,
                 2194,2045,1908,1781,1663,1554,1453,
                 1359,1273,1192,1117,1047,0982,0921,
                 0864,0812,0762,0716,0673,0633,0595,
                 0560,0527,0496,0467,0440,0414,0390,
                 0368,0347,0327,0308,0291,0274,0259,
                 0244,0230,0217,0205,0194,0183,0173,
                 0163,0154,0145,0137,0130,0123,0116,
                 0110,0104,0098,0093,0087,0083,0078,
                 0074,0070,0066,0063,0059,0056,0053,
                 0050,0047,0045,0042,0025,0014,0008,
                 0005,0003,0002,0001)/10000

    P <- ifelse(z < 1.1 | z > 8.5,
                pmax(1e-8, pmin(1, exp(.3885037 - 1.164879 * z))),
                matrix(approx(c(seq(1.1, 5, by=.05), seq(5.5, 8.5, by=.5)),
                              tabvals, zz)$y, ncol=ncol(d)))
    dimnames(P) <- dimnames(d)
    P
  }

  if(!missing(y)) x <- cbind(x, y)
  x[is.na(x)] <- 1e50
  storage.mode(x) <- "double"
  p <- as.integer(ncol(x))
  if(p < 1) stop("must have > 1 column")
  n <- as.integer(nrow(x))
  if(n < 5) stop("must have >4 observations")

  h <- .Fortran(F_hoeffd, x, n, p, hmatrix=double(p*p), aad=double(p*p),
                maxad=double(p*p), npair=integer(p*p),
                double(n), double(n), double(n), double(n), double(n))
  nam <- dimnames(x)[[2]]
  npair <- matrix(h$npair, ncol=p)
  aad <- maxad <- NULL
  aad   <- matrix(h$aad, ncol=p)
  maxad <- matrix(h$maxad, ncol=p)
  dimnames(aad) <- dimnames(maxad) <- list(nam, nam)
  h <- matrix(h$hmatrix, ncol=p)
  h[h > 1e49] <- NA
  dimnames(h) <- list(nam, nam)
  dimnames(npair) <- list(nam, nam)
  P <- phoeffd(h, npair)
  diag(P) <- NA
  structure(list(D=30*h, n=npair, P=P, aad=aad, maxad=maxad),
            class="hoeffd")
}

print.hoeffd <- function(x, ...)
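## Usage sketch for hoeffd() above (not run; data hypothetical).  The
## returned D matrix is 30 times the Hoeffding statistic, so values
## range over roughly -0.5 to 1 with 0 corresponding to independence:
##   set.seed(1)
##   x <- rnorm(200); y <- x^2 + rnorm(200)  # dependence a correlation test may miss
##   h <- hoeffd(x, y)
##   h$D   # matrix of scaled D statistics
##   h$P   # approximate two-sided P-values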
{ cat("D\n") print(round(x$D,2)) if(length(aad <- x$aad)) { cat('\navg|F(x,y)-G(x)H(y)|\n') print(round(aad,4)) } if(length(mad <- x$maxad)) { cat('\nmax|F(x,y)-G(x)H(y)|\n') print(round(mad,4)) } n <- x$n if(all(n == n[1,1])) cat("\nn=", n[1,1], "\n") else { cat("\nn\n") print(x$n) } cat("\nP\n") P <- x$P P <- ifelse(P < .0001, 0, P) p <- format(round(P, 4)) p[is.na(P)] <- "" print(p, quote=FALSE) invisible() } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/panel.bpplot.s������������������������������������������������������������������������������0000644�0001762�0000144�00000035561�14275746450�014330� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������panel.bpplot <- function(x, y, box.ratio = 1, means=TRUE, qref=c(.5, .25, .75), probs= c(.05, .125, .25, .375), nout=0, nloc=c('right lower', 'right', 'left', 'none'), cex.n=.7, datadensity=FALSE, scat1d.opts=NULL, violin=FALSE, violin.opts=NULL, font = box.dot$font, pch = box.dot$pch, cex.means = box.dot$cex, col = box.dot$col, nogrid=NULL, height=NULL, ...) { grid <- TRUE if(length(nogrid) && nogrid) grid <- FALSE if(missing(nloc) && !grid) nloc <- 'none' else nloc <- match.arg(nloc) if(grid) { lines <- llines points <- lpoints segments <- lsegments } y <- as.numeric(y) y <- rep(y, length.out=length(x)) ok <- !is.na(x) & !is.na(y) x <- x[ok] y <- y[ok] y.unique <- sort(unique(y)) width <- box.ratio / (1 + box.ratio) w <- width / 2 if(length(height)) w <- height probs2 <- sort(c(probs, 1 - probs)) if(grid) { box.dot <- trellis.par.get("box.dot") lineopts <- trellis.par.get("box.rectangle") box.dot.par <- c(list(pch = pch, cex = cex.means, col = col, font = font), ...) 
} else { pr <- par() box.dot <- c(pr[c('cex', 'pch', 'col', 'font')], xpd=NA) lineopts <- c(pr[c('lty', 'col', 'lwd')], xpd=NA) box.dot.par <- c(list(pch=pch, cex=cex.means, col=col, xpd=NA)) } m <- length(probs) m2 <- length(probs2) j <- c(1, sort(rep(2 : m2, 2)), - sort(- rep(1 : (m2 - 1),2))) z <- c(sort(rep(probs, 2)), - sort(- rep(probs[1 : (m - 1)], 2))) z <- c(z, -z, probs[1]) k <- max(z) k <- if(k > .48) .5 else k if(length(qref)) { size.qref <- pmin(qref, 1 - qref) size.qref[qref == .5] <- k } for(Y in y.unique) { X <- x[y == Y] if(!length(X)) next q <- quantile(X, c(probs2, qref)) if(length(qref)) do.call('segments',c(list(q[-(1 : m2)], Y - w * size.qref / k, q[-(1 : m2)], Y + w * size.qref / k), lineopts)) do.call('lines',c(list(x=q[j], y=Y + w * z / k), lineopts)) if(means) { mean.value <- list(x=mean(X), y=Y) do.call('points', c(mean.value, box.dot.par)) } xlimits <- if(grid) current.panel.limits()$xlim else par('usr')[1:2] switch(nloc, right= ltext(xlimits[2] - .01*diff(xlimits), Y, paste('n=', length(X), sep=''), adj=c(1, .5), cex=cex.n), left= ltext(xlimits[1] + .01*diff(xlimits), Y, paste('n=', length(X), sep=''), adj=c(0, .5), cex=cex.n), 'right lower'= ltext(xlimits[2] - .01*diff(xlimits), Y - w * min(size.qref) / k, paste('n=', length(X), sep=''), adj=c(1, 1), cex=cex.n)) if(datadensity) do.call('scat1d',c(list(x=X, y=Y, grid=grid), scat1d.opts)) if(violin) do.call('panel.violin', c(list(x=X, y=Y), violin.opts)) if(nout > 0) { ii <- if(nout < 1) { ## Note - bug in quantile - endless loop if probs=c(.5,.5) if(nout == .5) stop('instead of nout=.5 use datadensity=TRUE') cuts <- quantile(X, c(nout, 1 - nout)) X < cuts[1] | X > cuts[2] } else { X <- sort(X) nx <- length(X) ll <- 1 : nx (ll <= min(nout, nx / 2)) | (ll >= max(nx - nout + 1, nx / 2)) } if(sum(ii)) do.call('scat1d',c(list(x=X[ii], y=Y, grid=grid), scat1d.opts)) } } } # Given a matrix where rows are groups and columns have all the # quantiles already computed, plus the Mean, draw a panel containing # horizontal box-percentile plots like the default in panel.bpplot. 
This is # primarily for plot.summary.formula.reverse's continuous variable # plots bpplt <- function(stats, xlim, xlab='', box.ratio = 1, means=TRUE, qref=c(.5,.25,.75), qomit=c(.025,.975), pch=16, cex.labels=par('cex'), cex.points=if(prototype) 1 else .5, grid=FALSE) { prototype <- missing(stats) if(prototype) { x <- c(.025,.05,.125,.25,.375,.5,.625,.75,.875,.95,.975) stats <- matrix(x, nrow=1, dimnames=list('',format(x))) Means <- .56 } else { Means <- stats[, 'Mean'] stats <- stats[, dimnames(stats)[[2]] %nin% c('Mean', 'SD', 'N'), drop=FALSE] } stats <- stats[, order(as.numeric(dimnames(stats)[[2]])), drop=FALSE] groups <- dimnames(stats)[[1]] qq <- as.numeric(dimnames(stats)[[2]]) probs2 <- qq if(missing(xlim)) xlim <- range(stats) i <- integer(0) for(a in c(.5,qomit)) i <- c(i, seq.int(along.with=probs2)[abs(probs2 - a) < .001]) probs2 <- probs2[-i] probs <- probs2[seq.int(length.out=floor(length(probs2) / 2))] if(grid) { lines <- llines points <- lpoints segments <- lsegments } width <- box.ratio / (1 + box.ratio) w <- width / 2 m <- length(probs) m2 <- length(probs2) j <- c(1, rep(seq.int(along.with = probs2[c(-1, -m2)]) + 1, each=2), m2) j <- c(j, rev(j), NA) z <- c(rep(probs[-m], each=2), probs[m]) z <- c(z, rev(z)) z <- c(z, -z, NA) k <- max(z, na.rm=TRUE) k <- if(k > .48) .5 else k if(length(qref)) { size.qref <- pmin(qref, 1 - qref) size.qref[qref == .5] <- k } plot.new() mai <- par('mai') mxlab <- .3+max(strwidth(groups, units='inches', cex=cex.labels)) mai[2] <- mxlab opar <- par('mai') par(mai=mai) on.exit(par(mai=opar)) plot.window(xlim=xlim, ylim=c(0.5,length(groups) + 0.5)) if(!prototype) { box() mgp.axis(1, axistitle=xlab) } mtext(paste(groups,''), 2, 0, at=length(groups) : 1, adj=1, las=1, cex=cex.labels) y <- seq.int(from=length(groups), to=1, length.out=length(groups)) qref.x <- as.vector(stats[, match(qref, qq)]) qref.y <- rep.int(y, times=length(size.qref)) qref.mod <- rep(w * size.qref / k, each=length(groups)) segments(x0=qref.x, y0=qref.y - qref.mod, x1=qref.x, y1=qref.y + qref.mod) polygon(x=as.vector(t(stats[, match(probs2, qq)[j]])), y=rep(y, each=length(j)) + w * z / k) if(means) points(Means, y, pch=pch, cex=cex.points) if(prototype) { mar <- par('mar') on.exit(par(mar=mar)) par(mar=rep(.5,4)) text(Means, 1.025+.02, 'Mean') for(a in c(.5, probs2)) { arrows(a, .6, a, .725, length=.1) f <- format(a) text(a, .575, format(a)) } text(.5, .52, 'Quantiles') xd <- .004 text(.485 - xd, 1, expression(Median==Q[2]), srt=90) text(.235 - xd, 1, expression(Q[1]), srt=90) text(.735 - xd, 1, expression(Q[3]), srt=90) lines(c(.375, .625), rep(1.3, 2)); text(.635, 1.3, '1/4', adj=0, cex=.9) lines(c(.25, .75 ), rep(1.35, 2)); text(.76, 1.35, '1/2', adj=0, cex=.9) lines(c(.125, .875), rep(1.4, 2)); text(.885, 1.4, '3/4', adj=0, cex=.9) lines(c(.05, .95), rep(1.45, 2)); text(.96, 1.45, '9/10', adj=0, cex=.9) text(.68, 1.24, 'Fraction of Sample Covered', adj=0, srt=13, cex=.7) } } bpplotM <- function(formula=NULL, groups=NULL, data=NULL, subset=NULL, na.action=NULL, qlim=0.01, xlim=NULL, nloc=c('right lower','right','left','none'), vnames=c('labels', 'names'), cex.n=.7, cex.strip=1, outerlabels=TRUE, ...) { nloc <- match.arg(nloc) vnames <- match.arg(vnames) if(! 
length(formula)) { g <- function(x) is.numeric(x) && length(unique(x)) > 5 v <- setdiff(names(data), groups) z <- sapply(data[, v], g) if(!any(z)) stop('no variable was numeric with > 5 unique levels') formula <- v[z] } if(!inherits(formula, 'formula')) { if(!length(groups)) stop('must specify group if first argument is not a formula') formula <- paste(paste(formula, collapse=' + '), '~', paste(groups, collapse=' + ')) formula <- as.formula(formula) } form <- Formula(formula) Y <- if(length(subset)) model.frame(form, data=data, subset=subset, na.action=na.action) else model.frame(form, data=data, na.action=na.action) X <- model.part(form, data=Y, rhs=1) if(ncol(X) == 0) X <- rep('', nrow(Y)) Y <- model.part(form, data=Y, lhs=1) vars <- names(Y) labs <- vars if(vnames == 'labels') { ylabs <- sapply(Y, label) labs <- ifelse(ylabs == '', labs, ylabs) } names(labs) <- vars w <- reshape(cbind(X, Y), direction='long', v.names='x', varying=vars, times=vars) w$time <- factor(w$time, levels=vars) lims <- lapply(Y, function(x) quantile(x, c(qlim, 1 - qlim), na.rm=TRUE)) if(length(xlim)) lims[names(xlim)] <- xlim scales <- list(x=list(relation='free', limits=lims)) nv <- length(vars) lev <- NULL for(v in levels(w$time)) { un <- units(Y[[v]]) l <- if(labs[v] == v && un == '') v else labelPlotmath(labs[v], un) lev <- c(lev, l) } strip <- function(which.given, which.panel, var.name, factor.levels, ...) { current.var <- var.name[which.given] levs <- if(current.var == 'time') lev else factor.levels strip.default(which.given, which.panel, var.name, factor.levels=levs, ...) } namx <- names(X) form <- paste(namx[1], '~ x | time') if(length(namx) > 1) form <- paste(form, '+', paste(namx[-1], collapse= '+')) form <- as.formula(form) d <- bwplot(form, panel=panel.bpplot, scales=scales, data=w, xlab='', nloc=nloc, cex.n=cex.n, strip=strip, par.strip.text=list(cex=cex.strip), ...) 
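  ## When conditioning yields a two-dimensional layout,
  ## latticeExtra::useOuterStrips (below) moves the strip labels to the
  ## panel margins.  A typical top-level call is sketched here (not run;
  ## variable names hypothetical):
  ##   bpplotM(sbp + dbp ~ treat + sex, data=d, cex.strip=.8)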
if(outerlabels && length(dim(d)) == 2) d <- latticeExtra::useOuterStrips(d, strip=strip, strip.left=strip) d } bppltp <- function(p=plotly::plot_ly(), stats, xlim, xlab='', box.ratio = 1, means=TRUE, qref=c(.5,.25,.75), qomit=c(.025,.975), teststat=NULL, showlegend=TRUE) { if (!requireNamespace("plotly")) stop("This function requires the 'plotly' package.") ## Do what segments does with broken (by NAs) lines for plotly segm <- function(x0, y0, x1, y1, wquan, quan, group='') { n <- length(x0) m <- 3 * n x <- rep(NA, m) y <- rep(NA, m) z <- rep('', m) ## Quantiles other than median are already represented in polygon below quan <- ifelse(wquan == 0.5, paste0(group, if(group != '') '<br>', 'Q<sub>', wquan, '</sub>=', signif(quan, 4)), '') return(list(x0=x0, y0=y0, x1=x1, y1=y1, z=quan)) x[seq(1, m, by=3)] <- x0 x[seq(2, m, by=3)] <- x1 y[seq(1, m, by=3)] <- y0 y[seq(2, m, by=3)] <- y1 z[seq(1, m, by=3)] <- quan z[seq(2, m, by=3)] <- quan list(x=x, y=y, z=z) } ## polygon that closes the loop and adds NA at end so will break from ## any polygons that follow polyg <- function(x, y, qq, group='') { qq <- paste0(group, if(group != '') '<br>', 'Q<sub>', qq, '</sub>=', signif(x, 4)) list(x=c(x, x[1], NA), y=c(y, y[1], NA), z=c(qq, qq[1], '')) # list(x=c(x, x[1]), y=c(y, y[1]), z=c(qq, qq[1])) } Means <- stats[, 'Mean'] N <- stats[, 'N'] stats <- stats[, colnames(stats) %nin% c('Mean', 'SD', 'N'), drop=FALSE] stats <- stats[, order(as.numeric(colnames(stats))), drop=FALSE] groups <- rownames(stats) ng <- length(groups) qq <- as.numeric(colnames(stats)) probs2 <- qq if(missing(xlim)) xlim <- range(stats) i <- integer(0) for(a in c(.5, qomit)) i <- c(i, seq.int(along.with=probs2)[abs(probs2 - a) < 0.001]) probs2 <- probs2[-i] probs <- probs2[seq.int(length.out=floor(length(probs2) / 2))] width <- box.ratio / (1 + box.ratio) w <- width / 2 m <- length(probs) m2 <- length(probs2) j <- c(1, rep(seq.int(along.with = probs2[c(-1, -m2)]) + 1, each=2), m2) j <- c(j, rev(j)) z <- c(rep(probs[-m], each=2), probs[m]) z <- c(z, rev(z)) z <- c(z, -z) k <- max(z, na.rm=TRUE) k <- if(k > .48) .5 else k if(length(qref)) { size.qref <- pmin(qref, 1 - qref) size.qref[qref == .5] <- k } leftmargin <- plotlyParm$lrmargin(as.character(groups)) y <- ng + 1 X <- Y <- numeric(0) Z <- character(0) X0 <- X1 <- Y0 <- Y1 <- numeric(0) Zs <- character(0) # Z <- Zs <- character(0) for(i in 1 : ng) { y <- y - 1 qref.x <- as.vector(stats[i, match(qref, qq)]) qref.y <- rep.int(y, times=length(size.qref)) qref.mod <- w * size.qref / k ## Form vertical line segments seg <- segm(x0=qref.x, y0=qref.y - qref.mod, x1=qref.x, y1=qref.y + qref.mod, wquan=qref, quan=qref.x, group=groups[i]) # X <- c(X, seg$x) # Y <- c(Y, seg$y) # Z <- c(Z, seg$z) X0 <- c(X0, seg$x0) X1 <- c(X1, seg$x1) Y0 <- c(Y0, seg$y0) Y1 <- c(Y1, seg$y1) Zs <- c(Zs, seg$z) ## Add polygon jj <- match(probs2, qq)[j] po <- polyg(x=as.vector(t(stats[i, jj])), y=rep(y, each=length(j)) + w * z / k, qq=qq[jj], group=groups[i]) X <- c(X, po$x) Y <- c(Y, po$y) Z <- c(Z, po$z) } # p <- plotly::add_segments(p, x=~X0, y=~X0, xend=~X1, yend=~Y1, name='seg') # p <- plotly::add_polygons(p, x=~ X, y=~ Y, text=~ Z, # name='Quantiles', mode='lines', # color=I('LightGray'), # line=list(color='MidnightBlue'), # hoverinfo='text') dat <- data.frame(X, Y, Z, X0, Y0, X1, Y1, Zs) p <- plotly::add_markers(p, x=~X, y=~Y, text=~Z, hoverinfo='text', marker=list(symbol='asterisk'), data=dat) p <- plotly::add_polygons(p, x=~X, y=~Y, color=I('LightGray'), mode='markers', showlegend=FALSE, data=dat) p <- 
plotly::add_segments(p, x=~X0, y=~Y0, xend=~X1, yend=~Y1,
                            text=~Zs, hoverinfo='text',
                            color=I('LightGreen'), data=dat)

#  p <- plotly::add_lines(p, x=~X, y=~Y, text=~Z, name='Quantiles',
#                         color=I('LightGray'), hoverinfo='text')

  if(means) {
    z <- paste0(groups, '<br>', 'Mean=', signif(Means, 4), '<br>N=', N)
    if(length(teststat)) z <- paste0(z, '<br>', teststat)
    dam <- data.frame(Means, y=ng : 1, z)
    p <- plotly::add_markers(p, x=~ Means, y=~ y, text=~ z,
                             mode='markers',
                             marker=list(color='LightBlue'),
                             hoverinfo='text', name='Means', data=dam)
  }

  plotly::layout(p, autosize=TRUE, showlegend=showlegend,
                 margin=list(l=leftmargin),
                 xaxis=list(title=xlab, range=xlim),
                 yaxis=list(zeroline=FALSE, title='',
                            tickvals=ng : 1, ticktext=groups))
}

Hmisc/R/label.s

##"label<-" <- function(x, value) {
##  attr(x, "label") <- value
##  x
##}

## exact=TRUE and [['label']] to prevent matching with a different attribute
## "labels" (Thanks: Ivan Puzek)

label <- function(x, default=NULL, ...) UseMethod("label")

label.default <- function(x, default=NULL, units=plot, plot=FALSE,
                          grid=FALSE, html=FALSE, ...)
{
  if(length(default) > 1)
    stop("the default string cannot be of length greater than one")

  at  <- attributes(x)
  lab <- at[['label']]
  if(length(default) && (!length(lab) || lab=='')) lab <- default

  un <- at$units
  labelPlotmath(lab, if(units) un else NULL,
                plotmath=plot, grid=grid, html=html)
}

label.Surv <- function(x, default=NULL, units=plot, plot=FALSE,
                       grid=FALSE, html=FALSE,
                       type=c('any', 'time', 'event'), ...)
{
  type <- match.arg(type)

  if(length(default) > 1)
    stop("the default string cannot be of length greater than one")

  at  <- attributes(x)
  lab <- at[['label']]
  ia  <- at$inputAttributes
  if((! length(lab) || lab == '') && length(ia)) {
    poss <- switch(type,
                   any   = c(ia$event$label, ia$time2$label, ia$time$label),
                   time  = c(                ia$time2$label, ia$time$label),
                   event = ia$event$label)
    for(lb in poss)
      if(! length(lab) && lb != '') lab <- lb
  }

  if(length(default) && (!length(lab) || lab=='')) lab <- default

  un <- NULL
  if(units) {
    un <- at$units
    if(! length(un) && length(ia)) {
      un <- ia$time2$units
      if(! length(un)) un <- ia$time$units
    }
  }

  labelPlotmath(lab, un, plotmath=plot, grid=grid, html=html)
}

label.data.frame <- function(x, default=NULL, self=FALSE, ...) {
  if(self) {
    label.default(x)
  } else {
    if(length(default) > 0 && length(default) != length(x)) {
      stop('length of default must be the same as length of x')
    } else if(length(default) == 0) {
      default <- list(default)
    }

    labels <- mapply(FUN=label, x=x, default=default,
                     MoreArgs=list(self=TRUE), USE.NAMES=FALSE)
    names(labels) <- names(x)
    return(labels)
  }
}

labelPlotmath <- function(label, units=NULL, plotmath=TRUE, html=FALSE,
                          grid=FALSE, chexpr=FALSE)
{
  if(! length(label)) label <- ''
  if(! length(units) || (length(units) == 1 && is.na(units))) units <- ''

  if(html) return(markupSpecs$html$varlabel (label, units))
  if(!
plotmath) return(markupSpecs$plain$varlabel(label, units)) g <- function(x, y=NULL, xstyle=NULL, ystyle=NULL) { h <- function(w, style=NULL) if(length(style)) sprintf('%s(%s)', style, w) else w tryparse <- function(z, original, chexpr) { p <- try(parse(text=z), silent=TRUE) if(is.character(p)) original else if(chexpr) sprintf('expression(%s)', z) else p } if(! length(y)) return(tryparse(h(plotmathTranslate(x), xstyle), x, chexpr)) w <- paste('list(',h(plotmathTranslate(x), xstyle), ',', h(plotmathTranslate(y), ystyle), ')', sep='') tryparse(w, paste(x, y), chexpr) } if(units=='') g(label) else if(label=='') g(units) else g(label, units, ystyle='scriptstyle') } plotmathTranslate <- function(x) { if(length(grep('paste', x))) return(x) specials <- c(' ','%','_') spec <- FALSE for(s in specials) if(length(grep(s,x))) spec <- TRUE ## If x is not a legal expression, also put in paste() if(! spec && is.character(try(parse(text=x), silent=TRUE))) spec <- TRUE if(spec) x <- paste('paste("',x,'")',sep='') else if(substring(x,1,1)=='/') x <- paste('phantom()', x, sep='') x } labelLatex <- function(x=NULL, label='', units='', size='smaller[2]', hfill=FALSE, bold=FALSE, default='', double=FALSE) { if(length(x)) { if(label == '') label <- label(x) if(units == '') units <- units(x) } if(default == '' && length(x)) default <- deparse(substitute(x)) if(label == '') return(default) label <- latexTranslate(label) bs <- if(double) '\\\\' else '\\' if(bold) label <- paste('{', bs, 'textbf ', label, '}', sep='') if(units != '') { units <- latexTranslate(units) if(length(size) && size != '') units <- paste('{', bs, size, ' ', units, '}', sep='') if(hfill) units <- paste(bs, 'hfill ', units, sep='') else units <- paste(' ', units, sep='') label <- paste(label, units, sep='') } label } "label<-" <- function(x, ..., value) UseMethod("label<-") ##From Bill Dunlap, StatSci 15Mar95: "label<-.default" <- function(x, ..., value) { if(is.list(value)) { stop("cannot assign a list to be a object label") } if(length(value) != 1L) { stop("value must be character vector of length 1") } attr(x, 'label') <- value if(! inherits(x, 'labelled')) class(x) <- c('labelled', class(x)) return(x) } "label<-.data.frame" <- function(x, self=TRUE, ..., value) { if(!is.data.frame(x)) { stop("x must be a data.frame") } if(missing(self) && is.list(value)) { self <- FALSE } if(self) { xc <- class(x) xx <- unclass(x) label(xx) <- value class(xx) <- xc return(xx) } else { if(length(value) != length(x)) { stop("value must have the same length as x") } for (i in seq(along.with=x)) { label(x[[i]]) <- value[[i]] } } return(x) } "[.labelled"<- function(x, ...) { tags <- valueTags(x) x <- NextMethod("[") valueTags(x) <- tags x } "print.labelled"<- function(x, ...) { x.orig <- x u <- attr(x, 'units', exact=TRUE) if(length(u)) attr(x,'units') <- NULL # so won't print twice cat(attr(x, "label", exact=TRUE), if(length(u)) paste('[', u, ']', sep=''), "\n") attr(x, "label") <- NULL class(x) <- if(length(class(x)) == 1 && inherits(x, 'labelled')) NULL else class(x)[class(x) != 'labelled'] ## next line works around print bug if(! length(attr(x, 'class'))) attr(x, 'class') <- NULL NextMethod("print") invisible(x.orig) } as.data.frame.labelled <- as.data.frame.vector Label <- function(object, ...) UseMethod("Label") Label.data.frame <- function(object, file='', append=FALSE, ...) 
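## Usage sketch for the label accessors defined above (not run):
##   x <- runif(50)
##   label(x) <- 'Systolic BP'   # stores a label attribute, class 'labelled'
##   label(x)                    # "Systolic BP"
##   label(x, plot=TRUE)         # plotmath form suitable for axis labels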
{ nn <- names(object) for(i in 1:length(nn)) { lab <- attr(object[[nn[i]]], 'label', exact=TRUE) lab <- if(length(lab)==0) '' else lab cat("label(",nn[i],")\t<- '",lab,"'\n", append=if(i==1) append else TRUE, file=file, sep='') } invisible() } relevel.labelled <- function(x, ...) { lab <- label(x) x <- NextMethod(x) label(x) <- lab x } reLabelled <- function(object) { for(i in 1:length(object)) { x <- object[[i]] lab <- attr(x, 'label', exact=TRUE) cl <- class(x) if(length(lab) && ! any(cl == 'labelled')) { class(x) <- c('labelled',cl) object[[i]] <- x } } object } llist <- function(..., labels=TRUE) { dotlist <- list(...) lname <- names(dotlist) name <- vname <- as.character(sys.call())[-1] for(i in 1:length(dotlist)) { vname[i] <- if(length(lname) && lname[i]!='') lname[i] else name[i] ## R barked at setting vname[i] to NULL lab <- vname[i] if(labels) { lab <- attr(dotlist[[i]],'label', exact=TRUE) if(length(lab) == 0) lab <- vname[i] } label(dotlist[[i]]) <- lab } names(dotlist) <- vname[1:length(dotlist)] dotlist } prList <- function(x, lcap=NULL, htmlfig=0, after=FALSE) { if(! length(names(x))) stop('x must have names') if(length(lcap) && (length(lcap) != length(x))) stop('if given, lcap must have same length as x') mu <- markupSpecs$html g <- if(htmlfig == 0) function(x, X=NULL) paste(x, X) else if(htmlfig == 1) function(x, X=NULL) paste(mu$cap(x), mu$lcap(X)) else function(x, X=NULL) paste0('\n### ', mu$cap(x), if(length(X) && X != '') paste0('\n', mu$lcap(X))) i <- 0 for(n in names(x)) { i <- i + 1 y <- x[[n]] if(length(names(y)) && length(class(y)) == 1 && inherits(y, 'list') && length(y) > 1) { for(m in names(y)) { if(! after) cat('\n', g(paste0(n, ': ', m)), '\n', sep='') suppressWarnings(print(y[[m]])) # for plotly warnings if(after) cat('\n', g(paste0(n, ': ', m)), '\n', sep='') } if(length(lcap) && lcap[i] != '') cat(mu$lcap(lcap[i])) } else { if(! after) cat('\n', g(n, if(length(lcap)) lcap[i]), '\n', sep='') suppressWarnings(print(x[[n]])) if(after) cat('\n', g(n, if(length(lcap)) lcap[i]), '\n', sep='') } } invisible() } putHfig <- function(x, ..., scap=NULL, extra=NULL, subsub=TRUE, hr=TRUE, table=FALSE, file='', append=FALSE, expcoll=NULL) { ec <- length(expcoll) > 0 if(ec && ! table) stop('expcoll can only be specified for tables, not figures') mu <- markupSpecs$html lcap <- unlist(list(...)) if(length(lcap)) lcap <- paste(lcap, collapse=' ') if(ec && length(lcap)) stop('does not work when lcap is specified because of interaction with markdown sub-subheadings') if(! length(lcap) && ! length(scap)) { if(ec) { if(hr) x <- c(mu$hrule, x) x <- mu$expcoll(paste(expcoll, collapse=' '), paste(x, collapse='\n')) cat(x, file=file, append=append, sep='\n') return(invisible()) } if(hr) cat(mu$hrule, '\n', sep='', file=file, append=append) if(table) cat(x, file=file, append=append || hr, sep='\n') else suppressWarnings(print(x)) # because of # colors in pallette warning return(invisible()) } if(! length(scap)) { scap <- lcap lcap <- NULL } scap <- if(table) mu$tcap(scap) else mu$cap(scap) if(subsub) scap <- paste0('\n### ', scap) if(hr && ! ec) cat(mu$hrule, '\n', sep='', file=file, append=append) if(! 
ec) cat(scap, '\n', sep='', file=file, append=append | hr) if(length(lcap)) { lcap <- if(table) mu$ltcap(lcap) else mu$lcap(lcap) if(length(extra)) lcap <- paste0( '<TABLE width="100%" BORDER="0" CELLPADDING="3" CELLSPACING="3">', '<TR><TD>', lcap, '</TD>', paste(paste0('<TD style="text-align:right;padding: 0 1ex 0 1ex;">', extra, '</TD>'), collapse=''), '</TR></TABLE>') if(ec) x <- c(lcap, x) else cat(lcap, '\n', sep='', file=file, append=TRUE) } if(ec) x <- mu$expcoll(paste(expcoll, collapse=' '), paste(c(if(hr) mu$hrule, scap, x), collapse='\n')) if(table) cat(x, sep='\n', file=file, append=TRUE) else suppressWarnings(print(x)) invisible() } putHcap <- function(..., scap=NULL, extra=NULL, subsub=TRUE, hr=TRUE, table=FALSE, file='', append=FALSE) { mu <- markupSpecs$html fcap <- if(table) mu$tcap else mu$cap flcap <- if(table) mu$ltcap else mu$lcap output <- function(r) if(is.logical(file)) return(r) else { cat(r, sep='\n', file=file, append=append) return(invisible()) } lcap <- unlist(list(...)) if(length(lcap)) lcap <- paste(lcap, collapse=' ') r <- NULL if(! length(lcap) && ! length(scap)) return('') if(! length(scap)) { scap <- lcap lcap <- NULL } scap <- fcap(scap) scap <- if(is.logical(subsub)) paste0('\n', if(subsub) '### ', scap) else if(is.character(subsub) && subsub != '') paste0(subsub, scap) if(hr) r <- c(r, mu$hrule) r <- c(r, scap) if(length(lcap)) { lcap <- flcap(lcap) if(length(extra)) lcap <- paste0( '<TABLE width="100%" BORDER="0" CELLPADDING="3" CELLSPACING="3">', '<TR><TD>', lcap, '</TD>', paste(paste0('<TD style="text-align:right;padding: 0 1ex 0 1ex;">', extra, '</TD>'), collapse=''), '</TR></TABLE>') r <- c(r, lcap) } output(r) } combineLabels <- function(...) { w <- list(...) labs <- sapply(w[[1]], label) lw <- length(w) if(lw > 1) for(j in 2:lw) { lab <- sapply(w[[j]], label) lab <- lab[lab != ''] if(length(lab)) labs[names(lab)] <- lab } labs[labs != ''] } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/upFirst.s�����������������������������������������������������������������������������������0000644�0001762�0000144�00000003407�12627703317�013352� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������upFirst <- function(txt, lower=FALSE, alllower=FALSE) { f <- function(x) { notcap <- c('a', 'about', 'above', 'across', 'after', 'against', 'along', 'among', 'an', 'and', 'around', 'as', 'at', 'before', 'behind', 'below', 'beneath', 'beside', 'besides', 'between', 'beyond', 'but', 'by', 'despite', 'down', 'during', 'except', 'following', 'for', 'from', 'in', 'inside', 'into', 'like', 'mid', 'minus', 'near', 'next', 'nor', 'of', 'off', 'on', 'onto', 'opposite', 'or', 'out', 'outside', 'over', 'past', 'per', 'plus', 'regarding', 'round', 'save', 'since', 'so', 'than', 'the', 'through', 'throughout', 'till', 'times', 'to', 'toward', 'towards', 'under', 'underneath', 'unlike', 'until', 'up', 'upon', 'via', 'vs.', 'when', 'with', 'within', 'without', 'worth', 'yet') s <- strsplit(x, " ")[[1]] ## Find words that 
have more than one upper case letter; assume these ## are acronyms that need capitalization preserved a <- grepl('[A-Z]{1,}.*[A-Z]{1,}', s) s <- if(alllower) ifelse(a, s, tolower(s)) else if(lower) ifelse(a, s, ifelse((1 : length(s)) == 1, paste(toupper(substring(s, 1, 1)), tolower(substring(s, 2)), sep=''), tolower(s))) else ifelse(a, s, ifelse((1 : length(s)) == 1 | s %nin% notcap, paste(toupper(substring(s, 1, 1)), tolower(substring(s, 2)), sep=''), tolower(s))) paste(s, collapse=' ') } for(i in 1 : length(txt)) txt[i] <- f(txt[i]) txt } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/latexDotchart.s�����������������������������������������������������������������������������0000644�0001762�0000144�00000013373�12723405372�014524� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������latexDotchart <- function(data, labels, groups = NULL, gdata = NA, xlab = "", auxdata, auxgdata=NULL, auxtitle, w=4, h=4, margin, lines = TRUE, dotsize = .075, size='small', size.labels = 'small', size.group.labels = 'normalsize', ttlabels = FALSE, sort.=TRUE, xaxis=TRUE, lcolor='gray', ...) { txt <- function(x, y, s, size=NULL, just=c('c','l','r'), tt=FALSE) { just <- match.arg(just) s <- latexTranslate(s) n <- max(length(x), length(y), length(s)) x <- rep(x, length.out=n) y <- rep(y, length.out=n) s <- rep(s, length.out=n) z <- character(n) if(tt) s <- paste('\\texttt{', s, '}', sep='') if(length(size)) s <- paste('\\', size, ' ', s, sep='') for(i in 1:n) z[i] <- sprintf('\\put(%g,%g){\\makebox(.001,.001)[%s]{%s}}', x[i], y[i], just, s[i]) z } ln <- function(x1, y1, x2, y2, color='black') { n <- max(length(x1), length(x2), length(y1), length(y2)) x1 <- rep(x1, length.out=n) y1 <- rep(y1, length.out=n) x2 <- rep(x2, length.out=n) y2 <- rep(y2, length.out=n) z <- character(n) for(i in 1:n) z[i] <- if(x1[i] == x2[i]) sprintf('\\put(%g,%g){\\line(0,%g){%g}}', x1[i], y1[i], 1*(y2[i] >= y1[i]) - 1*(y2[i] < y1[i]), abs(y2[i]-y1[i])) else if(y1[i] == y2[i]) sprintf('\\put(%g,%g){\\line(%g,0){%g}}', x1[i], y1[i], 1*(x2[i] >= x1[i]) - 1*(x2[i] < x1[i]), abs(x2[i]-x1[i])) else sprintf('\\drawline(%g,%g)(%g,%g)', x1[i], y1[i], x2[i], y2[i]) if(color != 'black') z <- c(if(color == 'gray') '\\color[gray]{0.8}' else sprintf('\\color{%s}', color), z, '\\color{black}') z } ## Approximate length in inches of longest char. string acl <- function(s) 0.09 * max(nchar(s)) f <- sprintf if(size.labels == size) size.labels <- NULL if(size.group.labels == size) size.group.labels <- NULL z <- c('\\setlength{\\unitlength}{1in}', f('\\begin{picture}(%g,%g)', w, h), f('\\%s', size)) ndata <- length(data) if(missing(labels)) { if(length(names(data))) labels <- names(data) else labels <- paste("#", seq(along = ndata)) } else labels <- rep(as.character(labels), length = ndata) if(missing(groups)) { glabels <- NULL gdata <- NULL if(sort.) { ord <- order(-data) data <- data[ord] labels <- labels[ord] if(! missing(auxdata)) auxdata <- auxdata[ord] } } else { if(! sort.) 
{ ##assume data sorted in groups, but re-number groups ##to be as if groups given in order 1,2,3,... ug <- unique(as.character(groups)) groups <- factor(as.character(groups), levels=ug) } groups <- unclass(groups) glabels <- levels(groups) gdata <- rep(gdata, length = length(glabels)) ord <- if(sort.) order(groups, -data) else order(groups, seq(along = groups)) groups <- groups[ord] data <- data[ord] labels <- labels[ord] if(! missing(auxdata)) auxdata <- auxdata[ord] } alldat <- c(data, gdata) if(! missing(auxdata)) auxdata <- format(c(auxdata, auxgdata)) alllab <- c(labels, glabels) ## set up margins and user coordinates, draw box xl <- range(p <- pretty(alldat)) yl <- c(1, length(alldat)) if(missing(margin)) margin <- c(acl(alllab), ifelse(xlab == '', .2, .4), ifelse(missing(auxdata), 0, acl(auxdata)), ifelse(missing(auxtitle), 0, .1)) xt <- function(x) round((w - sum(margin[c(1,3)]))*(x - xl[1])/diff(xl) + margin[1], 5) yt <- function(y) round((h - sum(margin[c(2,4)]))*(y - yl[1])/diff(yl) + margin[2], 5) ## \color screws up line and circle placement if first multiputlist ## and put occur after \color if(xaxis) { z <- c(z, paste(f('\\multiputlist(%g,%g)(%g,%g){', xt(xl[1]), yt(yl[1]) - .15, diff(xt(p[1:2])), 0), paste(p, collapse=','), '}', sep='')) z <- c(z, ln(xt(p), yt(yl[1]) - 0.05, xt(p), yt(yl[1]))) if(xlab != '') z <- c(z, txt(xt(xl[1] + diff(xl)/2), .1, xlab)) } z <- c(z, ln(xt(xl), yt(yl[1]), xt(xl), yt(yl[2])), ln(xt(xl[1]), yt(yl), xt(xl[2]), yt(yl))) den <- ndata + 2 * length(glabels) + 1 delt <- ( - (yl[2] - yl[1]))/den ypos <- seq(yl[2], by = delt, length = ndata) if(! missing(groups)) { ypos1 <- ypos + 2 * delt * (if(length(groups)>1) cumsum(c(1, diff(groups) > 0)) else 1) diff2 <- c(3 * delt, diff(ypos1)) ypos2 <- ypos1[abs(diff2 - 3 * delt) < abs(0.001 * delt)] - delt ypos <- c(ypos1, ypos2) - delt } ##put on labels and data ypos <- ypos + delt nongrp <- 1:ndata if(lines) z <- c(z, ln(xt(xl[1]), yt(ypos[nongrp]), xt(xl[2]), yt(ypos[nongrp]), color=lcolor)) for(i in seq(along = alldat)) if(! is.na(alldat[i] + ypos[i])) z <- c(z, f('\\put(%g,%g){\\circle*{%g}}', xt(alldat[i]), yt(ypos[i]), dotsize)) if(! missing(auxdata)) { z <- c(z, txt(w - 0.02, yt(ypos[nongrp]), auxdata, size=size.labels, just='r')) if(! missing(auxtitle)) z <- c(z, txt(w - 0.02, yt(yl[2]) + 0.1, auxtitle, size=size.labels, just='r')) } labng <- alllab[nongrp] yposng <- ypos[nongrp] z <- c(z, txt(margin[1] - 0.05, yt(yposng), labng, size=size.labels, just='r', tt=ttlabels)) if(! 
missing(groups))
    z <- c(z, txt(margin[1] - 0.05, yt(ypos[-nongrp]), alllab[-nongrp],
                  size=size.group.labels, just='r'))

  z <- c(z, '\\end{picture}')
  z
}

Hmisc/R/dotchartpl.s

dotchartpl <- function(x, major=NULL, minor=NULL, group=NULL, mult=NULL,
                       big=NULL, htext=NULL, num=NULL, denom=NULL,
                       numlabel='', denomlabel='',
                       fun=function(x) x, ifun=function(x) x, op='-',
                       lower=NULL, upper=NULL,
                       refgroup=NULL, sortdiff=TRUE, conf.int=0.95,
                       minkeep=NULL, xlim=NULL, xlab='Proportion',
                       tracename=NULL, limitstracename='Limits',
                       nonbigtracename='Stratified Estimates',
                       dec=3, width=800, height=NULL,
                       col=colorspace::rainbow_hcl)
{
  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")

  mu   <- markupSpecs$html
  bold <- mu$bold

  if(! length(xlim)) xlim <- c(min(c(x, lower), na.rm=TRUE),
                               max(c(x, upper), na.rm=TRUE))

  majorpres <- length(major) > 0
  major     <- if(majorpres) as.character(major) else rep(' ', length(x))
  minorpres <- length(minor) > 0
  if(! (majorpres || minorpres))
    stop('must specify major or minor or both')

  grouppres <- length(group) > 0   ## superpositioning variable
  multpres  <- length(mult)  > 0
  limspres  <- length(lower) * length(upper) > 0
  rgpres    <- length(refgroup) > 0

  if(minorpres) minor <- as.character(minor)
  if(grouppres) group <- as.character(group)
  if(multpres)  mult  <- as.character(mult)
  ugroup <- if(grouppres) unique(group) else ''

  fmt <- function(x) format(round(x, dec))

  if(rgpres) {
    if(! grouppres || multpres || length(big))
      stop('when refgroup is given, group must be given and mult and big must not be used')
    ## big=TRUE for non-stratified estimates
    if(length(ugroup) != 2) stop('refgroup only works for 2 groups')
    if(refgroup %nin% unique(group))
      stop(paste('refgroup must be one of the following:',
                 paste(unique(group), collapse=', ')))
    altgroup <- setdiff(ugroup, refgroup)
  }

  cols <- if(length(col)) {
            if(! is.function(col)) col
            else if(grouppres) col(length(unique(group)))
            else col(1)
          }
  if(! length(col) && ! grouppres) cols <- 'black'

  dropped <- character(0)
  D <- NULL

  if(rgpres && minorpres) {
    z <- pairUpDiff(x, major, minor, group,
                    refgroup=refgroup, lower=lower, upper=upper,
                    minkeep=minkeep, sortdiff=sortdiff, conf.int=conf.int)
    Z <- z$X
    D <- z$D
    dropped <- z$dropped
    i     <- Z[,'subscripts']
    x     <- x[i]
    major <- major[i]
    minor <- minor[i]
    group <- group[i]
    if(length(num))   num   <- num[i]
    if(length(denom)) denom <- denom[i]
    if(length(mult))  mult  <- mult[i]
    if(length(big))   big   <- big[i]
    if(length(lower)) lower <- lower[i]
    if(length(upper)) upper <- upper[i]
    if(length(htext)) htext <- htext[i]
  }  # end if(rgpres && ...
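  ## Minimal dotchartpl sketch (not run; variable names and the
  ## refgroup level are hypothetical).  When refgroup is given,
  ## two-group differences and their confidence limits come from
  ## pairUpDiff above:
  ##   dotchartpl(prop, major=region, minor=event, group=sex,
  ##              num=numer, denom=denom, refgroup='Female',
  ##              xlab='Proportion')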
ht <- htext if(numlabel != '') numlabel <- paste0(' ', numlabel) if(denomlabel != '') denomlabel <- paste0(' ', denomlabel) if(length(num)) ht <- paste0(ht, if(length(htext)) mu$lspace, fmt(fun(x)), mu$lspace, mu$frac(paste0(num, numlabel), paste0(denom, denomlabel), size=95)) ## if confidence interval for differences are not to be displayed, ## put point estimate confidence intervals in point hypertext if(length(ugroup) != 2 && limspres) ht <- paste0(ht, ' [', fmt(fun(lower)), ', ', fmt(fun(upper)), ']') ht <- paste0(ht, if(length(ht)) '<br>', if(majorpres) paste0(major, ': ')) if(minorpres) ht <- paste0(ht, minor) if(grouppres) ht <- paste0(ht, '<br>', gsub(' stratified<br>by .*', '', group)) if(multpres) ht <- paste0(ht, '<br>', mult) n <- length(x) minor <- if(minorpres) minor else rep('', n) group <- if(grouppres) group else rep('', n) mult <- if(multpres) mult else rep('', n) if(! length(big)) big <- rep(TRUE, n) w <- c(length(x), length(major), length(minor), length(group), length(num), length(denom), length(mult), length(big), length(htext), length(lower), length(upper)) w <- w[w > 0] if(diff(range(w)) > 0) stop('x, major, minor, group, num, denom, mult, big, htext, lower, upper must all have same length when used') y <- 1 X <- Y <- Ydiff <- Lower <- Upper <- numeric(0) Group <- Htext <- Htextl <- character(0) Big <- logical(0) yl <- numeric(0); yt <- ytnb <- character(0) difflower <- diffupper <- Diff <- numeric(0) coldiff <- htdiff <- character(0) lines <- 0 for(ma in unique(major)) { y <- y - 1 yl <- c(yl, y) yt <- c(yt, bold(ma)) ytnb <- c(ytnb, ma) lminor <- unique(minor[major == ma]) y <- y + if(all(lminor == '')) 0.4 else 0 lines <- lines + 1 for(mi in lminor) { y <- y - 0.4 lines <- lines + (mi != '') j <- which(major == ma & minor == mi) X <- c(X, x[j]) Y <- c(Y, ifelse(big[j], y, y - .14)) if(length(D)) { k <- which(D$major == ma & D$minor == mi) if(length(k) != 1) stop('must have one observation for a major/minor/group combination') diff <- D$diff[k] coldiff <- c(coldiff, ifelse(diff > 0, paste0(altgroup, ' ', htmlTranslate('>'), ' ', refgroup), paste0(refgroup, ' ', htmlTranslate('>='), ' ', altgroup))) Ydiff <- c(Ydiff, y) htd <- if(majorpres) paste0(ma, ': ') else '' if(minorpres) htd <- paste0(htd, mi) htd <- paste0(htd, '<br>', altgroup, ' ', op, ' ', refgroup, ': ', fmt(fun(diff))) if(! is.logical(conf.int) && limspres && length(D)) { dlower <- fmt(fun(D$lower[k])) dupper <- fmt(fun(D$upper[k])) htd <- paste0(htd, '<br>', conf.int, ' C.L.: [', dlower, ', ', dupper, ']') Diff <- c(Diff, diff) difflower <- c(difflower, D$lowermid[k]) diffupper <- c(diffupper, D$uppermid[k]) } htdiff <- c(htdiff, htd) } # end if(length(D)) if(limspres && ! length(D)) { Lower <- c(Lower, lower[j]) Upper <- c(Upper, upper[j]) Htextl <- c(Htextl, paste0('[', fmt(fun(lower[j])), ', ', fmt(fun(upper[j])), ']' ) ) } Group <- c(Group, group[j]) Big <- c(Big, big[j]) Htext <- c(Htext, ht[j]) if(mi != '') { yl <- c(yl, y) yt <- c(yt, mi) ytnb <- c(ytnb, mi) } } # end mi in lminor } # end ma in lmajor d <- data.frame(X, Y, Group, Htext, Big) if(limspres && ! length(D)) { d$Lower <- Lower d$Upper <- Upper d$Htextl <- Htextl } if(! grouppres) d$Group <- NULL if(any(d$Big)) { db <- subset(d, Big) # non-stratified estimates ## For some reason, colors= in add_marker did not always take if(! 
length(height)) height <- plotlyParm$heightDotchart(lines) auto <- .Options$plotlyauto if(length(auto) && auto) height <- width <- NULL p <- plotly::plot_ly(data=db, colors=cols, height=height, width=width) if(limspres && length(D)) { ddiff <- data.frame(Diff, difflower, diffupper, Ydiff, coldiff, htdiff) ## Could not get color=coldiff to work; perhaps conflicted with ## earlier use of color = nDiff <- Diff[! is.na(Diff)] if(length(nDiff)) { if(any(nDiff > 0)) p <- plotly::add_segments(p, data=subset(ddiff, Diff > 0), x= ~ difflower, xend= ~ diffupper, y= ~ Ydiff, yend= ~ Ydiff, color = I('lightgray'), text = ~ htdiff, hoverinfo='text', name = paste0(htmlSpecial('half'), ' CL of difference<br>', coldiff[Diff > 0][1])) if(any(nDiff <= 0)) p <- plotly::add_segments(p, data=subset(ddiff, Diff <= 0), x= ~ difflower, xend= ~ diffupper, y= ~ Ydiff, yend= ~ Ydiff, color = I('lavender'), text = ~ htdiff, hoverinfo='text', name = paste0(htmlSpecial('half'), ' CL of difference<br>', coldiff[Diff <= 0][1])) } } ## tracename and limitstracename are used if groups not used if(limspres && ! length(D) && length(ugroup) == 2) p <- if(grouppres) plotly::add_segments(p, data=db, x=~ Lower, xend=~ Upper, y=~ Y, yend=~ Y, color=~ Group, text= ~ Htextl, colors=cols, hoverinfo='text') else plotly::add_segments(p, x=~ Lower, xend=~ Upper, y=~ Y, yend=~ Y, text= ~ Htextl, color=I('lightgray'), hoverinfo='text', name=limitstracename) p <- if(grouppres) plotly::add_markers(p, x=~ X, y=~ Y, color=~ Group, text=~ Htext, colors=cols, hoverinfo='text') else plotly::add_markers(p, x=~ X, y=~ Y, text=~ Htext, color=I('black'), hoverinfo='text', name=if(length(tracename)) tracename else if(any(! d$Big)) 'All' else '') } else p <- plotly::plot_ly(colors=cols) # Big is not used if(any(! d$Big)) { dnb <- subset(d, ! Big) # stratified estimates if(limspres) p <- if(grouppres && length(ugroup) == 2) plotly::add_segments(p, data=dnb, x=~ Lower, xend=~ Upper, y=~ Y, yend=~ Y, color=~ Group, text=~ Htextl, colors=cols, hoverinfo='text') else plotly::add_segments(p, data=dnb, x=~ Lower, xend=~ Upper, y=~ Y, yend=~ Y, text=~ Htextl, color=I('lightgray'), hoverinfo='text', name=limitstracename) dnb$sGroup <- paste0(dnb$Group, '\nby ', gsub('Stratified by\n', '', nonbigtracename)) ## Don't understand why if use ~ sGroup right below the symbols no ## longer appear in the legend for the non-stratified estimates p <- if(grouppres) plotly::add_markers(p, data=dnb, x=~ X, y=~ Y, color=~ Group, text=~ Htext, colors=cols, marker=list(opacity=0.45, size=4), hoverinfo='text') # name=nonbigtracename) else plotly::add_markers(p, data=dnb, x=~ X, y=~ Y, text=~ Htext, marker=list(opacity=0.45, size=4), color=I('black'), hoverinfo='text', name=nonbigtracename) } # end if(any(! 
Big))
  leftmargin <- plotlyParm$lrmargin(ytnb)
  tickvals   <- pretty(fun(xlim), n=10)

  xaxis <- list(title=xlab, range=xlim, zeroline=FALSE,
                tickvals=ifun(tickvals), ticktext=tickvals)
  yaxis <- list(title='', range=c(min(Y) - 0.2, 0.2), zeroline=FALSE,
                tickvals=yl, ticktext=yt)

  p <- plotly::layout(p, xaxis=xaxis, yaxis=yaxis,
                      margin=list(l=leftmargin),
                      legend=list(traceorder=if(length(difflower))
                                    'reversed' else 'normal'))
  attr(p, 'levelsRemoved') <- dropped
  p
}

Hmisc/R/discrete.s

discrete <- function(x, levels=sort(unique.default(x), na.last=TRUE),
                     exclude = NA)
{
  if(!is.numeric(x)) {
    stop('x must be a numeric variable')
  }

  exclude <- as.vector(exclude, typeof(x))
  levels  <- levels[is.na(match(levels, exclude))]

  f <- x[!(x %in% exclude)]
  attr(f, 'levels') <- levels
  class(f) <- "discrete"
  f
}

as.discrete <- function(x, ...) UseMethod("as.discrete")

as.discrete.default <- function(x, ...) {
  if(is.discrete(x)) x else discrete(x)
}

is.discrete <- function(x) inherits(x, 'discrete')

"[.discrete" <- function(x, ..., drop=FALSE) {
  y <- NextMethod("[")
  attr(y, 'levels') <- attr(x, 'levels')
  class(y) <- class(x)
  if( drop ) {
    factor(y)
  } else {
    y
  }
}

"[<-.discrete" <- function(x, ..., value) {
  lx <- levels(x)
  cx <- class(x)

  m <- match(value, lx)
  if (any(is.na(m) & !is.na(value))) {
    warning("invalid factor level, NAs generated")
  }

  class(x) <- NULL
  x[...]
<- m attr(x,"levels") <- lx class(x) <- cx x } "[[.discrete" <- function(x, i) { y <- NextMethod("[[") attr(y,"levels")<-attr(x,"levels") class(y) <- class(x) y } "is.na<-.discrete" <- function(x, value) { lx <- levels(x) cx <- class(x) class(x) <- NULL x[value] <- NA structure(x, levels = lx, class = cx) } "length<-.discrete" <- function(x, value) { cl <- class(x) levs <- levels(x) x <- NextMethod() structure(x, levels=levs, class=cl) } �������������������������������������������������������Hmisc/R/summaryRc.s���������������������������������������������������������������������������������0000644�0001762�0000144�00000011553�12243661443�013676� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������summaryRc <- function(formula, data=NULL, subset=NULL, na.action=NULL, fun=function(x) x, na.rm=TRUE, ylab=NULL, ylim = NULL, xlim=NULL, nloc=NULL, datadensity=NULL, quant = c(0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95), quantloc = c('top', 'bottom'), cex.quant=.6, srt.quant=0, bpplot = c('none', 'top', 'top outside', 'top inside', 'bottom'), height.bpplot = 0.08, trim=NULL, test=FALSE, vnames=c('labels','names'), ...) { call <- match.call() quantloc <- match.arg(quantloc) vnames <- match.arg(vnames) bpplot <- match.arg(bpplot) if(bpplot == 'top') bpplot <- 'top inside' X <- match.call(expand.dots=FALSE) X$fun <- X$na.rm <- X$ylim <- X$xlim <- X$ylab <- X$nloc <- X$datadensity <- X$quant <- X$quantloc <- X$cex.quant <- X$srt.quant <- X$trim <- X$test <- X$vnames <- X$bpplot <- X$height.bpplot <- X$... <- NULL if(missing(na.action)) X$na.action <- na.retain Terms <- if(missing(data)) terms(formula, 'stratify') else terms(formula, 'stratify', data=data) X$formula <- Terms X[[1]] <- as.name("model.frame") X <- eval(X, sys.parent()) Terms <- attr(X, "terms") resp <- attr(Terms, "response") nact <- attr(X, "na.action") nvar <- ncol(X) - 1 strat <- attr(Terms, 'specials')$stratify getlab <- function(x, default) { if(vnames == 'names') return(default) lab <- attr(x, 'label') if(!length(lab) || lab=='') default else lab } if(length(strat)) { temp <- untangle.specials(Terms,'stratify') strat.name <- var.inner(Terms)[temp$terms] strat <- if(length(temp$vars) == 1) as.factor(X[[temp$vars]]) else stratify(X[,temp$vars]) strat.label <- getlab(X[, temp$vars[1]], strat.name) X[[temp$vars]] <- NULL # remove strata factors } Y <- X[[resp]] yname <- as.character(attr(Terms, 'variables'))[2] ylabel <- if(length(ylab)) ylab else getlab(Y, yname) X[[resp]] <- NULL # remove response var Y <- as.matrix(Y) s <- rowSums(is.na(Y)) == ncol(Y) nmissy <- sum(s) if(nmissy) { X <- X[!s,, drop=FALSE] Y <- Y[!s,, drop=FALSE] strat <- strat[!s] } pl <- function(x, y, strat=NULL, quant, bpplot, width.bpplot, xlab='', ylab='', ylim=NULL, xlim=NULL, fun=function(x) x, ...) { n <- sum(!is.na(x)) group <- if(length(strat)) strat else rep(1, length(x)) if(!length(trim)) trim <- if(n > 200) 10 / n else 0 if(!length(xlim)) { xlim <- if(trim == 0) range(x, na.rm=TRUE) else quantile(x, c(trim, 1 - trim), na.rm=TRUE) } a <- list(x=x, y=y, xlab=xlab, ylab=ylab, xlim=xlim, trim=0, group=group, datadensity=if(length(datadensity)) datadensity else length(strat) > 0, ...) 
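    ## plsmo, invoked just below through do.call, draws the
    ## nonparametric smooth of y against x.  A typical top-level call
    ## is sketched here (not run; variable names hypothetical):
    ##   summaryRc(sbp ~ age + cholesterol, data=d,
    ##             bpplot='top', datadensity=TRUE)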
if(length(fun)) a$fun <- fun if(length(ylim)) a$ylim <- ylim z <- do.call('plsmo', a) usr <- par('usr') xl <- usr[1:2] yl <- usr[3:4] if(! (length(nloc) && is.logical(nloc) && !nloc)) { if(length(nloc)) { xx <- nloc[[1]] yy <- nloc[[2]] xx <- xl[1] + xx * diff(xl) yy <- yl[1] + yy * diff(yl) w <- list(x=xx, y=yy) } else { xs <- unlist(lapply(z, function(x)x$x)) ys <- unlist(lapply(z, function(x)x$y)) w <- largest.empty(xs, ys, method='area') } text(w, paste('n=', n, sep=''), cex=.75, font=3, adj=.5) } # Compute user y-units per inch u <- diff(yl) / par('fin')[2] if(bpplot != 'none') { h <- u * height.bpplot yy <- switch(bpplot, 'top outside' = yl[2] + h/2 + u*.11, 'top inside' = yl[2] - h/2 - u*.11, bottom = yl[1] + h/2 + u*.11) panel.bpplot(x, yy, nogrid=TRUE, pch=19, cex.means=.6, height=h) } else if(length(quant)) { h <- u * .15 qu <- quantile(x, quant, na.rm=TRUE) names(qu) <- as.character(quant) qu <- pooleq(qu) yq <- if(quantloc == 'top') yl[2] else yl[1] arrows(qu, yq + h, qu, yq, col='blue', length=.05, xpd=NA) if(cex.quant > 0) text(qu, yq + 1.4 * h, names(qu), adj=if(srt.quant == 0) .5 else 0, cex=cex.quant, srt=srt.quant, xpd=NA) } ## text(xl[2], yl[2] + h/4, paste('n=', n, sep=''), ## cex=.75, font=3, adj=c(1,0), xpd=NA) } ## Find all ties in quantiles and build combined labels pooleq <- function(x) { w <- tapply(names(x), x, paste, collapse=', ') x <- as.numeric(names(w)) names(x) <- w x } i <- 0 nams <- names(X) for(v in nams) { i <- i + 1 x <- X[[v]] xlab <- getlab(x, nams[i]) units <- if(length(l <- attr(x,'units'))) l else '' xlab <- labelPlotmath(xlab, units) pl(x, Y, strat=strat, quant=quant, bpplot=bpplot, height.bpplot=height.bpplot, xlab=xlab, ylab=ylabel, ylim=ylim, xlim=xlim[[v]], ...) } } �����������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/ftu.s���������������������������������������������������������������������������������������0000644�0001762�0000144�00000003320�12243661443�012503� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������##Dan Heitjan dheitjan@biostats.hmc.psu.edu ftupwr <- function(p1,p2,bign,r,alpha) { ## Compute the power of a two-sided level alpha test of the ## hypothesis that pi1=pi2, when pi1=p1, pi2=p2, and there are ## bign observations, bign/(1+r) in group 1 and r*bign/(1+r) in ## group 2. This is based on the two-tailed test version of ## formula (6) in Fleiss, Tytun and Ury (1980 Bcs 36, 343--346). ## This may be used for del not too small (del>=0.1) and r not ## too big or small (0.33<=r<=3). ## Daniel F. Heitjan, 30 April 1991 mstar <- bign/(r+1) del <- abs(p2-p1) rp1 <- r+1 zalp <- qnorm(1-alpha/2) pbar <- (p1+r*p2)/(1+r) qbar <- 1-pbar num <- (r*del^2*mstar-rp1*del)^0.5-zalp*(rp1*pbar*qbar)^0.5 den <- (r*p1*(1-p1)+p2*(1-p2))^0.5 zbet <- num/den pnorm(zbet) } ftuss <- function(p1,p2,r,alpha,beta) { ## Compute the approximate sample size needed to have power 1-beta ## for detecting significance in a two-tailed level alpha test of ## the hypothesis that pi1=pi2, when pi1=p1, pi2=p2, and there ## are to be m in group 1 and rm in group 2. 
The calculation is
  ## based on equations (3) and (4) of Fleiss, Tytun and Ury (1980
  ## Bcs 36, 343--346).  This is accurate to within 1% for
  ## moderately large values of del(p2-p1) (del>=0.1) and sample
  ## sizes that are not too disproportionate (0.5<=r<=2).
  ## Daniel F. Heitjan, 30 April 1991
  zalp <- qnorm(1-alpha/2)
  zbet <- qnorm(1-beta)
  rp1  <- (r+1)
  pbar <- (p1+r*p2)/rp1
  qbar <- 1-pbar
  q1  <- 1-p1
  q2  <- 1-p2
  del <- abs(p2-p1)
  num <- (zalp*(rp1*pbar*qbar)^0.5+zbet*(r*p1*q1+p2*q2)^0.5)^2
  den <- r*del^2
  mp  <- num/den
  m   <- 0.25*mp*(1+(1+2*rp1/(r*mp*del))^0.5)^2
  list(n1=floor(m+1), n2=floor(m*r+1))
}

Hmisc/R/panel.abwplot.s

if(FALSE) {
panel.abwplot <- function(x, y, box.ratio = 1, means=TRUE,
                          font = box.dot$font, pch = box.dot$pch,
                          cex = box.dot$cex, col = box.dot$col, ...)
{
  ok <- !is.na(x) & !is.na(y)
  x <- x[ok]
  y <- y[ok]
  y.unique <- sort(unique(y))
  width <- box.ratio/(1 + box.ratio)
  w <- width/2
  lineopts <- trellis.par.get("box.rectangle")
  for(Y in y.unique) {
    X <- x[y == Y]
    q <- quantile(X, c(.01,.05,.1,.25,.75,.9,.95,.99,.5))
    median.value <- list(x = q[9], y = Y)
    z <- c(1, .01,  2, .01,  2, .05,  3, .05,  3, .10,  4, .10,
           4, .25,  5, .25,  5, .10,  6, .10,  6, .05,  7, .05,
           7, .01,  8, .01,  8,-.01,  7,-.01,  7,-.05,  6,-.05,
           6,-.10,  5,-.10,  5,-.25,  4,-.25,  4,-.10,  3,-.10,
           3,-.05,  2,-.05,  2,-.01,  1,-.01,  1, .01)
    box.dot <- trellis.par.get("box.dot")
    box.dot.par <- c(list(pch = pch, cex = cex, col = col, font = font),
                     ...)
    do.call('lines', c(list(x=q[z[seq(1,length(z),by=2)]],
                            y=Y + 4*w*z[seq(2,length(z),by=2)]), lineopts))
    ##do.call('segments',c(list(x1=q[c(2:7)],y1=Y+rep(-w,6),
    ##                          x2=q[c(2:7)],y2=Y+rep(w,6)),
    ##                     lineopts))
    do.call("points", c(median.value, box.dot.par))
    if(means)
      do.call('lines', c(list(x=rep(mean(X),2), y=Y+c(-w,w)),
                         lineopts, lty=2))
  }
}
NULL
}

Hmisc/R/gettext.s

### These are functions designed for compatibility with S-Plus
### in the area of R internationalization.  They are named with a
### prefix of "Splus".
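### A quick sketch of what the sprintf fallback below handles (not run):
###   sprintf('%d of %d complete', 3, 10)   # "3 of 10 complete"
###   gettextf('found %s', 'x')             # formats via the same fallback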
### ### These functions contain representations of sprintf, gettext, ### gettextf, and ngettext if(!exists("sprintf")) sprintf <- function(fmt, ...) { ldots <- list(...) text <- vector("character") vars <- vector("character") i <- 1; j <- 1; temp <- fmt while (nchar(temp)) { ne <- regexpr('(?<!%)%[^%]*?[dixXfeEgGs]', temp, perl=TRUE) if( ne < 0 ) { text[i] <- gsub('%%', '%', temp) temp <- "" } else { text[i] <- gsub('%%', '%', substr(temp, 0, ne-1)) i <- i + 1 vars[j] <- substr(temp, ne+1, ne+attr(ne, "match.length")-1) j <- j + 1 temp <- substr(temp, ne+attr(ne, "match.length"), nchar(temp)) } } output <- NULL j <- 1 for( i in 1:(length(text) - 1)) { output <- paste(output, text[i], sep='') if(regexpr('^\\d+\\$', vars[i], perl=TRUE) > 0){ arg <- sub('^(\\d+)\\$.*$', '\\1', vars[i], perl=TRUE) if(arg > 0 && arg < length(ldots)) { val <- as.integer(arg) } else stop("Error") } else { val <- j j <- j + 1 } output <- paste(output, ldots[[val]], sep='') } return(paste(output, text[length(text)], sep='')) } if(!exists("gettext")) gettext <- function(..., domain=NULL) return(unlist(list(...))) if(!exists("gettextf")) gettextf <- function(fmt, ..., domain=NULL) { return(sprintf(fmt, ...)) } if(!exists("ngettext")) ngettext <- function(n, msg1, msg2, domain = NULL) { if(n == 1) return(msg1) return(msg2) } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/na.detail.response.s������������������������������������������������������������������������0000644�0001762�0000144�00000002450�12243661443�015404� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������na.detail.response <- function(mf) { if(is.null(z <- .Options$na.detail.response) || !z) return(NULL) response <- model.extract(mf, response) if(is.null(response)) return(NULL) if(!is.matrix(response)) response <- as.matrix(response) GFUN <- options()$na.fun.response if(is.null(GFUN)) GFUN <- function(x, ...) 
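  ## The default GFUN defined below summarizes the response by N and its
  ## mean; a different summary function may be named through the
  ## na.fun.response option, e.g. (illustrative; mysummary() is hypothetical):
  ##   options(na.detail.response=TRUE, na.fun.response='mysummary')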
{ if(is.matrix(x)) x <- x[,ncol(x)] x <- x[!is.na(x)] c(N=length(x),Mean=mean(x)) } else GFUN <- eval.parent(as.name(GFUN)) w <- NULL; nam <- names(mf); wnam <- NULL N <- nrow(mf) p <- ncol(mf) omit <- rep(FALSE, N) for(i in 2:p) { x <- mf[,i] if(is.matrix(x)) x <- x[,1] isna <- is.na(x) omit <- omit | isna nmiss <- sum(isna) if(nmiss) { w <- cbind(w, GFUN(response[isna,])) wnam <- c(wnam, paste(nam[i],"=NA",sep="")) } n <- N-nmiss if(n) { w <- cbind(w, GFUN(response[!isna,])) wnam <- c(wnam, paste(nam[i],"!=NA",sep="")) } } ## summarize responce for ANY x missing if(p>2) { nmiss <- sum(omit) if(nmiss) { w <- cbind(w, GFUN(response[omit,])) wnam <- c(wnam, "Any NA") } if(N-nmiss) { w <- cbind(w, GFUN(response[!omit,])) wnam <- c(wnam, "No NA") } } dimnames(w)[[2]] <- wnam w } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/tabulr.s������������������������������������������������������������������������������������0000644�0001762�0000144�00000015572�14112727067�013214� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tabulr <- function(formula, data=NULL, nolabel=NULL, nofill=NULL, ...) { ## require(gsubfn) || stop('package gsubfn not installed') if (!requireNamespace("tables", quietly = TRUE)) stop("This function requires the 'tables' package.") if(!length(data)) data <- environment(formula) else if(is.list(data)) data <- list2env(data, parent=environment(formula)) ## f <- as.character(deparse(formula)) # lab <- function(x, hfill=TRUE) { ## x <- gsub('^ +', '', x) ## x <- gsub(' +$', '', x) # l <- labelLatex(get(x, envir=data), default=x, double=TRUE, hfill=hfill) # paste("Heading('", l, "')*", x, sep='') # } lab <- function(x) { x <- deparse(x) if(x == 'trio') return('table_trio') if(x == 'freq') return('table_freq') if(x == 'N') return('Heading()*table_N') if(! (exists(x, envir=data, mode='numeric') | exists(x, envir=data, mode='character'))) return(x) if(length(nolabel) && x %in% all.vars(nolabel)) return(x) xval <- get(x, envir=data) if(label(xval) == '') return(x) l <- labelLatex(xval, double=FALSE, hfill=!length(nofill) || x %nin% all.vars(nofill)) paste("Heading('", l, "')*", x, sep='') } # f <- gsubfn("\\.\\((.*?)\\)", ~ lab(x), f) # f <- gsubfn("\\.n\\((.*?)\\)", ~ lab(x, hfill=FALSE), f) # f <- gsubfn("\\.n\\((.*?)\\)", ~ lab(x, hfill=FALSE), f) # f <- gsubfn('([ \\(]+)l \\* *([A-Za-z\\_\\.][A-Z0-9a-z\\_\\.]*?)', # ~ paste(x, lab(y), sep=''), f) # f <- gsubfn('([ \\(]+)l\\. +\\* *([A-Za-z\\_\\.][A-Z0-9a-z\\_\\.]*?)', # ~ paste(x, lab(y, hfill=FALSE), sep=''), f) ## A variable is a string of characters, _, . 
not starting with 0-9 ## delimited by # f <- gsubfn('[ \\(\\*\\+ ]*([A-Za-z\\_\\.]+[A-Za-z0-9\\_\\.]*)[ \\(\\*\\+]*', ~ paste('#',x,'#',sep=''), '1a+b') # gsubfn('[ \\(\\*\\+ ]*([A-Za-z\\_\\.]+[A-Za-z0-9\\_\\.]*)[ \\(\\*\\+]*', ~ paste('#',x,'#',sep=''), '1a+b*dd + f==h' # f <- gsubfn( "([a-zA-Z_\\.][a-zA-Z0-9_\\.]*)((?=\\s*[-+~)*])|\\s*$)", # ~ paste0(toupper(x),'z'), f, perl=TRUE ) # From Bill Dunlap ff <- function(expr, convertName) { if (is.call(expr) && is.name(expr[[1]]) && is.element(as.character(expr[[1]]), c("~","+","-","*","/","%in%","%nin%","(", ":"))) { for(i in seq_along(expr)[-1]) expr[[i]] <- Recall(expr[[i]], convertName = convertName) } else if (is.name(expr)) expr <- as.name(convertName(expr)) expr } f <- ff(formula, lab) f <- as.formula(gsub("`", "", as.character(deparse(f)))) result <- tables::tabular(f, data=data, ...) attr(result, 'originalformula') <- formula result } table_trio <- function(x) { if (!requireNamespace("tables", quietly = TRUE)) stop("This function requires the 'tables' package.") o <- tables::table_options() s <- function(x, default) if(length(x)) x else default left <- s(o$left, 3) right <- s(o$right, 1) prmsd <- s(o$prmsd, FALSE) pn <- s(o$pn, FALSE) pnformat <- s(o$pnformat, "n") pnwhen <- s(o$pnwhen, "all") bold <- s(o$bold, FALSE) isna <- is.na(x) x <- x[!isna] if(!length(x)) return('') qu <- quantile(x, (1:3)/4) w <- paste('{\\smaller ', nFm(qu[1], left, right), '} ', if(bold) '\\textbf{', nFm(qu[2], left, right), if(bold) '}', ' {\\smaller ', nFm(qu[3], left, right), '}', sep='') if(pnwhen == 'ifna' && !any(isna)) pn <- FALSE if(prmsd || pn) { w <- paste(w, '~{\\smaller (', sep='') if(prmsd) w <- paste(w, nFm(mean(x), left, right), '$\\pm$', nFm(sd(x), left, right), sep='') if(pn) w <- paste(w, if(prmsd)' ', '$', if(pnformat == 'n') 'n=', length(x), '$', sep='') w <- paste(w, ')}', sep='') } w } table_N <- function(x) paste('{\\smaller $n=', length(x), '$}', sep='') nFm <- function(x, left, right, neg=FALSE, pad=FALSE, html=FALSE) { tot <- if(right == 0) left + neg else left + right + neg + 1 fmt <- paste('%', tot, '.', right, 'f', sep='') x <- sprintf(fmt, x) if(pad) x <- gsub(' ', if(html) '' else '~', x) x } table_freq <- function(x) { if(!length(x) || all(is.na(x))) return('') if (!requireNamespace("tables", quietly = TRUE)) stop("This function requires the 'tables' package.") w <- table(x) den <- sum(w) to <- tables::table_options() showfreq <- to$showfreq if(!length(showfreq)) showfreq <- 'all' pctdec <- to$pctdec if(!length(pctdec)) pctdec <- 0 i <- switch(showfreq, all = 1:length(w), high = which(w == max(w)), low = which(w == min(w))) m <- w[i] fpct <- table_formatpct(m, den) if(showfreq == 'all') { z <- paste(names(m), '\\hfill', fpct, sep='') z <- paste(z, collapse='\\\\', sep='') len <- max(nchar(names(m))) + 9 + pctdec + 1 * (pctdec > 0) z <- paste('\\parbox{', len, 'ex}{\\smaller ', z, '}', sep='') return(z) } lab <- paste(names(m), collapse=', ') num <- m[1] paste(lab, ':', table_formatpct(num, den), sep='') } table_pc <- function(x, y) { maxn <- max(length(x), length(y)) maxdig <- 1L + floor(log10(maxn)) num <- if(all(is.na(x))) length(x) else if(is.logical(x)) sum(x) else sum(x %in% c('yes','Yes')) den <- if(all(is.na(y))) length(y) else sum(!is.na(y)) prn(c(num,den)); prn(table(x, exclude=NULL)); prn(table(y, exclude=NULL)) table_formatpct(num, den) } table_formatpct <- function(num, den) { if(den == 0 | all(is.na(num + den))) return('') if (!requireNamespace("tables", quietly = TRUE)) stop("This function requires the 'tables' 
package.") to <- tables::table_options() npct <- to$npct pctdec <- to$pctdec if(!length(pctdec)) pctdec <- 0 if(!length(npct)) npct <- 'both' poss <- c('numerator', 'denominator', 'both', 'none') i <- charmatch(npct, poss) if(is.na(i)) stop('in table_options(npct=) npct must be "numerator", "denominator", "both", or "none"') npct <- poss[i] z <- paste(nFm(100 * num / den, 3, pctdec), '\\%', sep='') if(npct == 'none') return(z) if(npct == 'both') return(paste(z, '{\\smaller[2] $\\frac{', num, '}{', den, '}$}', sep='')) paste(z, '{\\smaller (', if(npct == 'numerator') num else den, ')}', sep='') } table_latexdefs <- function(file='') { ct <- function(...) cat(..., file=file) ct('\\makeatletter\n', '\\def\\blfootnote{\\xdef\\@thefnmark{}\\@footnotetext}\n', '\\makeatother\n') ct('\\def\\keytrio{\\blfootnote{Numbers in parentheses are the number of non-missing values. {\\smaller $a$} \\textbf{$b$}{\\smaller $c$} represents the first quartile $a$, the median $b$, and the third quartile $c$.}}\n') ct('\\def\\keytriomsd{\\blfootnote{Numbers in parentheses are the number of non-missing values. {\\smaller $a$} \\textbf{$b$}{\\smaller $c$} represents the first quartile $a$, the median $b$, and the third quartile $c$. $x \\pm s$ represents the mean and standard deviation.}}\n') invisible() } ��������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/cut2.s��������������������������������������������������������������������������������������0000644�0001762�0000144�00000011214�13412473156�012564� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# $Id$ ## Function like cut but left endpoints are inclusive and labels are of ## the form [lower, upper), except that last interval is [lower,upper]. ## F. Harrell 3 Dec 90, modified 7 Mar 92, mod 30May95 (more efficient digits) ## Modified 2Jun95 (preserve label attribute) ## Modified 16Jun95 (categories with 1 unique value -> label=value, not interval) ## Modified 1Jul95 - if specified cuts, mindif would cause improper ## categorization if a cut was close to but not equal an actual value ## Modified 21oct18 - added formatfun cut2 <- function(x, cuts, m=150, g, levels.mean=FALSE, digits, minmax=TRUE, oneval=TRUE, onlycuts=FALSE, formatfun = format, ...) { if (inherits(formatfun, "formula")) { if (!requireNamespace("rlang")) stop("Package 'rlang' must be installed to use formula notation") formatfun <- getFromNamespace('as_function', 'rlang')(formatfun) } method <- 1 ## 20may02 x.unique <- sort(unique(c(x[!is.na(x)],if(!missing(cuts))cuts))) min.dif <- min(diff(x.unique))/2 min.dif.factor <- 1 ## Make formatted values look good if(missing(digits)) digits <- if(levels.mean) 5 else 3 ## add digits to formatfun's arguments if relevant format.args <- if (any(c("...","digits") %in% names(formals(args(formatfun))))) { c(digits = digits, list(...)) } else { list(...) 
} oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) xlab <- attr(x, 'label') if(missing(cuts)) { nnm <- sum(!is.na(x)) if(missing(g)) g <- max(1,floor(nnm/m)) if(g < 1) stop('g must be >=1, m must be positive') options(digits=15) n <- table(x) xx <- as.double(names(n)) options(digits=digits) cum <- cumsum(n) m <- length(xx) y <- as.integer(ifelse(is.na(x),NA,1)) labs <- character(g) cuts <- approx(cum, xx, xout=(1:g)*nnm/g, method='constant', rule=2, f=1)$y cuts[length(cuts)] <- max(xx) lower <- xx[1] upper <- 1e45 up <- low <- double(g) i <- 0 for(j in 1:g) { cj <- if(method==1 || j==1) cuts[j] else { if(i==0) stop('program logic error') s <- if(is.na(lower)) FALSE else xx >= lower cum.used <- if(all(s)) 0 else max(cum[!s]) if(j==m) max(xx) else if(sum(s)<2) max(xx) else approx(cum[s]-cum.used, xx[s], xout=(nnm-cum.used)/(g-j+1), method='constant', rule=2, f=1)$y } if(cj==upper) next i <- i + 1 upper <- cj y[x >= (lower-min.dif.factor*min.dif)] <- i low[i] <- lower lower <- if(j==g) upper else min(xx[xx > upper]) if(is.na(lower)) lower <- upper up[i] <- lower } low <- low[1:i] up <- up[1:i] variation <- logical(i) for(ii in 1:i) { r <- range(x[y==ii], na.rm=TRUE) variation[ii] <- diff(r) > 0 } if(onlycuts) return(unique(c(low, max(xx)))) flow <- do.call(formatfun,c(list(low), format.args)) fup <- do.call(formatfun,c(list(up), format.args)) bb <- c(rep(')',i-1),']') labs <- ifelse(low==up | (oneval & !variation), flow, paste('[',flow,',',fup,bb,sep='')) ss <- y==0 & !is.na(y) if(any(ss)) stop(paste('categorization error in cut2. Values of x not appearing in any interval:\n', paste(format(x[ss],digits=12),collapse=' '), '\nLower endpoints:', paste(format(low,digits=12), collapse=' '), '\nUpper endpoints:', paste(format(up,digits=12),collapse=' '))) y <- structure(y, class='factor', levels=labs) } else { if(minmax) { r <- range(x, na.rm=TRUE) if(r[1]<cuts[1]) cuts <- c(r[1], cuts) if(r[2]>max(cuts)) cuts <- c(cuts, r[2]) } l <- length(cuts) k2 <- cuts-min.dif k2[l] <- cuts[l] y <- cut(x, k2) if(!levels.mean) { brack <- rep(")",l-1) brack[l-1] <- "]" fmt <- do.call(formatfun,c(list(cuts), format.args)) ## If any interval has only one unique value, set label for ## that interval to that value and not to an interval labs <- paste("[",fmt[1:(l-1)],",",fmt[2:l], brack,sep="") if(oneval) { nu <- table(cut(x.unique,k2)) if(length(nu)!=length(levels(y))) stop('program logic error') levels(y) <- ifelse(nu==1,c(fmt[1:(l-2)],fmt[l]),labs) } else levels(y) <- labs } } if(levels.mean) { means <- tapply(x, y, function(w)mean(w,na.rm=TRUE)) levels(y) <- do.call(formatfun,c(list(means), format.args)) } attr(y,'class') <- "factor" if(length(xlab)) label(y) <- xlab y } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/plot.describe.s�����������������������������������������������������������������������������0000644�0001762�0000144�00000027715�14247426647�014473� 0����������������������������������������������������������������������������������������������������ustar 
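## A minimal sketch of typical cut2 usage, with made-up data:
if(FALSE) {
  set.seed(1)
  x <- runif(100)
  table(cut2(x, g=4))               # four quantile groups
  table(cut2(x, cuts=c(.25, .75)))  # explicit cut points; minmax adds the range
  cut2(x, g=4, levels.mean=TRUE)    # label each interval by its mean
}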
�ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������plot.describe <- function(x, which=c('both', 'continuous', 'categorical'), what=NULL, sort=c('ascending', 'descending', 'none'), n.unique=10, digits=5, bvspace=2, ...) { which <- match.arg(which) pty <- grType() == 'plotly' && requireNamespace("plotly") auto <- .Options$plotlyauto auto <- length(auto) && auto if(length(what)) x <- x[what] sort <- match.arg(sort) specs <- if(pty) markupSpecs$html else markupSpecs$plain if(bvspace == 1) stop('bvspace may not be 1.0') format_counts <- function(s) { bl <- ' ' na <- substring(paste(names(s), bl, sep=''), 1, max(nchar(names(s)))) va <- paste(substring(bl, 1, max(nchar(s)) - nchar(s) + 1), s, sep='') zz <- paste(na, va, sep=':') gsub(' ', specs$space, zz) } fmtlab <- function(x) { lab <- sub('^.*:', '', x$descript) if(length(x$units)) lab <- specs$varlabel(lab, x$units) lab } ge <- function(i) unlist(lapply(w, function(x) x[[i]])) if(which != 'continuous') { f <- function(x) { s <- x$counts v <- x$values type <- if('Sum' %in% names(s)) 'binary' else if(length(v) && is.list(v) && all(names(v) == c('value', 'frequency')) && is.character(v$value) && length(v$value) <= 20) 'categorical' else 'none' if(type == 'none') return(data.frame(prop=numeric(0), text=character(0))) n <- as.numeric(s['n']) if(type == 'binary') { val <- '' freq <- as.numeric(s['Sum']) } else { val <- v$value freq <- v$frequency } category <- if(type == 'categorical') val else '' text <- paste(category, if(type == 'categorical') ' ', round(freq / n, 3), specs$frac(freq, n, size=90)) ## Details about variable for minimum frequency category left <- which.min(freq)[1] text[left] <- paste(c(fmtlab(x), text[left], format_counts(s)), collapse=specs$br) y <- if(type == 'binary') 0 else 1 : length(freq) y <- c(bvspace, rep(1, length(freq) - 1)) ## extra 1 for between-variable spacing j <- switch(sort, ascending = order(freq), descending= order(-freq), none = TRUE) data.frame(prop=freq[j] / n, y=y, text=I(text[j]), category=I(category[j]), missing=as.numeric(s['missing']), nlev=length(freq)) } w <- lapply(x, f) nam <- names(w) for(na in nam) { l <- length(w[[na]]$prop) w[[na]]$xname <- if(l) rep(na, l) else character(0) } if(length(ge('xname')) == 0) { warning('no categorical variables found') pcat <- NULL } else { z <- data.frame(xname = I(ge('xname')), Proportion = ge('prop'), y = ge('y'), text = I(ge('text')), category = I(ge('category')), Missing = ge('missing'), nlev = ge('nlev')) un <- unique(z$xname) nv <- length(un) z$xnamef <- factor(z$xname, levels=un) z$xnamen <- as.integer(z$xnamef) - z$y / pmax(0.7 * nv, z$nlev) z$cumy <- cumsum(z$y) if(! pty) z$cumy <- - z$cumy tly <- z$cumy[z$y == bvspace] if(! 
pty) { r <- range(z$Proportion) z$proplev <- r[2] + .2 pcat <- if(any(z$Missing > 0)) ggplot(z, aes(text=text)) + geom_point(aes(x=Proportion, y=cumy, color=Missing)) + geom_text(aes(x=proplev, y=cumy, label=category), size=2.5, hjust=1) + scale_y_continuous(breaks=tly, labels=un) + scale_x_continuous(breaks=pretty(r)) + scale_color_gradientn(colors=viridis::viridis(10)) + ylab(NULL) + theme(panel.grid.minor.y = element_blank()) else ggplot(z, aes(text=text)) + geom_point(aes(x=Proportion, y=cumy)) + geom_text(aes(x=proplev, y=cumy, label=category), size=2.5, hjust=1) + scale_y_continuous(breaks=tly, labels=un) + scale_x_continuous(breaks=pretty(r)) + ylab(NULL) + theme(panel.grid.minor.y = element_blank()) } else { z$proplev <- 1.15 pcat <- if(any(z$Missing > 0)) plotly::plot_ly(z, x = ~ Proportion, y= ~ cumy, text= ~ text, color=~ Missing, mode='markers', hoverinfo='text', type='scatter', name='', height=if(! auto) plotlyParm$heightDotchart(nrow(z))) else plotly::plot_ly(z, x=~ Proportion, y=~ cumy, text=~ text, mode='markers', hoverinfo='text', type='scatter', name='', height=if(! auto) plotlyParm$heightDotchart(nrow(z))) pcat <- plotly::add_trace(pcat, data=z, x=~ proplev, y=~ cumy, text=~ category, mode='text', textposition='left', textfont=list(size=9), hoverinfo='none', name='Levels') tl <- seq(0, 1, by=0.05) ## X tick mark labels for multiples of 0.1 tt <- ifelse(tl %% 0.1 == 0, as.character(tl), '') tly <- z$cumy[z$y == bvspace] pcat <- plotly::layout(pcat, xaxis=list(range=c(0,1.15), zeroline=TRUE, tickvals=tl, ticktext=tt, title='Proportion'), yaxis=list(title='', autorange='reversed', tickvals=tly, ticktext=un), margin=list(l=plotlyParm$lrmargin(un))) } } } if(which != 'categorical') { f <- function(x) { s <- x$counts v <- x$values isn <- function(z) is.numeric(z) || testDateTime(z, 'either') if(! 
(as.numeric(s['distinct']) >= n.unique && length(v) && is.list(v) && all(names(v) == c('value', 'frequency')) && isn(v$value)) ) return(data.frame(X=numeric(0), count=numeric(0), text=character(0))) X <- v$value Y <- v$frequency Xn <- as.numeric(X) text <- paste(format(X, digits=digits), ' (n=', Y, ')', sep='') X <- (Xn - Xn[1]) / diff(range(Xn)) zz <- format_counts(s) lab <- fmtlab(x) text[1] <- paste(c(lab, text[1], zz), collapse='<br>') ## Note: plotly does not allow html tables as hover text m <- rep(as.numeric(s['missing']), length(X)) list(X=X, prop=Y / sum(Y), text=I(text), missing=m) } w <- lapply(x, f) nam <- names(w) for(n in nam) { l <- length(w[[n]]$X) w[[n]]$xname <- if(l) rep(n, l) else character(0) } if(length(ge('xname')) == 0) { warning('no continuous variables found') pcon <- NULL } else { z <- data.frame(xname = I(ge('xname')), X = ge('X'), Proportion = round(ge('prop'), 4), text = I(ge('text')), Missing = ge('missing')) z <- z[nrow(z) : 1, ] # so plotly will keep right ordering unam <- unique(z$xname) z$yy <- match(as.character(z$xname), unam) ## Scale Proportion so that max over all variables is 0.9 z$Proportion <- 0.9 * z$Proportion / max(z$Proportion) g <- if(any(z$Missing > 0)) ggplot(z, aes(text=text)) + geom_segment(aes(x=X, xend=X, y=yy, yend=yy + Proportion, color=Missing)) + scale_y_continuous(breaks=1 : length(unam), labels=unam) + scale_color_gradientn(colors=viridis::viridis(10)) + xlab(NULL) + ylab(NULL) + theme(axis.text.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(), axis.ticks.x = element_blank()) else ggplot(z, aes(text=text)) + geom_segment(aes(x=X, xend=X, y=yy, yend=yy + Proportion)) + scale_y_continuous(breaks=1 : length(unam), labels=unam) + xlab(NULL) + ylab(NULL) + theme(axis.text.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(), axis.ticks.x = element_blank()) ## ggplotly would not hover text at x=0 when height < 350 px curtail <- function(x) min(1000, max(x, 350)) pcon <- if(! pty) g else plotly::ggplotly(g, tooltip='text', width=if(! auto) 800, height=if(! auto) curtail(60 + 25 * length(unam))) ## If don't run plot_ly, hovering will pop up all vertical points # pcon <- if(any(z$missing > 0)) # plotly::plot_ly(z, x=X, y=yy, mode='markers', # color=missing, colors=colors, name='') # else # plotly::plot_ly(z, x=X, y=yy, mode='none', name='') # # pcon <- with(z, histSpikep(NULL, x=X, y=yy, z=Proportion, # hovertext=text, tracename='', # color=if(any(missing > 0)) missing, # colors=colors)) ## Note: plotly does not allow font, color, size changes for hover text # pcon <- if(any(z$missing > 0)) # plotly::plot_ly(z, x=X, y=xname, size=Proportion, text=text, # color=missing, mode='markers', # marker=list(symbol='line-ns-open'), # type='scatter', hoverinfo='text') # else # plotly::plot_ly(z, x=X, y=xname, size=Proportion, # text=text, # mode='markers', # marker=list(symbol='line-ns-open'), # type='scatter', hoverinfo='text') # # maxlen <- max(nchar(as.character(unam))) # pcon <- plotly::layout(pcon, xaxis=list(title='', showticklabels=FALSE, # zeroline=FALSE, showgrid=FALSE), # yaxis=list(title='', # tickvals=1 : length(unam), # ticktext=unam), # margin=list(l=max(70, maxlen * 6)), # autosize=TRUE, evaluate=TRUE) } } if(which == 'both') { if(! length(pcat)) return(pcon) if(! 
length(pcon)) return(pcat) } ## knitr.in.progress not set to TRUE unless explicitly select menu ## choice knit to html. Even when properly set, resulting html ## file was messed up as plotly widgets were duplicated switch(which, categorical = pcat, continuous = pcon, both = if(FALSE && pty && isTRUE(getOption('knitr.in.progress'))) htmltools::tagList(list(plotly::as.widget(pcat), plotly::as.widget(pcon))) else list(Categorical=pcat, Continuous=pcon)) } ## Some of these are for dotchart3.s utils::globalVariables(c('X', 'Proportion', 'xname', 'cumy', 'proplev', 'category', 'xb', 'Missing', 'yy')) ���������������������������������������������������Hmisc/R/strgraphwrap.s������������������������������������������������������������������������������0000644�0001762�0000144�00000005760�12243661443�014443� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������strgraphwrap <- function (x, width = 0.9 * getOption("width"), indent = 0, exdent = 0, prefix = "", simplify = TRUE, units='user', cex=NULL) { if (!is.character(x)) x <- as.character(x) spc.len <- strwidth(" ", units=units, cex=cex) prefix.len <- strwidth(prefix, units = units, cex=cex) indentString <- paste(rep.int(" ", indent), collapse = "") indent <- indent * spc.len exdentString <- paste(rep.int(" ", exdent), collapse = "") exdent <- exdent * spc.len y <- list() z <- lapply(strsplit(x, "\n[ \t\n]*\n"), strsplit, "[ \t\n]") for (i in seq_along(z)) { yi <- character(0) for (j in seq_along(z[[i]])) { words <- z[[i]][[j]] nc <- strwidth(words, units=units, cex=cex) if (any(is.na(nc))) { nc0 <- strwidth(words, units=units, cex=cex) nc[is.na(nc)] <- nc0[is.na(nc)] } if (any(nc == 0)) { zLenInd <- which(nc == 0) zLenInd <- zLenInd[!(zLenInd %in% (grep("\\.$", words) + 1))] if (length(zLenInd) > 0) { words <- words[-zLenInd] nc <- nc[-zLenInd] } } if (length(words) == 0) { yi <- c(yi, "", prefix) next } currentIndex <- 0 lowerBlockIndex <- 1 upperBlockIndex <- integer(0) lens <- cumsum(nc + spc.len) first <- TRUE maxLength <- width - prefix.len - indent while (length(lens) > 0) { k <- max(sum(lens <= maxLength), 1) if (first) { first <- FALSE maxLength <- maxLength + indent - exdent } currentIndex <- currentIndex + k if (nc[currentIndex] == 0) upperBlockIndex <- c(upperBlockIndex, currentIndex - 1) else upperBlockIndex <- c(upperBlockIndex, currentIndex) if (length(lens) > k) { if (nc[currentIndex + 1] == 0) { currentIndex <- currentIndex + 1 k <- k + 1 } lowerBlockIndex <- c(lowerBlockIndex, currentIndex + 1) } if (length(lens) > k) lens <- lens[-(1:k)] - lens[k] else lens <- NULL } nBlocks <- length(upperBlockIndex) s <- paste(prefix, c(indentString, rep.int(exdentString, nBlocks - 1)), sep = "") for (k in (1:nBlocks)) s[k] <- paste(s[k], paste(words[lowerBlockIndex[k]:upperBlockIndex[k]], collapse = " "), sep = "") yi <- c(yi, s, prefix) } y <- if (length(yi)) c(y, list(yi[-length(yi)])) else c(y, "") } if (simplify) y <- unlist(y) y } ����������������Hmisc/R/wtd.stats.s���������������������������������������������������������������������������������0000644�0001762�0000144�00000016134�13360354736�013654� 0����������������������������������������������������������������������������������������������������ustar 
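## A minimal sketch of plotting a describe() object with the method above;
## `d` is a hypothetical data frame:
if(FALSE) {
  w <- describe(d)
  plot(w, which='both')                       # Categorical and Continuous plots
  plot(w, which='categorical', sort='descending')
  options(grType='plotly')                    # interactive output if plotly is installed
  plot(w)
}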
�ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������## See stackoverflow.com/questions/10049402 wtd.mean <- function(x, weights=NULL, normwt='ignored', na.rm=TRUE) { if(! length(weights)) return(mean(x, na.rm=na.rm)) if(na.rm) { s <- ! is.na(x + weights) x <- x[s] weights <- weights[s] } sum(weights * x) / sum(weights) } wtd.var <- function(x, weights=NULL, normwt=FALSE, na.rm=TRUE, method = c('unbiased', 'ML')) ## By Benjamin Tyner <btyner@gmail.com> 2017-0-12 { method <- match.arg(method) if(! length(weights)) { if(na.rm) x <- x[!is.na(x)] return(var(x)) } if(na.rm) { s <- !is.na(x + weights) x <- x[s] weights <- weights[s] } if(normwt) weights <- weights * length(x) / sum(weights) if(normwt || method == 'ML') return(as.numeric(stats::cov.wt(cbind(x), weights, method = method)$cov)) # the remainder is for the special case of unbiased frequency weights sw <- sum(weights) if(sw <= 1) warning("only one effective observation; variance estimate undefined") xbar <- sum(weights * x) / sw sum(weights*((x - xbar)^2)) / (sw - 1) } wtd.quantile <- function(x, weights=NULL, probs=c(0, .25, .5, .75, 1), type=c('quantile','(i-1)/(n-1)','i/(n+1)','i/n'), normwt=FALSE, na.rm=TRUE) { if(! length(weights)) return(quantile(x, probs=probs, na.rm=na.rm)) type <- match.arg(type) if(any(probs < 0 | probs > 1)) stop("Probabilities must be between 0 and 1 inclusive") nams <- paste(format(round(probs * 100, if(length(probs) > 1) 2 - log10(diff(range(probs))) else 2)), "%", sep = "") i <- is.na(weights) | weights == 0 if(any(i)) { x <- x[! i] weights <- weights[! i] } if(type == 'quantile') { w <- wtd.table(x, weights, na.rm=na.rm, normwt=normwt, type='list') x <- w$x wts <- w$sum.of.weights n <- sum(wts) order <- 1 + (n - 1) * probs low <- pmax(floor(order), 1) high <- pmin(low + 1, n) order <- order %% 1 ## Find low and high order statistics ## These are minimum values of x such that the cum. freqs >= c(low,high) allq <- approx(cumsum(wts), x, xout=c(low,high), method='constant', f=1, rule=2)$y k <- length(probs) quantiles <- (1 - order)*allq[1:k] + order*allq[-(1:k)] names(quantiles) <- nams return(quantiles) } w <- wtd.Ecdf(x, weights, na.rm=na.rm, type=type, normwt=normwt) structure(approx(w$ecdf, w$x, xout=probs, rule=2)$y, names=nams) } wtd.Ecdf <- function(x, weights=NULL, type=c('i/n','(i-1)/(n-1)','i/(n+1)'), normwt=FALSE, na.rm=TRUE) { type <- match.arg(type) switch(type, '(i-1)/(n-1)'={a <- b <- -1}, 'i/(n+1)' ={a <- 0; b <- 1}, 'i/n' ={a <- b <- 0}) if(! length(weights)) { ##.Options$digits <- 7 ## to get good resolution for names(table(x)) oldopt <- options('digits') options(digits=7) on.exit(options(oldopt)) cumu <- table(x) ## R does not give names for cumsum isdate <- testDateTime(x) ## 31aug02 ax <- attributes(x) ax$names <- NULL x <- as.numeric(names(cumu)) if(isdate) attributes(x) <- c(attributes(x),ax) cumu <- cumsum(cumu) cdf <- (cumu + a)/(cumu[length(cumu)] + b) if(cdf[1]>0) { x <- c(x[1], x); cdf <- c(0,cdf) } return(list(x = x, ecdf=cdf)) } w <- wtd.table(x, weights, normwt=normwt, na.rm=na.rm) cumu <- cumsum(w$sum.of.weights) cdf <- (cumu + a)/(cumu[length(cumu)] + b) list(x = c(if(cdf[1]>0) w$x[1], w$x), ecdf=c(if(cdf[1]>0)0, cdf)) } wtd.table <- function(x, weights=NULL, type=c('list','table'), normwt=FALSE, na.rm=TRUE) { type <- match.arg(type) if(! 
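## Minimal sketches of the weighted-statistics functions defined below in
## this file, using made-up frequency weights:
if(FALSE) {
  x <- c(1, 2, 3, 4)
  w <- c(1, 1, 2, 4)
  wtd.mean(x, w)
  wtd.var(x, w)                              # unbiased frequency-weight variance
  wtd.quantile(x, w, probs=c(.25, .5, .75))
  wtd.table(x, w, type='table')
  wtd.rank(x, w)
}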
length(weights)) weights <- rep(1, length(x)) isdate <- testDateTime(x) ## 31aug02 + next 2 ax <- attributes(x) ax$names <- NULL if(is.character(x)) x <- as.factor(x) lev <- levels(x) x <- unclass(x) if(na.rm) { s <- ! is.na(x + weights) x <- x[s, drop=FALSE] ## drop is for factor class weights <- weights[s] } n <- length(x) if(normwt) weights <- weights * length(x) / sum(weights) i <- order(x) # R does not preserve levels here x <- x[i]; weights <- weights[i] if(anyDuplicated(x)) { ## diff(x) == 0 faster but doesn't handle Inf weights <- tapply(weights, x, sum) if(length(lev)) { levused <- lev[sort(unique(x))] if((length(weights) > length(levused)) && any(is.na(weights))) weights <- weights[! is.na(weights)] if(length(weights) != length(levused)) stop('program logic error') names(weights) <- levused } if(! length(names(weights))) stop('program logic error') if(type=='table') return(weights) x <- all.is.numeric(names(weights), 'vector') if(isdate) attributes(x) <- c(attributes(x),ax) names(weights) <- NULL return(list(x=x, sum.of.weights=weights)) } xx <- x if(isdate) attributes(xx) <- c(attributes(xx),ax) if(type=='list') list(x=if(length(lev))lev[x] else xx, sum.of.weights=weights) else { names(weights) <- if(length(lev)) lev[x] else xx weights } } wtd.rank <- function(x, weights=NULL, normwt=FALSE, na.rm=TRUE) { if(! length(weights)) return(rank(x, na.last=if(na.rm) NA else TRUE)) tab <- wtd.table(x, weights, normwt=normwt, na.rm=na.rm) freqs <- tab$sum.of.weights ## rank of x = # <= x - .5 (# = x, minus 1) r <- cumsum(freqs) - .5*(freqs-1) ## Now r gives ranks for all unique x values. Do table look-up ## to spread these ranks around for all x values. r is in order of x approx(tab$x, r, xout=x)$y } wtd.loess.noiter <- function(x, y, weights=rep(1,n), span=2/3, degree=1, cell=.13333, type=c('all','ordered all','evaluate'), evaluation=100, na.rm=TRUE) { type <- match.arg(type) n <- length(y) if(na.rm) { s <- ! is.na(x + y + weights) x <- x[s]; y <- y[s]; weights <- weights[s]; n <- length(y) } max.kd <- max(200, n) # y <- stats:::simpleLoess(y, x, weights=weights, span=span, # degree=degree, cell=cell)$fitted y <- fitted(loess(y ~ x, weights=weights, span=span, degree=degree, control=loess.control(cell=cell, iterations=1))) switch(type, all=list(x=x, y=y), 'ordered all'={ i <- order(x); list(x=x[i],y=y[i]) }, evaluate={ r <- range(x, na.rm=na.rm) approx(x, y, xout=seq(r[1], r[2], length=evaluation)) }) } num.denom.setup <- function(num, denom) { n <- length(num) if(length(denom) != n) stop('lengths of num and denom must match') s <- (1:n)[! 
is.na(num + denom) & denom != 0] num <- num[s]; denom <- denom[s] subs <- s[num > 0] y <- rep(1, length(subs)) wt <- num[num > 0] other <- denom - num subs <- c(subs, s[other > 0]) wt <- c(wt, other[other > 0]) y <- c(y, rep(0, sum(other>0))) list(subs=subs, weights=wt, y=y) } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/plsmo.s�������������������������������������������������������������������������������������0000644�0001762�0000144�00000017056�14112731327�013046� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������plsmo <- function(x, y, method=c("lowess", "supsmu", "raw", "intervals"), xlab, ylab, add=FALSE, lty=1 : lc, col=par('col'), lwd=par('lwd'), iter=if(length(unique(y)) > 2) 3 else 0, bass=0, f=2 / 3, mobs=30, trim, fun, ifun=mean, group=rep(1, length(x)), prefix, xlim, ylim, label.curves=TRUE, datadensity=FALSE, scat1d.opts=NULL, lines.=TRUE, subset=TRUE, grid=FALSE, evaluate=NULL, ...) { gfun <- ordGridFun(grid) nam <- as.character(sys.call())[2 : 3] method <- match.arg(method) if(method == 'intervals') doint <- function(x, y, m, ifun, fun) { g <- cut2(x, m=m) if(length(levels(g)) < 2) stop(paste('number of observations not large enough for', m, 'observations per interval')) w <- cut2(x, m=m, onlycuts=TRUE) p <- fun(tapply(y, g, ifun, na.rm=TRUE)) seg1 <- list(x1=w[- length(w)], y1=p, x2=w[-1], y2=p) ne <- 2 : (length(w) - 1) seg2 <- list(x1=w[ne], y1=p[-1], x2=w[ne], y2=p[- length(p)]) list(x = (w[-length(w)] + w[-1]) / 2, y = p, xbar = tapply(x, g, mean, na.rm=TRUE), seg1 = seg1, seg2=seg2) } if(missing(ylab)) ylab <- label(y, units=TRUE, plot=TRUE, default=nam[2]) Y <- as.matrix(y) p <- ncol(Y) if(!missing(subset)) { x <- x[subset] Y <- Y[subset,, drop=FALSE] group <- group[subset] } group <- as.factor(group) if(!missing(prefix)) levels(group) <- paste(prefix, levels(group)) group <- as.factor(group) nna <- !(is.na(x) | (rowSums(is.na(Y)) == p) | is.na(group)) x <- x[nna] Y <- Y[nna,, drop=FALSE] group <- group[nna] lev <- levels(group) nlev <- length(lev) lc <- p * nlev curves <- list() clev <- rep('', lc) # for each curve what is the level of group xmin <- ymin <- 1e30; xmax <- ymax <- -1e30 ic <- 0 for(k in 1:p) { y <- Y[, k] for(g in lev) { ic <- ic + 1 s <- group == g z <- switch(method, lowess = lowess(x[s], y[s], iter=iter, f=f), supsmu = supsmu(x[s], y[s], bass=bass), raw = approx(x[s], y[s], xout=sort(unique(x[s]))), intervals = doint(x[s], y[s], m=mobs, ifun=ifun, fun=if(missing(fun)) function(x) x else fun) ) if(missing(trim)) trim <- if(sum(s) > 200) 10 / sum(s) else 0 if(method == 'intervals') { trim <- 0 evaluate <- NULL } if(trim > 0 && trim < 1) { xq <- quantile(x[s], c(trim, 1 - trim)) s <- z$x >= xq[1] & z$x <= xq[2] z <- list(x=z$x[s], y=z$y[s]) } if(length(evaluate)) { rx <- range(z$x) xseq <- seq(rx[1], rx[2], length.out=evaluate) z <- approx(z, xout=xseq) } if(!missing(fun)) { yy 
<- fun(z$y) s <- !is.infinite(yy) & !is.na(yy) z <- list(x=z$x[s], y=yy[s]) } clev[ic] <- g lab <- if(p == 1) g else if(nlev == 1 & p == 1) '1' else if(nlev == 1 & p > 1) colnames(Y)[k] else paste(colnames(Y)[k], g) curves[[lab]] <- z xmin <- min(xmin, z$x); xmax <- max(xmax, z$x) ymin <- min(ymin, z$y); ymax <- max(ymax, z$y) } } if(add) { if(missing(xlim)) xlim <- if(grid) current.panel.limits()$xlim else par('usr')[1:2] } else { if(missing(xlab)) xlab <- label(x, units=TRUE, plot=TRUE, default=nam[1]) if(missing(xlim)) xlim <- if(method == 'intervals') range(x, na.rm=TRUE) else c(xmin, xmax) if(missing(ylim)) ylim <- c(ymin, ymax) plot(xmin, ymin, xlim=xlim, ylim=ylim, type='n', xlab=xlab, ylab=ylab) } lty <- rep(lty, length.out=lc) col <- rep(col, length.out=lc) if(missing(lwd) && is.list(label.curves) && length(label.curves$lwd)) lwd <- label.curves$lwd lwd <- rep(lwd, length.out=lc) if(method == 'intervals') for(i in 1 : lc) { cu <- curves[[i]] seg1 <- cu$seg1 seg2 <- cu$seg2 acol <- adjustcolor(col[i], alpha.f=.15) gfun$points(cu$xbar, cu$y, col=acol, pch=3) with(cu$seg1, gfun$segments(x1, y1, x2, y2, col=col[i])) with(cu$seg2, gfun$segments(x1, y1, x2, y2, col=acol)) } else { for(i in 1 : lc) { cu <- curves[[i]] s <- cu$x >= xlim[1] & cu$x <= xlim[2] curves[[i]] <- list(x=cu$x[s], y=cu$y[s]) } if(lines.) for(i in 1 : lc) gfun$lines(curves[[i]], lty=lty[i], col=col[i], lwd=lwd[i]) if(datadensity) { for(i in 1 : nlev) { s <- group == lev[i] x1 <- x[s] for(ii in which(clev == lev[i])) { y.x1 <- approx(curves[[ii]], xout=x1)$y sopts <- c(list(x=x1, y=y.x1, col=col[ii], grid=grid), scat1d.opts) do.call('scat1d', sopts) } } } } if((is.list(label.curves) || label.curves) && lc > 1 && (!missing(prefix) | !add | !missing(label.curves))) labcurve(curves, lty=lty, col.=col, opts=label.curves, grid=grid) invisible(curves) } panel.plsmo <- function(x, y, subscripts, groups=NULL, type='b', label.curves=TRUE, lwd = superpose.line$lwd, lty = superpose.line$lty, pch = superpose.symbol$pch, cex = superpose.symbol$cex, font = superpose.symbol$font, col = NULL, scat1d.opts=NULL, ...) { superpose.symbol <- trellis.par.get("superpose.symbol") superpose.line <- trellis.par.get("superpose.line") if(length(groups)) groups <- as.factor(groups) g <- unclass(groups)[subscripts] ng <- if(length(groups)) max(g) else 1 lty <- rep(lty, length = ng) lwd <- rep(lwd, length = ng) pch <- rep(pch, length = ng) cex <- rep(cex, length = ng) font <- rep(font, length = ng) if(!length(col)) col <- if(type == 'p') superpose.symbol$col else superpose.line$col col <- rep(col, length = ng) lc <- if(is.logical(label.curves)) { if(label.curves) list(lwd=lwd, cex=cex[1]) else FALSE } else c(list(lwd=lwd, cex=cex[1]), label.curves) if(type != 'p') if(ng > 1) plsmo(x, y, group=groups[subscripts, drop=FALSE], add=TRUE, lty=lty, col=col, label.curves=lc, grid=TRUE, scat1d.opts=scat1d.opts, ...) else plsmo(x, y, add=TRUE, lty=lty, col=col, label.curves=lc, grid=TRUE, scat1d.opts=scat1d.opts, ...) 
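  ## Illustrative calls of plsmo defined above (`mydata`, `age`, `sbp`,
  ## `sex`, and `death` are hypothetical):
  ##   with(mydata, plsmo(age, sbp, group=sex, datadensity=TRUE))
  ##   with(mydata, plsmo(age, death, fun=qlogis, ylab='log odds of death'))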
if(type != 'l') { if(ng > 1) panel.superpose(x, y, subscripts, as.integer(groups), lwd=lwd, lty=lty, pch=pch, cex=cex, font=font, col=col) else panel.xyplot(x, y, lwd=lwd, lty=lty, pch=pch, cex=cex, font=font, col=col) if(ng > 1) { Key <- function(x=NULL, y=NULL, lev, cex, col, font, pch){ oldpar <- par('usr', 'xpd') par(usr=c(0, 1, 0, 1), xpd=NA) on.exit(par(oldpar)) if(is.list(x)) { y <- x[[2]] x <- x[[1]] } ## Even though par('usr') shows 0,1,0,1 after lattice draws ## its plot, it still needs resetting if(!length(x)) x <- 0 if(!length(y)) y <- 1 ## because of formals() rlegend(x, y, legend=lev, cex=cex, col=col, pch=pch) invisible() } formals(Key) <- list(x=NULL,y=NULL,lev=levels(groups), cex=cex, col=col, font=font, pch=pch) .setKey(Key) } } } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/impute.s������������������������������������������������������������������������������������0000644�0001762�0000144�00000007335�12515210327�013213� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������impute <- function(x, ...) UseMethod("impute") impute.default <- function(x, fun=median, ...) { m <- is.na(x) k <- sum(m) if(k==0) return(x) nam <- names(x) if(!length(nam)) { nam <- as.character(1:length(x)); names(x) <- nam } if(!is.function(fun)) { fill <- fun if(is.character(fill) && length(fill)==1 && fill=="random") fill <- sample(x[!is.na(x)], sum(is.na(x)), replace=TRUE) } else if(is.factor(x)) { freq <- table(x) fill <- names(freq)[freq==max(freq)][1] #take first if not unique } else fill <- if(missing(fun) && is.logical(x)) (if(sum(x[!m]) >= sum(!m)/2) TRUE else FALSE) else fun(x[!m]) ## median(logical vector) doesn't work - know trying to get median ## if fun is omitted. Get mode. if(length(fill)>1 && length(fill)!=k) stop("length of vector of imputed values != no. NAs in x") ## lab <- label(x) ## if(is.null(lab) || lab=="") lab <- name ## lab <- paste(lab,"with",sum(m),"NAs imputed to",format(fill)) ## attr(x, "label") <- lab if(is.factor(x)) { newlev <- sort(unique(fill)) if(any(!(z <- newlev %in% levels(x)))) { xc <- as.character(x) xc[m] <- fill x <- factor(xc, c(levels(x), newlev[!z])) } else x[m] <- fill } else x[m] <- fill structure(x, imputed=(1:length(x))[m], class=c('impute', attr(x, 'class'))) } print.impute <- function(x, ...) { i <- attr(x,"imputed") if(!length(i)) { print.default(x); return(invisible()) } if(is.factor(x)) w <- as.character(x) else w <- format(x) names(w) <- names(x) w[i] <- paste(w[i], "*", sep="") attr(w, "label") <- attr(w,"imputed") <- attr(w, "class") <- NULL print.default(w, quote=FALSE) invisible() } summary.impute <- function(object, ...) 
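## Commented examples of the impute family defined above, with made-up data:
##   x <- c(1, 2, NA, 4, NA)
##   impute(x)              # fill NAs with the median (default fun)
##   impute(x, mean)        # fill NAs with the mean
##   impute(x, 'random')    # sample fill-ins from the non-missing values
##   is.imputed(impute(x))  # flags which entries were imputed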
{ i <- attr(object, "imputed") oi <- object attr(oi,'class') <- attr(oi,'class')[attr(oi,'class')!="impute"] oi <- oi[i] if(all(oi==oi[1])) cat("\n",length(i),"values imputed to", if(is.numeric(oi)) format(oi[1]) else as.character(oi[1]), "\n\n") else { cat("\nImputed Values:\n\n") if(length(i)<20) print(oi) else print(describe(oi, descript=as.character(sys.call())[2])) cat("\n") } NextMethod("summary") } "[.impute" <- function(x, ..., drop=FALSE) { ats <- attributes(x) ats$dimnames <- NULL ats$dim <- NULL ats$names <- NULL attr(x,'class') <- NULL y <- x[..., drop = drop] if(length(y)==0) return(y) k <- 1:length(x); names(k) <- names(x) k <- k[...] attributes(y) <- c(attributes(y), ats) imp <- attr(y, "imputed") attr(y, "imputed") <- j <- (1:length(k))[k %in% imp] if(length(j)==0) { cy <- attr(y,'class')[attr(y,'class')!='impute'] y <- structure(y, imputed=NULL, class=if(length(cy)) cy else NULL) } y } is.imputed <- function(x) { w <- rep(FALSE, if(is.matrix(x))nrow(x) else length(x)) if(length(z <- attr(x,"imputed"))) w[z] <- TRUE w } as.data.frame.impute <- function(x, row.names = NULL, optional = FALSE, ...) { nrows <- length(x) if(!length(row.names)) { ## the next line is not needed for the 1993 version of data.class and is ## included for compatibility with 1992 version if(length(row.names <- names(x)) == nrows && !anyDuplicated(row.names)) { } else if(optional) row.names <- character(nrows) else row.names <- as.character(1:nrows) } value <- list(x) if(!optional) names(value) <- deparse(substitute(x))[[1]] structure(value, row.names=row.names, class='data.frame') } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/consolidate.s�������������������������������������������������������������������������������0000644�0001762�0000144�00000001625�13215500012�014177� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#combine <- function(x, value, protect, ...) stop("combine() depricated due to naming conflict renamed consolidate()") #'combine<-' <- function(x, protect, ..., value) stop("combine<-() depricated due to naming conflict renamed consolidate<-()") consolidate <- function(x, value, protect, ...) { UseMethod("consolidate") } 'consolidate<-' <- function(x, protect=FALSE, ..., value) consolidate(x, value, protect, ...) consolidate.default <- function(x, value, protect=FALSE, ...) 
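## Commented sketch of consolidate() semantics, implemented below: elements
## of `value` replace same-named elements of `x` unless protect=TRUE:
##   consolidate(c(a=1, b=2), c(b=9, c=3))                # a=1, b=9, c=3
##   consolidate(c(a=1, b=2), c(b=9, c=3), protect=TRUE)  # a=1, b=2, c=3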
{ if(missing(x) || is.null(x)) x <- vector() if(missing(value) || is.null(value)) value <- vector() xNames <- names(x) valueNames <- names(value) if(is.null(xNames) || is.null(valueNames) || all(valueNames == "") || all(xNames == "")) return(c(x, value)) vars <- intersect(xNames, valueNames[valueNames != ""]) if(!protect) x[vars] <- value[vars] c(x, value[!valueNames %in% vars]) } �����������������������������������������������������������������������������������������������������������Hmisc/R/in.operator.s�������������������������������������������������������������������������������0000644�0001762�0000144�00000000100�12243661443�014136� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������"%nin%" <- function(x, table) match(x, table, nomatch = 0) == 0 ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/simMarkovOrd.r������������������������������������������������������������������������������0000644�0001762�0000144�00000114310�14241432662�014322� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������##' Simulate Ordinal Markov Process ##' ##' Simulates longitudinal data for subjects following a first-order Markov process under a proportional odds model. Optionally, response-dependent sampling can be done, e.g., if a subject hits a specified state at time t, measurements are removed for times t+1, t+3, t+5, ... This is applicable when for example a study of hospitalized patients samples every day, Y=1 denotes patient discharge to home, and sampling is less frequent outside the hospital. This example assumes that arriving home is not an absorbing state, i.e., a patient could return to the hospital. ##' @title simMarkovOrd ##' @param n number of subjects to simulate ##' @param y vector of possible y values in order (numeric, character, factor) ##' @param times vector of measurement times ##' @param initial initial value of `y` (baseline state; numeric, character, or factor matching `y`). If length 1 this value is used for all subjects, otherwise it is a vector of length `n`. ##' @param X an optional vector of matrix of baseline covariate values passed to `g`. If a vector, `X` represents a set of single values for all the covariates and those values are used for every subject. Otherwise `X` is a matrix with rows corresponding to subjects and columns corresponding to covariates which `g` must know how to handle. `g` only sees one row of `X` at a time. ##' @param absorb vector of absorbing states, a subset of `y` (numeric, character, or factor matching `y`). The default is no absorbing states. 
Observations are truncated when an absorbing state is simulated. ##' @param intercepts vector of intercepts in the proportional odds model. There must be one fewer of these than the length of `y`. ##' @param g a user-specified function of three or more arguments which in order are `yprev` - the value of `y` at the previous time, the current time `t`, the `gap` between the previous time and the current time, an optional (usually named) covariate vector `X`, and optional arguments such as a regression coefficient value to simulate from. The function needs to allow `yprev` to be a vector and `yprev` must not include any absorbing states. The `g` function returns the linear predictor for the proportional odds model aside from `intercepts`. The returned value must be a matrix with row names taken from `yprev`. If the model is a proportional odds model, the returned value must be one column. If it is a partial proportional odds model, the value must have one column for each distinct value of the response variable Y after the first one, with the levels of Y used as optional column names. So columns correspond to `intercepts`. The different columns are used for `y`-specific contributions to the linear predictor (aside from `intercepts`) for a partial or constrained partial proportional odds model. Parameters for partial proportional odds effects may be included in the ... arguments. ##' @param carry set to `TRUE` to carry absorbing state forward after it is first hit; the default is to end records for the subject once the absorbing state is hit ##' @param rdsample an optional function to do response-dependent sampling. It is a function of these arguments, which are vectors that stop at any absorbing state: `times` (ascending measurement times for one subject), `y` (vector of ordinal outcomes at these times for one subject. The function returns `NULL` if no observations are to be dropped, returns the vector of new times to sample. ##' @param ... additional arguments to pass to `g` such as a regresson coefficient ##' @return data frame with one row per subject per time, and columns id, time, yprev, y, values in ... ##' @author Frank Harrell ##' @seealso <https://hbiostat.org/R/Hmisc/markov/> ##' @export ##' @md simMarkovOrd <- function(n=1, y, times, initial, X=NULL, absorb=NULL, intercepts, g, carry=FALSE, rdsample=NULL, ...) { if(is.factor(y)) y <- as.character(y) if(is.factor(initial)) initial <- as.character(initial) if(is.factor(absorb)) absorb <- as.character(absorb) ychar <- is.character(y) if(length(initial) == 1) initial <- rep(initial, n) if(length(initial) != n) stop('initial must have length 1 or n') if(any(initial %in% absorb)) stop('initial state cannot be an absorbing state') Xmat <- length(X) && is.matrix(X) if(Xmat && ! length(colnames(X))) stop('when a matrix, X must have column names') if(length(X) && ! Xmat && ! length(names(X))) stop('when a vector, elements of X must have names') nt <- length(times) Y <- Yp <- if(ychar) character(nt) else numeric(nt) gaps <- numeric(nt) ID <- Time <- Gap <- numeric(n * nt) YYprev <- YY <- if(ychar) character(n * nt) else numeric(n * nt) is <- 1 times.saved <- 0 for(id in 1 : n) { tprev <- 0 i <- 0 for(t in times) { i <- i + 1 gap <- t - tprev gaps[i] <- gap yprev <- if(i == 1) initial[id] else Y[i - 1] Yp[i] <- yprev if(carry && (yprev %in% absorb)) Y[i] <- yprev else { xb <- g(yprev, t, gap, X=if(Xmat) X[id, ] else X, ...) ## If partial PO model xb has 1 row (since yprev is scalar) and ## columns corresponding to intercepts. 
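        ## Commented sketch of a conforming user-supplied g for the plain
        ## proportional odds case (hypothetical coefficients; `parameter`
        ## would be passed through ...):
        ##   g <- function(yprev, t, gap, X=NULL, parameter=0) {
        ##     lp <- 0.5 * (yprev == 2) +
        ##           parameter * (length(X) && X['group'] == 2)
        ##     matrix(lp, ncol=1, dimnames=list(as.character(yprev), NULL))
        ##   }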
If PO, is 1x1 probs <- plogis(intercepts + xb) ## Compute cell probabilities from successive differences in ## exceedance probs probs <- c(1., probs) - c(probs, 0.) lo <- probs < 0. hi <- probs > 1. ## The following is needed for partial proportional odds models if(any(c(lo , hi))) { warning(paste('Probabilities < 0 or > 1 at time t=', t, 'id=', id, ':', paste(probs[c(lo, hi)], collapse=' '), 'set to 0 or 1')) if(any(lo)) probs[lo] <- 0. if(any(hi)) probs[hi] <- 1. } Y[i] <- sample(y, 1, prob=probs) if(! carry && (Y[i] %in% absorb)) break } tprev <- t } s <- 1 : i atimes <- times[s] agaps <- gaps[s] aYp <- Yp[s] aY <- Y[s] if(length(rdsample)) { stimes <- rdsample(atimes, aY) lt <- length(stimes) if(lt) { times.saved <- times.saved + i - lt tsprev <- c(0, stimes[- lt]) agaps <- stimes - tsprev aY <- aY[times %in% stimes] if(length(aY) != lt) stop('program logic error in simMarkovOrd') aYp <- c(aYp[1], aY[- lt]) atimes <- stimes } } ie <- is + length(aY) - 1 ID [is : ie] <- id Time [is : ie] <- atimes Gap [is : ie] <- agaps YYprev[is : ie] <- aYp YY [is : ie] <- aY is <- ie + 1 } yy <- YY[1 : ie] if(ychar) yy <- factor(yy, levels=y) yyp <- YYprev[1 : ie] if(ychar) yyp <- factor(yyp, levels=setdiff(y, absorb)) res <- data.frame(id=ID[1 : ie], time=Time[1 : ie], gap=Gap[1 : ie], yprev=yyp, y=yy, ...) attr(res, 'times.saved.per.subject') <- times.saved / n ## Handle case where X is a constant vector to distribute to all obs if(length(X)) { if(Xmat) for(nam in colnames(X)) res[[nam]] <- X[res$id, nam] else for(nam in names(X)) res[[nam]] <- X[nam] } res } #' State Occupancy Probabilities for First-Order Markov Ordinal Model #' #' @title soprobMarkovOrd #' @inheritParams simMarkovOrd #' @param y a vector of possible y values in order (numeric, character, factor) #' @param times vector of measurement times #' @param initial initial value of `y` (baseline state; numeric, character, factr) #' @param absorb vector of absorbing states, a subset of `y`. The default is no absorbing states. (numeric, character, factor) #' @param intercepts vector of intercepts in the proportional odds model, with length one less than the length of `y` #' @param ... additional arguments to pass to `g` such as covariate settings #' #' @return matrix with rows corresponding to times and columns corresponding to states, with values equal to exact state occupancy probabilities #' @export #' @author Frank Harrell #' @seealso <https://hbiostat.org/R/Hmisc/markov/> #' @export #' @md soprobMarkovOrd <- function(y, times, initial, absorb=NULL, intercepts, g, ...) { if(initial %in% absorb) stop('initial state cannot be an absorbing state') k <- length(y) nt <- length(times) P <- matrix(NA, nrow=nt, ncol=k) colnames(P) <- as.character(y) rownames(P) <- as.character(times) yna <- setdiff(y, absorb) # all states except absorbing ones yc <- as.character(y) ynac <- as.character(yna) ## Don't uncondition on initial state xb <- g(initial, times[1], times[1], ...) # 3rd arg (gap) assumes time origin 0 ## Since initial is scalar, xb has one row. It has multiple columns if ## model is partial PO model, with columns exactly corresponding to intercepts pp <- plogis(intercepts + xb) ## Compute cell probabilities pp <- c(1., pp) - c(pp, 0.) P[1, ] <- pp tprev <- times[1] for(it in 2 : nt) { t <- times[it] gap <- t - tprev ## Compute linear predictor at all non-absorbing states xb <- g(yna, t, gap, ...) 
#non-intercept part of x * beta ## g puts non-absorbing states as row names (= ynac) ## If partial PO model xb has > 1 column that correspond to intercepts ## Matrix of conditional probabilities of Y conditioning on previous Y ## Columns = k conditional probabilities conditional on a single previous state ## Rows = all possible previous states ## When the row corresponds to an absorbing state, with prob. 1 ## a subject will remain in that state so give it a prob of 1 and ## all other states a prob of 0 cp <- matrix(NA, nrow=k, ncol=k, dimnames=list(yc, yc)) for(yval in y) { # current row yvalc <- as.character(yval) if(yval %in% absorb) { # current row is an absorbing state cp[yvalc, setdiff(yc, yvalc)] <- 0. # P(moving to non-abs state)=0 cp[yvalc, yvalc] <- 1. # certainty in staying } else { # current row is non-absorbing state pp <- plogis(intercepts + xb[yvalc, ]) ## Compute cell probabilities pp <- c(1., pp) - c(pp, 0.) cp[yvalc, ] <- pp } } P[it, ] <- t(cp) %*% P[it - 1, ] tprev <- t } P } ##' Simulate Comparisons For Use in Sequential Markov Longitudinal Clinical Trial Simulations ##' ##' Simulates sequential clinical trials of longitudinal ordinal outcomes using a first-order Markov model. Looks are done sequentially after subject ID numbers given in the vector `looks` with the earliest possible look being after subject 2. At each look, a subject's repeated records are either all used or all ignored depending on the sequent ID number. For each true effect parameter value, simulation, and at each look, runs a function to compute the estimate of the parameter of interest along with its variance. For each simulation, data are first simulated for the last look, and these data are sequentially revealed for earlier looks. The user provides a function `g` that has extra arguments specifying the true effect of `parameter` the treatment `group` expecting treatments to be coded 1 and 2. `parameter` is usually on the scale of a regression coefficient, e.g., a log odds ratio. Fitting is done using the `rms::lrm()` function, unless non-proportional odds is allowed in which case `VGAM::vglm()` is used. If `timecriterion` is specified, the function also, for the last data look only, computes the first time at which the criterion is satisfied for the subject or use the event time and event/censoring indicator computed by `timecriterion`. The Cox/logrank chi-square statistic for comparing groups on the derived time variable is saved. If `coxzph=TRUE`, the `survival` package correlation coefficient `rho` from the scaled partial residuals is also saved so that the user can later determine to what extent the Markov model resulted in the proportional hazards assumption being violated when analyzing on the time scale. `vglm` is accelerated by saving the first successful fit for the largest sample size and using its coefficients as starting value for further `vglm` fits for any sample size for the same setting of `parameter`. ##' @title estSeqMarkovOrd ##' @inheritParams simMarkovOrd ##' @param y vector of possible y values in order (numeric, character, factor) ##' @param times vector of measurement times ##' @param initial a vector of probabilities summing to 1.0 that specifies the frequency distribution of initial values to be sampled from. The vector must have names that correspond to values of `y` representing non-absorbing states. ##' @param absorb vector of absorbing states, a subset of `y`. The default is no absorbing states. Observations are truncated when an absorbing state is simulated. 
##' Simulate Comparisons For Use in Sequential Markov Longitudinal Clinical Trial Simulations
##'
##' Simulates sequential clinical trials of longitudinal ordinal outcomes using a first-order Markov model.  Looks are done sequentially after subject ID numbers given in the vector `looks` with the earliest possible look being after subject 2.  At each look, a subject's repeated records are either all used or all ignored depending on the subject's ID number.  For each true effect parameter value, each simulation, and each look, a function is run to compute the estimate of the parameter of interest along with its variance.  For each simulation, data are first simulated for the last look, and these data are sequentially revealed for earlier looks.  The user provides a function `g` that has extra arguments specifying the true effect of `parameter` and the treatment `group`, with treatments expected to be coded 1 and 2.  `parameter` is usually on the scale of a regression coefficient, e.g., a log odds ratio.  Fitting is done using the `rms::lrm()` function, unless non-proportional odds is allowed in which case `VGAM::vglm()` is used.  If `timecriterion` is specified, the function also, for the last data look only, computes the first time at which the criterion is satisfied for the subject, or uses the event time and event/censoring indicator computed by `timecriterion`.  The Cox/logrank chi-square statistic for comparing groups on the derived time variable is saved.  If `coxzph=TRUE`, the `survival` package correlation coefficient `rho` from the scaled partial residuals is also saved so that the user can later determine to what extent the Markov model resulted in the proportional hazards assumption being violated when analyzing on the time scale.  `vglm` is accelerated by saving the first successful fit for the largest sample size and using its coefficients as starting values for further `vglm` fits for any sample size for the same setting of `parameter`.
##' @title estSeqMarkovOrd
##' @inheritParams simMarkovOrd
##' @param y vector of possible y values in order (numeric, character, factor)
##' @param times vector of measurement times
##' @param initial a vector of probabilities summing to 1.0 that specifies the frequency distribution of initial values to be sampled from.  The vector must have names that correspond to values of `y` representing non-absorbing states.
##' @param absorb vector of absorbing states, a subset of `y`.  The default is no absorbing states.  Observations are truncated when an absorbing state is simulated.  May be numeric, character, or factor.
##' @param intercepts vector of intercepts in the proportional odds model.  There must be one fewer of these than the length of `y`.
##' @param parameter vector of true parameter (effects; group differences) values.  These are group 2:1 log odds ratios in the transition model, conditioning on the previous `y`.
##' @param looks integer vector of ID numbers at which maximum likelihood estimates and their estimated variances are computed.  For a single look specify a scalar value for `looks` equal to the number of subjects in the sample.
##' @param formula a formula object given to the `lrm()` function using variables with these names: `y`, `time`, `yprev`, and `group` (factor variable having values '1' and '2').  The `yprev` variable is converted to a factor before fitting the model unless `yprevfactor=FALSE`.
##' @param ppo a formula specifying the part of `formula` for which proportional odds is not to be assumed, i.e., that specifies a partial proportional odds model.  Specifying `ppo` triggers the use of `VGAM::vglm()` instead of `rms::lrm()` and will make the simulations run slower.
##' @param yprevfactor see `formula`
##' @param groupContrast omit this argument if `group` has only one regression coefficient in `formula`.  Otherwise if `ppo` is omitted, provide `groupContrast` as a list of two lists that are passed to `rms::contrast.rms()` to compute the contrast of interest and its standard error.  The first list corresponds to group 1, the second to group 2, to get a 2:1 contrast.  If `ppo` is given and the group effect is not just a simple regression coefficient, specify as `groupContrast` a function of a `vglm` fit that computes the contrast of interest and its standard error and returns a list with elements named `Contrast` and `SE`.  For the latter type you can optionally have formal arguments `n1`, `n2`, and `parameter` that are passed to `groupContrast` to compute the standard error of the group contrast, where `n1` and `n2` respectively are the sample sizes for the two groups and `parameter` is the true group effect parameter value.
##' @param cscov applies if `ppo` is not used.  Set to `TRUE` to use the cluster sandwich covariance estimator of the variance of the group comparison.
##' @param timecriterion a function of a time-ordered vector of simulated ordinal responses `y` that returns a vector of `FALSE` or `TRUE` values denoting whether the current `y` level met the condition of interest.  For example `estSeqMarkovOrd` will compute the first time at which `y >= 5` if you specify `timecriterion=function(y) y >= 5`.  This function is only called at the last data look for each simulated study.  To have more control, instead of `timecriterion` returning a logical vector have it return a numeric 2-vector containing, in order, the event/censoring time and the 1/0 event/censoring indicator.
##' @param sstat set to a function of the time vector and the corresponding vector of ordinal responses for a single group if you want to compute a Wilcoxon test on a derived quantity such as the number of days in a given state.
##' @param coxzph set to `TRUE` if `timecriterion` is specified and you want to compute a statistic for testing proportional hazards at the last look of each simulated dataset
##' @param nsim number of simulations (default is 1)
##' @param maxest maximum acceptable absolute value of the contrast estimate, ignored if `NULL`.  Any values exceeding `maxest` will result in the estimate being set to `NA`.
##' @param maxvest like `maxest` but for the estimated variance of the contrast estimate
##' @param progress set to `TRUE` to send current iteration number to `pfile` every 10 iterations.  Each iteration will really involve multiple simulations, if `parameter` has length greater than 1.
##' @param pfile file to which to write progress information.  Defaults to `''` which is the console.  Ignored if `progress=FALSE`.
##' @return a data frame with number of rows equal to the product of `nsim`, the length of `looks`, and the length of `parameter`, with variables `sim`, `parameter`, `look`, `est` (log odds ratio for group), and `vest` (the variance of the latter).  If `timecriterion` is specified the data frame also contains `loghr` (Cox log hazard ratio for group), `lrchisq` (chi-square from Cox test for group), and if `coxzph=TRUE`, `phchisq`, the chi-square for testing proportional hazards.  The attribute `etimefreq` is also present if `timecriterion` is present, and it provides the frequency distribution of derived event times by group and censoring/event indicator.  If `sstat` is given, the attribute `sstat` is also present, and it contains an array with dimensions corresponding to simulations, parameter values within simulations, `id`, and a two-column subarray with columns `group` and `y`, the latter being the summary measure computed by the `sstat` function.  The returned data frame also has attribute `lrmcoef` which are the last-look logistic regression coefficient estimates over the `nsim` simulations and the parameter settings, and an attribute `failures` which is a data frame containing the variables `reason` and `frequency` cataloging the reasons for unsuccessful model fits.
##' @author Frank Harrell
##' @seealso `gbayesSeqSim()`, `simMarkovOrd()`, <https://hbiostat.org/R/Hmisc/markov/>
##' @export
##' @md
estSeqMarkovOrd <- function(y, times, initial, absorb=NULL, intercepts,
                            parameter, looks, g, formula, ppo=NULL,
                            yprevfactor=TRUE,
                            groupContrast=NULL, cscov=FALSE,
                            timecriterion=NULL, coxzph=FALSE,
                            sstat=NULL, rdsample=NULL,
                            maxest=NULL, maxvest=NULL,
                            nsim=1, progress=FALSE, pfile='') {

  olddd <- getOption('datadist')
  on.exit(options(datadist=olddd))

  isppo <- length(ppo) > 0
  if(isppo) {
    if(! inherits(ppo, 'formula')) stop('ppo must be a formula')
    if(! requireNamespace('VGAM'))
      stop('ppo specified and VGAM package not available')
    vglm <- VGAM::vglm
    ## VGAM wants you to declare FALSE to indicate non-PO
    ppo  <- formula(paste('FALSE ~', as.character(ppo)[-1]))
  } else if(! requireNamespace("rms", quietly = TRUE))
    stop('ppo not specified and rms package not available')

  if(isppo && cscov) stop('may not specify cscov=TRUE with ppo')

  nas <- setdiff(y, absorb)   # non-absorbing states
  if(length(initial) != length(nas))
    stop('length of initial must be number of non-absorbing values of y')
  if(! all(sort(names(initial)) == sort(as.character(nas))))
    stop('names of elements in initial are incorrect')
  if(coxzph && ! length(timecriterion))
    stop('must specify timecriterion when coxzph=TRUE')
  looks <- sort(looks)
  nlook <- length(looks)
  N     <- max(looks)
  np    <- length(parameter)
  nc    <- nsim * nlook * np
  parm  <- est <- vest <- numeric(nc)
  look  <- sim <- integer(nc)
  ndy   <- length(y)

  Etimefreq <- NULL
  if(length(timecriterion)) {
    Etimefreq <-
      array(0, dim=c(nsim, np, 2, 2, length(times)),
            dimnames=list(paste('sim', 1 : nsim),
                          as.character(parameter),
                          c('1', '2'),
                          c('censored', 'event'),
                          as.character(times)))
    loghr <- lrchisq <- rep(NA, nc)
    if(coxzph) phchisq <- rep(NA, nc)
  }
  if(length(sstat))
    Sstat <- array(0L, dim=c(nsim, np, N, 2),
                   dimnames=list(paste('sim', 1 : nsim),
                                 as.character(parameter),
                                 paste('id', 1 : N),
                                 c('group', 'y')))
  groupContrastUsesN <- length(groupContrast) &&
    all(c('n1', 'n2', 'parameter') %in% names(formals(groupContrast)))

  ## For each simulation and each parameter value, simulate data for the
  ## whole study
  is <- 0
  pname <- if(isppo) 'group2' else 'group=2'

  h <- function(time, y) {
    u <- timecriterion(y)
    if(! is.logical(u))
      return(list(etime=as.numeric(u[1]), event=as.integer(u[2])))
    ## Note that if there are any absorbing events, the time vector
    ## would already have been truncated at the first of such events
    if(any(u)) list(etime=as.numeric(min(time[u])), event=1L)
    else
      list(etime=as.numeric(max(time)), event=0L)
  }

  lrmcoef  <- NULL
  co.na    <- NULL     # template of coef vector to be all NAs
  coefprev <- list()   # to hold first working fit at last look for each parameter
  ## coefprev speeds up vglm (last look = maximum sample size)
  failures <- character(0)

  for(isim in 1 : nsim) {
    if(progress && (isim %% 10 == 0))
      cat('Simulation', isim, '\n', file=pfile)
    for(param in parameter) {
      cparam <- as.character(param)
      ## Sample N initial states
      initials <- sample(names(initial), N, replace=TRUE, prob=initial)
      if(is.numeric(y)) initials <- as.numeric(initials)
      ## Sample treatment groups 1 and 2
      X <- matrix(sample(1 : 2, N, replace=TRUE),
                  ncol=1, dimnames=list(NULL, 'group'))
      ## For simMarkovOrd X must be a matrix if it varies
      sdata <- simMarkovOrd(n=N, y, times, initials, X=X, absorb=absorb,
                            intercepts=intercepts, g=g, parameter=param,
                            rdsample=rdsample)
      ## Attribute name must match the one set by simMarkovOrd
      tsps <- attr(sdata, 'times.saved.per.subject')
      if(length(tsps))
        cat('Average number of measurement times saved per subject by',
            'response-dependent sampling:', round(tsps, 1), '\n')
      ## sdata is a data frame containing id, time, yprev, y, ...
      sdata$group <- as.factor(sdata$group)
      if(yprevfactor) sdata$yprev <- as.factor(sdata$yprev)
      if(isim == 1 && ! isppo) {
        .dd. <- rms::datadist(sdata)
        options(datadist=.dd.)   # requires rms 6.1-1
      }
      ## For each look compute the parameter estimate and its variance
      ## If a contrast is specified (say when treatment interacts with time)
      ## use that instead of a simple treatment effect
      ## For vglm speed up by taking as starting values the estimates
      ## from the last successful run
      for(l in looks) {
        ## Subjects are numbered consecutively with id=1,2,3,... and
        ## these correspond to sequential data looks when accumulated
        dat <- subset(sdata, id <= l)
        luy <- length(unique(dat$y))
        if(luy != ndy) {
          f <- paste('Simulated data for simulation with sample size', l,
                     'has', luy,
                     'distinct y values instead of the required', ndy)
          fail <- TRUE
        } else {
          if(isppo) {
            cprev <- coefprev[[cparam]]
            ## Could not get system to find cprev when regular call inside try()
            ff <- call('vglm', formula,
                       VGAM::cumulative(parallel=ppo, reverse=TRUE),
                       coefstart=cprev, data=dat)
            f  <- try(eval(ff), silent=TRUE)
          }
          else
            f <- try(rms::lrm(formula, data=dat, x=cscov, y=cscov),
                     silent=TRUE)
          fail <- inherits(f, 'try-error')
        }
        if(fail) failures <- c(failures, as.character(f))
        else {
          if(isppo && l == max(looks) && ! length(coefprev[[cparam]]))
            coefprev[[cparam]] <- coef(f)
          if(! length(co.na)) {   # save template to insert for failures
            co.na   <- coef(f)
            co.na[] <- NA
          }
        }
        if(cscov && ! fail) f <- rms::robcov(f, dat$id)
        is <- is + 1
        sim [is] <- isim
        parm[is] <- param
        look[is] <- l
        if(fail) {
          est [is] <- NA
          vest[is] <- NA
        } else {
          if(length(groupContrast)) {
            fc <- if(isppo)
                    (if(groupContrastUsesN)
                       groupContrast(f, n1=sum(dat$group == '1'),
                                     n2=sum(dat$group == '2'),
                                     parameter=param)
                     else groupContrast(f))
                  else rms::contrast(f, groupContrast[[2]], groupContrast[[1]])
            est [is] <- fc$Contrast
            vest[is] <- (fc$SE) ^ 2
          } else {
            est [is] <- coef(f)[pname]
            vest[is] <- vcov(f)[pname, pname]
          }
          if(length(maxest) && abs(est[is]) > maxest) {
            failures <- c(failures, paste0('|contrast|>', maxest))
            est[is]  <- vest[is] <- NA
            fail     <- TRUE
          } else if(length(maxvest) && vest[is] > maxvest) {
            failures <- c(failures, paste0('variance>', maxvest))
            est[is]  <- vest[is] <- NA
            fail     <- TRUE
          }
        }   # end else if not fail
      }   # end looks
      co <- if(fail) co.na else coef(f)
      if(! length(lrmcoef))
        lrmcoef <- array(0., dim=c(length(parameter), nsim, length(co)),
                         dimnames=list(as.character(parameter),
                                       paste('sim', 1 : nsim),
                                       names(co)))
      ww <- try(lrmcoef[as.character(param), isim, ] <- co)
      if(inherits(ww, 'try-error')) {
        wf <- 'estSeqMarkovOrd.err'
        prn(dimnames(lrmcoef), file=wf)
        prn(as.character(param), file=wf)
        prn(isim, file=wf)
        prn(co, file=wf)
        stop('non-conformable coefficients in estSeqMarkovOrd.  See file estSeqMarkovOrd.err in current working directory.')
      }
      if(length(timecriterion)) {
        ## Separately for each subject compute the time until the
        ## criterion is satisfied.  Right censor at last observed time if it
        ## doesn't occur
        setDT(sdata, key=c('group', 'id', 'time'))
        d <- sdata[, h(time, y), by=.(group, id)]
        fit <- survival::coxph(Surv(etime, event) ~ group, data=d)
        loghr  [is] <- fit$coef
        lrchisq[is] <- 2. * diff(fit$loglik)
        if(coxzph)
          phchisq[is] <- survival::cox.zph(fit, transform='identity',
                                           global=FALSE)$table[, 'chisq']
        for(gr in c('1', '2')) {
          for(ev in 0 : 1) {
            utimes <- with(subset(d, group == gr & event == ev),
                           as.character(etime))
            utimes <- factor(utimes, as.character(times))
            tab    <- table(utimes)
            Etimefreq[isim, as.character(param), gr, ev + 1, ] <-
              Etimefreq[isim, as.character(param), gr, ev + 1, ] + tab
          }   # end censored vs event
        }   # end group
      }   # end timecriterion
      if(length(sstat)) {
        ## Separately for each subject compute the summary statistic
        sds <- sdata[, ys := sstat(time, y), by=.(group, id)]
        Sstat[isim, as.character(param), sds$id, ] <- cbind(sds$group, sds$ys)
      }   # end sstat
    }   # end param
  }   # end sim

  res <- data.frame(sim=sim, parameter=parm, look=look, est=est, vest=vest)
  if(length(timecriterion)) {
    res$loghr   <- loghr
    res$lrchisq <- lrchisq
    if(coxzph) res$phchisq <- phchisq
    attr(res, 'etimefreq') <- Etimefreq
  }
  if(length(sstat)) attr(res, 'sstat') <- Sstat
  attr(res, 'lrmcoef') <- lrmcoef
  failures <- if(length(failures))
                as.data.frame(table(failure=failures))
              else data.frame(failure='', Freq=0)
  attr(res, 'failures') <- failures
  res
}
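## A minimal calling sketch for estSeqMarkovOrd (hypothetical settings, with
## g assumed to be defined as for simMarkovOrd; see
## https://hbiostat.org/R/Hmisc/markov/ for fully worked examples; not run):
if (FALSE) {
  est <- estSeqMarkovOrd(y=1:4, times=1:10,
                         initial=c('1'=.7, '2'=.2, '3'=.1),
                         absorb=4, intercepts=c(3, 0, -3),
                         parameter=c(0, log(.75)),   # null and OR=0.75 effects
                         looks=c(50, 100), g=g,
                         formula=y ~ yprev + time + group,
                         nsim=10)
  attr(est, 'failures')   # catalog of any unsuccessful model fits
}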
#' Compute Parameters for Proportional Odds Markov Model
#'
#' Given a vector `intercepts` of initial guesses at the intercepts in a Markov proportional odds model, and a vector `extra` if there are other parameters, solves for the `intercepts` and `extra` vectors that yield a set of occupancy probabilities at time `t` that equal, as closely as possible, a vector of target values.
#' @title intMarkovOrd
#' @inheritParams simMarkovOrd
#' @param intercepts vector of initial guesses for the intercepts
#' @param extra an optional vector of initial guesses for other parameters passed to `g` such as regression coefficients for previous states and for general time trends.  Name the elements of `extra` for more informative output.
#' @param target vector of target state occupancy probabilities at time `t`.  If `extra` is specified, `target` must be a matrix where row names are character versions of `t` and columns represent occupancy probabilities corresponding to values of `y` at the time given in the row.
#' @param t target times.  Can have more than one element only if `extra` is given.
#' @param ftarget an optional function defining constraints that relate to transition probabilities.  The function returns a penalty which is a sum of absolute differences in probabilities from target probabilities over possibly multiple targets.  The `ftarget` function must have two arguments: `intercepts` and `extra`.
#' @param onlycrit set to `TRUE` to only return the achieved objective criterion and not print anything
#' @param constraints a function of two arguments: the vector of current intercept values and the vector of `extra` parameters, returning `TRUE` if that vector meets the constraints and `FALSE` otherwise
#' @param printsop set to `TRUE` to print solved-for state occupancy probabilities for groups 1 and 2 and log odds ratios corresponding to them
#' @param ... optional arguments to pass to [stats::nlm()].  If this is specified, the arguments that `intMarkovOrd` normally sends to `nlm` are not used.
#'
#' @return list containing two vectors named `intercepts` and `extra` unless `onlycrit=TRUE`, in which case the best achieved sum of absolute errors is returned
#' @author Frank Harrell
#' @export
#' @md
#' @seealso <https://hbiostat.org/R/Hmisc/markov/>
intMarkovOrd <- function(y, times, initial, absorb=NULL,
                         intercepts, extra=NULL, g, target, t, ftarget=NULL,
                         onlycrit=FALSE, constraints=NULL,
                         printsop=FALSE, ...) {

  if(any(diff(intercepts) > 0)) stop('initial intercepts are out of order')

  t <- as.character(t)
  if(length(t) > 1 && (! is.matrix(target) || nrow(target) != length(t)))
    stop('target must be a matrix with # rows = length of t')
  if(length(t) == 1) target <- matrix(target, nrow=1, dimnames=list(t, NULL))
  for(ti in t)
    if(abs(sum(target[ti, ]) - 1.) > 1e-5)
      stop('each row of target must sum to 1')

  h <- function(a) {
    ## Compute state occupancy probabilities at time t for current
    ## vector of intercept values and extra
    ints <- a[1 : (length(a) - length(extra))]
    if(any(diff(ints) > 0.)) return(1000.)
    if(length(extra)) extra <- a[-(1 : length(ints))]
    if(length(constraints) && ! constraints(ints, extra)) return(1000.)
    s <- soprobMarkovOrd(y, times, initial=initial, absorb=absorb,
                         intercepts=ints, g=g, X=1, extra=extra)[t,, drop=FALSE]
    ## Objective function to minimize: sum of absolute differences with targets
    ## with restriction that intercepts be in descending order
    crit <- 0.   # if(any(diff(ints) > 0.)) 1000. else 0.
    for(tim in rownames(s)) crit <- crit + sum(abs(s[tim, ] - target[tim, ]))
    if(length(ftarget)) crit <- crit + ftarget(intercepts=ints, extra=extra)
    crit
  }

  if(length(list(...))) u <- nlm(h, c(intercepts, extra), ...)
  else
    u <- nlm(h, c(intercepts, extra), iterlim=300)
  if(onlycrit) return(u$minimum)

  cat('\nIterations:', u$iterations, '\n')
  cat('Sum of absolute errors:', u$minimum, '\n')
  ints <- u$estimate[1 : (length(u$estimate) - length(extra))]
  if(length(extra))
    extra <- structure(u$estimate[-(1 : length(ints))], names=names(extra))
  cat('Intercepts:', round(ints, 3), '\n')
  if(length(extra)) {
    cat('\nExtra parameters:\n\n')
    print(round(extra, 4))
  }

  s1 <- soprobMarkovOrd(y, times, initial=initial, absorb=absorb,
                        intercepts=ints, g=g, X=1, extra=extra)
  if(printsop) {
    cat('\nOccupancy probabilities for group 1:\n\n')
    print(round(s1, 3))
  }
  ## Show occupancy probabilities for group 2
  s2 <- soprobMarkovOrd(y, times, initial=initial, absorb=absorb,
                        intercepts=ints, g=g, X=2, extra=extra)
  if(printsop) {
    cat('\nOccupancy probabilities for group 2:\n\n')
    print(round(s2, 3))
  }

  ## Compute log odds ratios at day t
  if(printsop) for(ti in t) {
    ## Get cumulative probabilities from right to left except for the first
    s1t <- rev(cumsum(rev(s1[ti, -1])))
    s2t <- rev(cumsum(rev(s2[ti, -1])))
    lor <- round(qlogis(s2t) - qlogis(s1t), 3)
    cat('\nLog odds ratios at', paste0('t=', ti),
        'from occupancy probabilities:', lor, '\n')
  }

  list(intercepts=ints, extra=extra)
}

#' State Occupancy Probabilities for First-Order Markov Ordinal Model from a Model Fit
#'
#' Computes state occupancy probabilities for a single setting of baseline covariates.  If the model fit was from `rms::blrm()`, these probabilities are from all the posterior draws of the basic model parameters.  Otherwise they are maximum likelihood point estimates.
#'
#' @title soprobMarkovOrdm
#' @param object a fit object created by `blrm`, `lrm`, `orm`, `VGAM::vglm()`, or `VGAM::vgam()`
#' @param data a single observation list or data frame with covariate settings, including the initial state for Y
#' @param times vector of measurement times
#' @param ylevels a vector of ordered levels of the outcome variable (numeric or character)
#' @param absorb vector of absorbing states, a subset of `ylevels`.  The default is no absorbing states.  (numeric, character, factor)
#' @param tvarname name of time variable, defaulting to `time`
#' @param pvarname name of previous state variable, defaulting to `yprev`
#' @param gap name of time gap variable, defaults assuming that gap time is not in the model
#'
#' @return if `object` was not a Bayesian model, a matrix with rows corresponding to times and columns corresponding to states, with values equal to exact state occupancy probabilities.  If `object` was created by `blrm`, the result is a 3-dimensional array with the posterior draws as the first dimension.
#' @export
#' @author Frank Harrell
#' @seealso <https://hbiostat.org/R/Hmisc/markov/>
#' @md
soprobMarkovOrdm <- function(object, data, times, ylevels,
                             absorb=NULL,
                             tvarname='time', pvarname='yprev',
                             gap=NULL) {

  cl <- class(object)[1]
  ftypes <- c(lrm='rms', orm='rms', blrm='rmsb', vglm='vgam', vgam='vgam')
  ftype  <- ftypes[cl]
  if(is.na(ftype)) stop(paste('object must be a fit from one of:',
                              paste(ftypes, collapse=' ')))
  ## For VGAM objects, predict() did not find the right function when
  ## inside the rms package
  prd <- switch(ftype,
                rms =function(obj, data) predict(obj, data,
                                                 type='fitted.ind'),
                vgam=function(obj, data) VGAM::predict(obj, data,
                                                       type='response'),
                rmsb=function(obj, data) predict(obj, data, type='fitted.ind',
                                                 posterior.summary='all'))

  if(pvarname %nin% names(data))
    stop(paste(pvarname, 'is not in data'))
  if(length(absorb) && (pvarname %in% absorb))
    stop('initial state cannot be an absorbing state')

  nd <- if(ftype == 'rmsb' && length(object$draws)) nrow(object$draws) else 0
  if((nd == 0) != (ftype != 'rmsb'))
    stop('model fit inconsistent with having posterior draws')

  k <- length(ylevels)
  s <- length(times)
  P <- if(nd == 0)
         array(NA, c(s, k),
               dimnames=list(as.character(times), as.character(ylevels)))
       else
         array(NA, c(nd, s, k),
               dimnames=list(paste('draw', 1 : nd), as.character(times),
                             as.character(ylevels)))
  ## Never uncondition on initial state
  data[[tvarname]] <- times[1]
  if(length(gap)) data[[gap]] <- times[1]
  data <- as.data.frame(data)
  p <- prd(object, data)
  if(nd == 0) P[1, ] <- p else P[, 1, ] <- p

  ## cp: matrix of conditional probabilities of Y conditioning on previous time Y
  ## Columns = k conditional probabilities conditional on a single previous state
  ## Rows = all possible previous states
  ## This is for a single posterior draw (or for a frequentist fit)
  rnameprev   <- paste('t-1', ylevels)
  rnameprevna <- paste('t-1', setdiff(ylevels, absorb))
  if(length(absorb)) {
    rnamepreva <- paste('t-1', absorb)
    cnamea     <- paste('t', absorb)
  }
  cp <- matrix(0., nrow=k, ncol=k,
               dimnames=list(rnameprev, paste('t', ylevels)))
  ## cp is initialized to zero, which will remain the values for
  ## probabilities of moving out of absorbing (row) states
  ## Set probabilities of staying in absorbing states to 1
  if(length(absorb)) cp[cbind(rnamepreva, cnamea)] <- 1.

  data <- as.list(data)
  yna  <- setdiff(ylevels, absorb)   # non-absorbing states
  data[[pvarname]] <- yna    # don't request estimates for absorbing states
  edata <- expand.grid(data)
  for(it in 2 : s) {
    edata[[tvarname]] <- times[it]
    if(length(gap)) edata[[gap]] <- times[it] - times[it - 1]
    pp <- prd(object, edata)
    if(nd == 0) {
      ## If there are absorbing states, make a bigger version of
      ## the cell probability matrix that includes them
      ## Rows representing absorbing states have P(staying in that state)=1
      cp[rnameprevna, ] <- pp
      ## Compute unconditional probabilities of being in all possible states
      ## at current time t
      P[it, ] <- t(cp) %*% P[it - 1, ]
    } else {
      for(idraw in 1 : nd) {
        cp[rnameprevna, ] <- pp[idraw, , ]
        P[idraw, it, ] <- t(cp) %*% P[idraw, it - 1, ]
      }
    }
  }
  P
}

utils::globalVariables(c('id', 'group', 'event', ':=', 'ys'))
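## A sketch of solving for intercepts that best match target state occupancy
## probabilities at a single time (hypothetical targets, with g assumed to be
## defined as for simMarkovOrd; not run):
if (FALSE) {
  z <- intMarkovOrd(y=1:3, times=1:10, initial=1, absorb=3,
                    intercepts=c(2, -2), g=g,
                    target=c(.5, .3, .2), t=10)
  z$intercepts   # solved-for intercepts, in descending order
}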
Hmisc/R/GiniMd.s

## David HA (1968): Gini's mean difference rediscovered.  Biometrika 55 No. 3
## p. 573-575
## For binary x, is 2*n*p*(1-p)/(n-1) = approx 2*p*(1-p)
## For trinomial with value A (frequency a) B (freq b) C (freq c):
## 2*(ab|A-B| + ac|A-C| + bc|B-C|)/[n(n-1)]
## = 2n/(n-1) * [ PaPb|A-B| + PaPc|A-C| + PbPc|B-C| ]

GiniMd <- function(x, na.rm=FALSE) {
  if(na.rm) {
    k <- is.na(x)
    if(any(k)) x <- x[! k]
  }
  n <- length(x)
  if(n < 2) return(NA)
  w <- 4 * ((1 : n) - (n - 1) / 2) / n / (n - 1)
  sum(w * sort(x - mean(x)))   ## center for numerical stability only
}

Hmisc/R/spearman.test.s

## Spearman correlation test (p=1) or Spearman test extended by adding
## rank(x)^2 to model (p=2)
## F Harrell  30Sep90

spearman.test <- function(x, y, p=1) {
  x <- as.numeric(x); y <- as.numeric(y)   ## 17Jul97
  if(length(x) != length(y))
    stop("length of x must = length of y")
  nomiss <- ! is.na(x + y)
  n <- sum(nomiss)
  if(n < 3)
    stop("fewer than 3 non-missing x-y pairs")
  if(!(p == 1 | p == 2))
    stop("p must be 1 or 2")
  x <- x[nomiss]
  x <- rank(x)
  y <- y[nomiss]
  y <- rank(y)
  sst <- sum((y - mean(y))^2)
  if(p == 2) x <- cbind(x, x^2)
  sse <- sum((lsfit(x, y)$residuals)^2)
  rsquare <- 1 - sse / sst
  df2 <- n - p - 1
  fstat <- rsquare / p / ((1 - rsquare) / df2)
  pvalue <- 1 - pf(fstat, p, df2)
  x <- c(rsquare, fstat, p, df2, pvalue, n)
  names(x) <- c("Rsquare", "F", "df1", "df2", "pvalue", "n")
  x
}

Hmisc/R/labcurve.s

labcurve <- function(curves, labels=names(curves),
                     method=NULL, keys=NULL, keyloc=c('auto','none'),
                     type='l', step.type=c('left','right'),
                     xmethod=if(any(type=='s')) 'unique' else 'grid',
                     offset=NULL, xlim=NULL,
                     tilt=FALSE, window=NULL, npts=100, cex=NULL,
                     adj='auto', angle.adj.auto=30,
                     lty=pr$lty, lwd=pr$lwd, col.=pr$col, transparent=TRUE,
                     arrow.factor=1, point.inc=NULL, opts=NULL,
                     key.opts=NULL, empty.method=c('area','maxdim'),
                     numbins=25, pl=!missing(add), add=FALSE,
                     ylim=NULL, xlab="", ylab="",
                     whichLabel=1:length(curves),
                     grid=FALSE, xrestrict=NULL, ...) {
  if(pl && !add) {
    plot.new(); par(new=TRUE)   # enables strwidth etc.
} oxpd <- par('xpd') par(xpd=NA) on.exit(par(xpd=oxpd)) gfun <- ordGridFun(grid) ## see Misc.s gun <- gfun$unit diffu <- function(v) diff(unclass(v)) # mainly for POSIXt ## also look at difftime mcurves <- missing(curves) pr <- par(c('cex','col','lwd','lty')) if(!mcurves) { nc <- length(curves) type <- rep(type, length.out=nc) lty <- rep(lty, length.out=nc) lwd <- rep(lwd, length.out=nc) col. <- rep(col., length.out=nc) for(i in 1:nc) { z <- curves[[i]] if(pl && !add) { if(i==1) { xlm <- range(z[[1]],na.rm=TRUE) ylm <- range(z[[2]],na.rm=TRUE) } else { xlm <- range(xlm,z[[1]],na.rm=TRUE) ylm <- range(ylm,z[[2]],na.rm=TRUE) } } if(length(a <- z$type)) type[i] <- a if(length(a <- z$lty)) lty[i] <- a if(length(a <- z$lwd)) lwd[i] <- a if(length(a <- z$col)) col.[i] <- a } } ## Optionally bring arguments from opts as if they were listed outside opts ## This is used when opts is passed through to a function calling labcurve if(length(opts) && is.list(opts)) { names.opts <- names(opts) full.names <- c('labels','method','keys','keyloc','type','step.type', 'xmethod','offset','xlim','tilt','window','npts','cex', 'adj','angle.adj.auto','lty','lwd','col.','n.auto.keyloc', 'transparent','arrow.factor','point.inc','key.opts', 'empty.method','numbins','ylim','xlab','ylab') i <- charmatch(names.opts, full.names, -1) if(any(i < 1)) stop(paste('Illegal elements in opts:', paste(names.opts[i < 1], collapse=' '))) for(j in 1:length(opts)) assign(full.names[i[j]],opts[[j]],immediate=TRUE) } if(mcurves) nc <- length(labels) else if(!is.logical(labels) && nc != length(labels)) stop('length of labels is not equal to # curves') type <- rep(type, length.out=nc) lty <- rep(lty, length.out=nc) lwd <- rep(lwd, length.out=nc) col. <- rep(col., length.out=nc) if(pl) { if(mcurves) stop('curves must be given if pl=T') if(!add) { if(!length(xlim)) xlim <- xlm if(!length(ylim)) ylim <- ylm namcur <- names(curves[[1]]) if(!is.expression(xlab) && xlab=='' && length(namcur)) xlab <- namcur[1] if(!is.expression(ylab) && ylab=='' && length(namcur)) ylab <- namcur[2] if(grid) stop("grid=TRUE when pl=TRUE is not yet implemented") else plot(0, 0, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, type='n', xaxt='n') if(inherits(xlim,'POSIXt') || inherits(xlim,'POSIXct')) axis.POSIXct(1) else if(inherits(xlim,'Date')) axis.Date(1) else axis(1) pr <- par(c('cex','col','lwd','lty')) } for(i in 1:nc) { z <- curves[[i]] gfun$lines(z[[1]], z[[2]], type=type[i], lty=lty[i], lwd=lwd[i], col=col.[i], xpd=FALSE) } } if(length(method) && method=='none') return(invisible()) pr <- parGrid(grid) usr <- pr$usr; uin <- pr$uin is.keys <- length(keys) > 0 lines.keys <- length(keys)==1 && is.character(keys) && keys=='lines' if(!length(method)) { if(is.keys) method <- if(is.numeric(keys) || lines.keys) 'on top' else 'offset' else method <- 'offset' } ## Expand abbreviations for method - couldn't use match.arg possible.methods <- c('offset','on top','arrow','mouse','locator') i <- charmatch(method, possible.methods, -1) if(i < 1) stop(paste('method must be one of ', paste(possible.methods,collapse=' '))) method <- possible.methods[i] if(!length(cex)) cex <- pr$cex if(mcurves && method %nin% c('mouse','locator')) stop('must specify curves unless method="mouse" or "locator"') if(!lines.keys && is.keys && length(keys) != nc) stop('number of keys must = number of curves') if(method %in% c('mouse','locator')) { if(adj=='auto') adj <- .5 xt <- yt <- numeric(nc) for(i in 1:nc) { if(i %in% whichLabel) { cat('\nPosition pointer to desired center of curve label and 
click for', labels[i],'\n') lab.pos <- locator(1) xt[i] <- lab.pos$x yt[i] <- lab.pos$y gfun$text(lab.pos, labels[i], cex=cex, adj=adj, col=col.[i], ...) } } return(invisible(list(x=xt, y=yt, offset=0, adj=adj, cex=cex, angle=0, col=col., lwd=lwd, key.opts=key.opts, ...))) } if(is.character(keyloc)) keyloc <- match.arg(keyloc) empty.method <- match.arg(empty.method) if(!length(offset)) offset <- if(grid) unit(.75,"strheight","m") else strheight('m','user', cex)*.75 if(!length(xlim)) xlim <- usr[1:2] if(!length(ylim)) ylim <- usr[3:4] if(nc==1) { ci <- curves[[1]] xx <- ci[[1]]; yy <- ci[[2]] s <- is.finite(xx+yy) xx <- xx[s]; yy <- yy[s] imid <- trunc((length(xx)+1)/2) adj <- if(is.character(adj))0.5 else adj if(any(whichLabel==1)) gfun$text(xt <- gun(xx[imid]), yt <- gun(yy[imid])+offset, labels, cex=cex, adj=adj, col=col., ...) return(invisible(list(x=xt, y=yt, offset=offset, adj=adj, cex=cex, col=col., lwd=lwd, angle=0, key.opts=key.opts, ...))) } if(xmethod %nin% c('grid','unique')) stop('xmethod must be "grid" or "unique"') step.type <- match.arg(step.type) if(is.character(adj)) { adj.does.vary <- TRUE adj.needs.to.vary <- TRUE adj <- rep(.5, nc) } else { adj.does.vary <- length(adj) > 1 adj.needs.to.vary <- FALSE adj <- rep(adj, length.out=nc) } if(xmethod=='grid') xs <- seq(xlim[1],xlim[2],length.out=npts) else { xs <- unlist(sapply(curves, function(z)z[[1]])) xs <- sort(unique(xs[!is.na(xs)])) xs <- xs[xs>=xlim[1] & xs<=xlim[2]] } ys <- matrix(NA, nrow=length(xs), ncol=nc) rng <- matrix(NA, nrow=2, ncol=nc) for(i in 1:nc) { ci <- curves[[i]] xx <- ci[[1]]; yy <- ci[[2]] s <- is.finite(xx+yy) xx <- xx[s] y <- approx(xx, yy[s], xout=xs, f=if(step.type=='left') 0 else 1, method=if(type[i]=='l') "linear" else "constant")$y y <- pmax(pmin(y,usr[4]),usr[3]) ## Where one curve is not defined, consider this gap to have an ordinate ## that is far from the other curves so labels where be placed where ## the other curves haven't started or after they've ended y[is.na(y)] <- 1e10 ys[,i] <- y rxx <- range(xx) if(length(xrestrict)) { rxx[1] <- max(rxx[1],xrestrict[1]) rxx[2] <- min(rxx[2],xrestrict[2]) } rng[,i] <- rxx ## Save real range of each x-vector so candidates for labeling ## will be where the curve really exists } if(method=='on top' && is.keys && is.numeric(keys)) { ## Draw periodic symbols sym <- function(curve, pch, inc, offset, type, step.type, col., grid, gfun) { x <- curve[[1]]; y <- curve[[2]] s <- is.finite(x+y) x <- x[s]; y <- y[s] if(length(x) < 2) stop("when specifying numeric keys (pch) you must have >=2 data points") lim <- range(x) xx <- if(grid) convertX(gun(seq(lim[1],lim[2],by=inc) + offset), 'native', valueOnly=TRUE) else seq(lim[1], lim[2], by=inc) + offset if(length(xx)>1) xx <- xx[-1] xx <- xx[xx<=lim[2]] if(length(xx)==0) warning('curve was too short to mark with a symbol.\nMay want to change point.inc or xmethod for labcurve') else { yy <- approx(x, y, xout=xx, method=if(type=='l') 'linear' else 'constant', f=if(step.type=='left') 0 else 1)$y gfun$points(xx, yy, pch=pch, col=col.) 
} } if(!length(point.inc)) point.inc <- diffu(xlim)/5 for(i in 1:nc) sym(curves[[i]], keys[i], point.inc, (i-1)*point.inc/nc, type[i], step.type, col.=col.[i], grid, gfun) xt <- yt <- NULL } else { xt <- yt <- direction <- numeric(nc) angle <- rep(0,nc) g <- function(x) { ## finds min(abs(x)) but keeps original sign ax <- abs(x) if(all(is.na(ax))) return(NA) w <- min(ax, na.rm=TRUE) (x[ax==w])[1] #use first occurrence } for(i in 1:nc) { yi <- ys[,i] yi[xs<rng[1,i] | xs>rng[2,i]] <- NA diffmat <- ys[,-i,drop=FALSE] - yi mindiff <- apply(diffmat, 1, g) z <- abs(mindiff)==max(abs(mindiff),na.rm=TRUE) maxid <- min(c(1:length(mindiff))[z], na.rm=TRUE) xt[i] <- xs[maxid] yt[i] <- ys[maxid,i] if(!is.na(mindiff[maxid])) direction[i] <- 1-2*(mindiff[maxid]>0) yto <- yt[i] + direction[i] * (if(grid) convertY(offset,'native',valueOnly=TRUE) else offset) if(!is.na(yto)) if(yto >= usr[4] || yto <= usr[3]) direction[i] <- -direction[i] ## Find slope of curve i at xt[i] if(tilt || adj.needs.to.vary) { angle[i] <- if(type[i]=='s') 0 else { ci <- curves[[i]] xx <- ci[[1]]; yy <- ci[[2]] s <- is.finite(xx+yy) w <- if(length(window)) window else { nch <- if(lines.keys) nchar(labels[i]) else if(is.keys) 1*is.numeric(keys) + nchar(keys[i])*is.character(keys) else nchar(labels[i]) w <- if(grid) nch*convertX(unit(.75,"strwidth","m"), 'native',valueOnly=TRUE) else nch*strwidth('m','user',cex) } yy <- approx(xx[s], yy[s], xout=c(xt[i]-w/2,xt[i]+w/2), rule=2)$y slope <- diff(yy)/w 180*atan(slope*uin[2]/uin[1])/pi } } if(adj.needs.to.vary) { adj[i] <- if(type[i]=='s') 1*(direction[i]<0) else { if(is.na(angle[i]) || abs(angle[i])<=angle.adj.auto) .5 else if((direction[i]<0 && slope>0) || (direction[i]>0 && slope<0)) 0 else 1 } } } if(!tilt) angle[] <- 0 if(!lines.keys && method=='offset' && (!is.logical(labels) || labels)) { if(is.keys) { if(is.numeric(keys)) for(i in 1:nc) gfun$points(xt[i], (gun(yt) + direction*offset)[i], pch=keys[i], col=col.[i]) else if(i %in% whichLabel) gfun$text(xt, gun(yt) + direction*offset, keys, cex=cex, adj=adj[1], col=col., ...) } else { if(tilt || adj.does.vary) for(i in whichLabel) gfun$text(xt[i], gun(yt[i])+direction[i]*offset, labels[i], cex=cex, srt=angle[i], adj=adj[i], col=col.[i],...) else gfun$text(xt, gun(yt)+direction*offset, labels, cex=cex, adj=adj[1], col=col., ...) } } retlist <- list(x=xt, y=yt, offset=direction*offset, adj=adj, cex=cex, col=col., lwd=lwd, angle=if(tilt) angle, key.opts=key.opts, ...) } if(method %in% c('on top','arrow') && (!is.logical(labels) || labels)) { retlist <- list(x=xt, y=yt, offset=0, adj=.5, cex=cex, col=col., lwd=lwd, angle=0, key.opts=key.opts, ...) if(method == 'on top' && !lines.keys) { if(is.keys) { if(is.character(keys)) gfun$text(xt, yt, keys, cex=cex, col=col., adj=.5, ...) ## numeric keys (periodic plotting symbols) already handled above } else gfun$text(xt, yt, labels, cex=cex, col=col., adj=.5, ...) } else if(method=='arrow') { ydelta <- if(grid) unit(1/17,'npc') else diffu(ylim)/17 xdelta <- if(grid) unit(1/26,'npc') else diffu(xlim)/26 lab.pos <- list(x=gun(xt) + xdelta*arrow.factor, y=gun(yt) + ydelta*arrow.factor) gfun$arrows(gun(xt)+xdelta*.6*arrow.factor, gun(yt)+ydelta*.6*arrow.factor, xt,yt,open=TRUE,size=.06,col=col.) gfun$text(lab.pos, labels, cex=cex, col=col., ...) 
} } if(is.keys && (!is.character(keyloc) || keyloc!='none')) { ## Make legend s <- whichLabel if(is.character(keyloc) && keyloc=='auto') { ## Find emptiest spot for drawing legend by finding ## center of largest empty rectangle large enough to hold ## this rectangle Xs <- rep(xs, nc) Ys <- as.vector(ys) putKeyEmpty(Xs, Ys, labels=if(lines.keys || is.numeric(keys)) labels[s] else paste(keys,' ',labels, sep='')[s], pch=if(is.numeric(keys)) keys[s], lty=lty[s], lwd=lwd[s], cex=cex, col=col.[s], transparent=transparent, plot=TRUE, key.opts=key.opts, xlim=xlim, ylim=ylim, grid=grid) } else putKey(keyloc, labels=if(lines.keys || is.numeric(keys)) labels[s] else paste(keys,' ',labels, sep='')[s], pch=if(is.numeric(keys)) keys[s], lty=lty[s], lwd=lwd[s], cex=cex, col=col.[s], transparent=transparent, plot=TRUE, key.opts=key.opts, grid=grid) } invisible(retlist) } ## Version of legend for R that implements plot=FALSE, adds grid=TRUE ## Also defaults lty, lwd, pch to NULL and checks for length>0 rather ## than missing(), so it's easier to deal with non-applicable parameters ## ## rlegendg is better to use when grid is in effect. In R 2.0, you ## can't use strwidth etc. after a lattice drawing has been rendered rlegendg <- function(x, y, legend, col=pr$col[1], lty=NULL, lwd=NULL, pch=NULL, cex=pr$cex[1], other=NULL) { pr <- par() if(is.list(x)) { y <- x[[2]] x <- x[[1]] } do.lines <- (length(lty) && any(lty > 0)) || length(lwd) do.points <- length(pch) cmd <- NULL if(do.lines) cmd$lines <- list(col=col, lty=lty, lwd=lwd) if(do.points) cmd$points<- list(col=col, pch=pch, cex=cex) cmd$text <- list(lab=legend) if(length(other)) cmd <- c(cmd, other) draw.key(cmd, draw=TRUE, vp=viewport(x=unit(x,'npc'),y=unit(y,'npc'))) invisible() } rlegend <- function (x, y, legend, fill, col = "black", lty=NULL, lwd=NULL, pch=NULL, angle = NULL, density = NULL, bty = "o", bg = par("bg"), pt.bg = NA, cex = 1, xjust = 0, yjust = 1, x.intersp = 1, y.intersp= 1, adj = 0, text.width = NULL, merge = do.lines && has.pch, trace = FALSE, ncol = 1, horiz = FALSE, plot=TRUE, grid=FALSE, ...) { gfun <- ordGridFun(grid) ## see Misc.s if (is.list(x)) { if (!missing(y)) { if (!missing(legend)) stop("`y' and `legend' when `x' is list (need no `y')") legend <- y } y <- x$y x <- x$x } else if (missing(y)) stop("missing y") if (!is.numeric(x) || !is.numeric(y)) stop("non-numeric coordinates") if ((nx <- length(x)) <= 0 || nx != length(y) || nx > 2) stop("invalid coordinate lengths") xlog <- par("xlog") ylog <- par("ylog") rect2 <- function(left, top, dx, dy, ...) { r <- left + dx if (xlog) { left <- 10^left r <- 10^r } b <- top - dy if (ylog) { top <- 10^top b <- 10^b } gfun$rect(left, top, r, b, angle = angle, density = density, ...) } segments2 <- function(x1, y1, dx, dy, ...) { x2 <- x1 + dx if (xlog) { x1 <- 10^x1 x2 <- 10^x2 } y2 <- y1 + dy if (ylog) { y1 <- 10^y1 y2 <- 10^y2 } gfun$segments(x1, y1, x2, y2, ...) } points2 <- function(x, y, ...) { if (xlog) x <- 10^x if (ylog) y <- 10^y gfun$points(x, y, ...) } text2 <- function(x, y, ...) { if (xlog) x <- 10^x if (ylog) y <- 10^y gfun$text(x, y, ...) } if (trace) catn <- function(...) 
do.call("cat", c(lapply(list(...), formatC), list("\n"))) pr <- parGrid(grid) cin <- pr$cin Cex <- (if(length(unique(cex)) > 1) mean(cex,na.rm=TRUE) else cex) * pr$cex if (!length(text.width)) text.width <- max(strwidth(legend, units = "user", cex = cex)) else if (!is.numeric(text.width) || text.width < 0) stop("text.width must be numeric, >= 0") xc <- Cex * xInch(cin[1], warn.log = FALSE, grid=grid) yc <- Cex * yInch(cin[2], warn.log = FALSE, grid=grid) xchar <- xc yextra <- yc * (y.intersp - 1) ymax <- max(yc, strheight(legend, units = "user", cex = cex)) ychar <- yextra + ymax if (trace) catn(" xchar=", xchar, "; (yextra,ychar)=", c(yextra, ychar)) if (!missing(fill)) { xbox <- xc * 0.8 ybox <- yc * 0.5 dx.fill <- xbox } do.lines <- (length(lty) && any(lty > 0)) || length(lwd) n.leg <- length(legend) n.legpercol <- if (horiz) { if (ncol != 1) warning(paste("horizontal specification overrides: Number of columns :=", n.leg)) ncol <- n.leg 1 } else ceiling(n.leg/ncol) if (has.pch <- length(pch)) { if (is.character(pch) && nchar(pch[1]) > 1) { if (length(pch) > 1) warning("Not using pch[2..] since pch[1] has multiple chars") np <- nchar(pch[1]) pch <- substr(rep(pch[1], np), 1:np, 1:np) } if (!merge) dx.pch <- x.intersp/2 * xchar } x.off <- if (merge) -0.7 else 0 ## if (xlog) x <- log10(x) 25Oct13 ## if (ylog) y <- log10(y) if (nx == 2) { x <- sort(x) y <- sort(y) left <- x[1] top <- y[2] w <- diff(x) h <- diff(y) w0 <- w/ncol x <- mean(x) y <- mean(y) if (missing(xjust)) xjust <- 0.5 if (missing(yjust)) yjust <- 0.5 } else { h <- n.legpercol * ychar + yc w0 <- text.width + (x.intersp + 1) * xchar if (!missing(fill)) w0 <- w0 + dx.fill if (has.pch && !merge) w0 <- w0 + dx.pch if (do.lines) w0 <- w0 + (2 + x.off) * xchar w <- ncol * w0 + 0.5 * xchar left <- x - xjust * w top <- y + (1 - yjust) * h } if (bty != "n") { if (trace) catn(" rect2(", left, ",", top, ", w=", w, ", h=", h, "...)", sep = "") if(plot) rect2(left, top, dx = w, dy = h, col = bg) ## FEH } xt <- left + xchar + (w0 * rep(0:(ncol - 1), rep(n.legpercol, ncol)))[1:n.leg] yt <- top - (rep(1:n.legpercol, ncol)[1:n.leg] - 1) * ychar - 0.5 * yextra - ymax if (!missing(fill)) { fill <- rep(fill, length.out = n.leg) if(plot) rect2(left = xt, top = yt + ybox/2, dx = xbox, dy = ybox, col = fill) xt <- xt + dx.fill } if (has.pch || do.lines) col <- rep(col, length.out = n.leg) if (do.lines) { seg.len <- 2 ok.l <- if (!length(lty)) { lty <- 1 TRUE } else lty > 0 if (!length(lwd)) lwd <- pr$lwd lty <- rep(lty, length.out = n.leg) lwd <- rep(lwd, length.out = n.leg) if (trace) catn(" segments2(", xt[ok.l] + x.off * xchar, ",", yt[ok.l], ", dx=", seg.len * xchar, ", dy=0, ...)", sep = "") if(plot) segments2(xt[ok.l] + x.off * xchar, yt[ok.l], dx = seg.len * xchar, dy = 0, lty = lty[ok.l], lwd = lwd[ok.l], col = col[ok.l]) xt <- xt + (seg.len + x.off) * xchar } if (has.pch) { pch <- rep(pch, length.out = n.leg) pt.bg <- rep(pt.bg, length.out = n.leg) ok <- is.character(pch) | pch >= 0 x1 <- (if (merge) xt - (seg.len/2) * xchar else xt)[ok] y1 <- yt[ok] if (trace) catn(" points2(", x1, ",", y1, ", pch=", pch[ok], "...)") if(plot) points2(x1, y1, pch = pch[ok], col = col[ok], cex = cex, bg = pt.bg[ok]) if (!merge) xt <- xt + dx.pch } xt <- xt + x.intersp * xchar if(plot) text2(xt, yt, labels = legend, adj = adj, cex = min(cex, na.rm=TRUE)) invisible(list(rect = list(w = w, h = h, left = left, top = top), text = list(x = xt, y = yt))) } putKey <- function(z, labels, type=NULL, pch=NULL, lty=NULL, lwd=NULL, cex=par('cex'), 
col=rep(par('col'),nc), transparent=TRUE, plot=TRUE, key.opts=NULL, grid=FALSE) { nc <- length(labels) if(!length(pch)) pch <- rep(NA, nc) if(!length(lty)) lty <- rep(NA, nc) if(!length(lwd)) lwd <- rep(NA, nc) pp <- !is.na(pch) lp <- !is.na(lty) | !is.na(lwd) lwd <- ifelse(is.na(lwd), par('lwd'), lwd) if(!length(type)) type <- ifelse(!(pp | lp), 'n', ifelse(pp & lp, 'b', ifelse(pp, 'p', 'l'))) pch <- ifelse(is.na(pch) & type!='p' & type!='b', NA, pch) lty <- ifelse(is.na(lty) & type=='p', NA, lty) lwd <- ifelse(is.na(lwd) & type=='p', 1, lwd) cex <- ifelse(is.na(cex) & type!='p' & type!='b', 1, cex) if(any(is.na(lwd))) stop("lwd can not be NA for type='l' or 'b'") if(any(is.na(cex))) stop("cex can not be NA for type='p' or 'b'") m <- list() m[[1]] <- as.name(if(grid) 'draw.key' else 'rlegend') if(!grid) { m$x <- z[[1]]; m$y <- z[[2]] } if(grid) { w <- list(text=list(labels, col=col)) if(!(all(is.na(lty)) & all(is.na(lwd)))) { lns <- list() if(!all(is.na(lty))) lns$lty <- lty if(!all(is.na(lwd))) lns$lwd <- lwd lns$col <- col w$lines <- lns } if(!all(is.na(pch))) w$points <- list(pch=pch, col=col) m$key <- c(w, key.opts) m$draw <- plot if(plot) m$vp <- viewport(x=unit(z[[1]], 'native'), y=unit(z[[2]], 'native')) z <- eval(as.call(m)) size <- if(plot) c(NA,NA) else c(convertUnit(grobWidth(z), 'native', 'x', 'location', 'x', 'dimension', valueOnly=TRUE)[1], convertUnit(grobHeight(z), 'native', 'y', 'location', 'y', 'dimension', valueOnly=TRUE)[1]) return(invisible(size)) } else { m$legend <- labels m$xjust <- m$yjust <- .5 m$plot <- plot m$col <- col m$cex <- cex if(!all(is.na(lty))) m$lty <- lty if(!all(is.na(lwd))) m$lwd <- lwd if(!all(is.na(pch))) m$pch <- pch if(length(key.opts)) m[names(key.opts)] <- key.opts w <- eval(as.call(m))$rect return(invisible(c(w$w[1], w$h[1]))) } m$transparent <- transparent m$corner <- c(.5,.5) m$plot <- plot m$type <- type if(!plot) labels <- substring(labels, 1, 10) ## key gets length wrong for long labels m$text <- list(labels, col=col) if(all(type=='p')) m$points <- list(pch=pch, cex=cex, col=col) else m$lines <- if(any(type!='l')) list(lty=lty, col=col, lwd=lwd, pch=pch, cex=cex) else list(lty=lty, col=col, lwd=lwd) if(length(key.opts)) m[names(key.opts)] <- key.opts invisible(eval(as.call(m))) ## execute key(....) } putKeyEmpty <- function(x, y, labels, type=NULL, pch=NULL, lty=NULL, lwd=NULL, cex=par('cex'), col=rep(par('col'),nc), transparent=TRUE, plot=TRUE, key.opts=NULL, empty.method=c('area','maxdim'), numbins=25, xlim=pr$usr[1:2], ylim=pr$usr[3:4], grid=FALSE) { nc <- length(labels) empty.method <- match.arg(empty.method) pr <- parGrid(grid) uin <- pr$uin uin <- 1 ## already in x,y units z <- putKey(list(0, 0), labels, type, pch, lty, lwd, cex, col, transparent=transparent, plot=FALSE, key.opts=key.opts, grid=grid)/uin ## /uin converts to x,y units ## Find center of largest empty rectangle large enough to hold ## this rectangle s <- is.finite(x + y) if(length(xlim)) s <- s & (x >= xlim[1] & x <= xlim[2]) if(length(ylim)) s <- s & (y >= ylim[1] & y <= ylim[2]) x <- x[s] y <- y[s] keyloc <- largest.empty(x, y, xlim=xlim, ylim=ylim, width=z[1], height=z[2], method=empty.method, numbins=numbins, grid=grid) if(is.na(keyloc$x)) { cat('No empty area large enough for automatic key positioning. 
Specify keyloc or cex.\n') cat('Width and height of key as computed by key(), in data units:', format(z),'\n') return(keyloc) } else if(plot) putKey(keyloc, labels, type, pch, lty, lwd, cex, col, transparent, plot=TRUE, key.opts=key.opts, grid=grid) invisible(keyloc) } largest.empty <- function(x, y, width=0, height=0, numbins=25, method=c('exhaustive','rexhaustive','area','maxdim'), xlim=pr$usr[1:2], ylim=pr$usr[3:4], pl=FALSE, grid=FALSE) { method <- match.arg(method) if(missing(xlim) || missing(ylim)) pr <- parGrid(grid) isna <- is.na(x + y) if(any(isna)) { x <- x[!isna] y <- y[!isna] } n <- length(x) o <- order(y) x <- x[o] y <- y[o] itype <- 3 * (method=='exhaustive') + 4 * (method=='rexhaustive') + 1 * (method=='area') + 2 * (method=='maxdim') storage.mode(x) <- storage.mode(y) <- storage.mode(xlim) <- storage.mode(ylim) <- 'double' storage.mode(numbins) <- storage.mode(itype) <- storage.mode(n) <- 'integer' if(method %in% c('area','maxdim')) { storage.mode(width) <- storage.mode(height) <- 'double' } if(method %in% c('area','maxdim')) { a <- .Fortran(F_largrec, x, y, n, xlim, ylim, width, height, numbins, itype, rx=double(2), ry=double(2)) x <- a$rx if(any(x > 1e29)) { warning('no empty rectangle was large enough') return(list(x=NA, y=NA, rect=list(x=rep(NA,4),y=rep(NA,4)), area=NA)) } y <- a$ry } else if(method=='rexhaustive') { ## Author: Hans Borchers <hwborchers@googlemail.com> maxEmptyRect <- function(ax, ay, x, y, width=0, height=0) { n <- length(x) d <- sort(c(ax, x)) D <- diff(d) m <- which.max(D) ## check vertical slices mgap <- D[m] maxr <- mgap * (ay[2] - ay[1]) maxR <- c(d[m], ay[1], d[m+1], ay[2]) ## o <- order(y) ## X <- x[o]; Y <- y[o] for (i in 1:n) { tl <- ax[1]; tr <- ax[2] if (i < n) { for (j in (i+1):n) { if (x[j] > tl && x[j] < tr) { ## check horizontal slices (j == i+1) ## and (all) rectangles above (x[i], y[i]) area <- (tr-tl)*(y[j]-y[i]) if (area > maxr && ((tr - tl) > width) && ((y[j] - y[i]) > height)) { maxr <- area maxR <- c(tl, y[i], tr, y[j]) } if (x[j] > x[i]) tr <- x[j] else tl <- x[j] } } } ## check open rectangles above (x[i], y[i]) area <- (tr-tl)*(ay[2]-y[i]) if (area > maxr && ((tr - tl) > width) && ((ay[2] - y[i]) > height )){ maxr <- area maxR <- c(tl, y[i], tr, ay[2]) } ## check open rectangles below (x[i], y[i]) ri <- min(ax[2], x[y < y[i] & x > x[i]]) li <- max(ax[1], x[y < y[i] & x < x[i]]) area <- (ri - li)*(ay[2] - y[i]) if (area > maxr && ((ri - li) > width) && ((y[i] - ay[1]) > height)) { maxr <- area maxR <- c(li, ay[1], ri, y[i]) } } return(list(x=maxR[c(1,3)], y=maxR[c(2,4)])) } a <- maxEmptyRect(xlim, ylim, x, y, width, height) x <- a$x y <- a$y if(diff(x) < width || diff(y) < height) { warning('no empty rectangle was large enough') return(list(x=NA, y=NA, rect=list(x=rep(NA,4),y=rep(NA,4)), area=NA)) } } else { d <- sort(c(xlim, x)) D <- diff(d) m <- which.max(D) a <- .Fortran(F_maxempr, xlim, ylim, x, y, n, as.double(width), as.double(height), as.double(c(D[m], d[m], d[m+1])), area=double(1), rect=double(4)) x <- a$rect[c(1,3)] y <- a$rect[c(2,4)] if(diff(x) < width || diff(y) < height) { warning('no empty rectangle was large enough') return(list(x=NA, y=NA, rect=list(x=rep(NA,4),y=rep(NA,4)), area=NA)) } } rectx <- x[c(1,2,2,1)] recty <- y[c(1,1,2,2)] if(pl) ordGridFun(grid)$polygon(rectx, recty, col=1+itype) structure(list(x=mean(x), y=mean(y), rect=list(x=rectx, y=recty), area=diff(x)*diff(y))) } 
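## A minimal usage sketch for labcurve with assumed data (not run):
if (FALSE) {
  xx <- seq(0, 1, length.out=50)
  curves <- list(Female = list(x=xx, y=plogis(3 * xx - 1)),
                 Male   = list(x=xx, y=plogis(2 * xx)))
  labcurve(curves, pl=TRUE)   # labels placed where the curves are most separated
  ## Single-character keys on the curves, with an automatically placed legend
  labcurve(curves, keys=c('F', 'M'), pl=TRUE)
}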
Hmisc/R/stat-plsmo.r

#' Add a lowess smoother without confidence bands.
#'
#' Automatically selects \code{iter=0} for \code{lowess} if \code{y} is binary, otherwise uses \code{iter=3}.
#'
#'
#' @param mapping,data,geom,position,show.legend,inherit.aes see ggplot2 documentation
#' @param span see \code{f} argument to \code{lowess}
#' @param fun a function to transform smoothed \code{y}
#' @param fullrange should the fit span the full range of the plot, or just
#'   the data
#' @param n number of points to evaluate smoother at
#' @param na.rm If \code{FALSE} (the default), removes missing values with
#'   a warning.  If \code{TRUE} silently removes missing values.
#' @param ... other arguments are passed to smoothing function
#' @return a data.frame with additional columns
#'   \item{y}{predicted value}
#' @seealso
#'   \code{\link{lowess}} for \code{loess} smoother.
#' @export
#' @examples
#' \donttest{
#' c <- ggplot(mtcars, aes(qsec, wt))
#' c + stat_plsmo()
#' c + stat_plsmo() + geom_point()
#'
#' c + stat_plsmo(span = 0.1) + geom_point()
#'
#' # Smoothers for subsets
#' c <- ggplot(mtcars, aes(y=wt, x=mpg)) + facet_grid(. ~ cyl)
#' c + stat_plsmo() + geom_point()
#' c + stat_plsmo(fullrange = TRUE) + geom_point()
#'
#' # Geoms and stats are automatically split by aesthetics that are factors
#' c <- ggplot(mtcars, aes(y=wt, x=mpg, colour=factor(cyl)))
#' c + stat_plsmo() + geom_point()
#' c + stat_plsmo(aes(fill = factor(cyl))) + geom_point()
#' c + stat_plsmo(fullrange=TRUE) + geom_point()
#'
#' # Example with logistic regression
#' data("kyphosis", package="rpart")
#' qplot(Age, as.numeric(Kyphosis) - 1, data = kyphosis) + stat_plsmo()
#' }
stat_plsmo <- function (mapping = NULL, data = NULL, geom = "smooth",
                        position = "identity",
                        n = 80, fullrange = FALSE, span = 2/3,
                        fun = function(x) x,
                        na.rm = FALSE,
                        show.legend = NA, inherit.aes = TRUE, ...) {
  layer(
    stat = StatPlsmo,
    data = data,
    mapping = mapping,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      n = n,
      fullrange = fullrange,
      span = span,
      fun = fun,
      na.rm = na.rm,
      ...)
  )
}

StatPlsmo <- ggplot2::ggproto("StatPlsmo", ggplot2::Stat,
  required_aes = c("x", "y"),

  setup_data = function(data, params) {
    if (!requireNamespace("plyr", quietly = TRUE))
      stop("This function requires the 'plyr' package.")
    rows <- plyr::daply(data, "group", function(df) length(unique(df$x)))

    if (all(rows == 1) && length(rows) > 1) {
      message("geom_plsmo: Only one unique x value each group.",
              "Maybe you want aes(group = 1)?")
      return(data.frame())
    }

    data
  },

  compute_group = function(., data, scales, n=80, span=2/3, fun=function(x) x,
                           fullrange=FALSE, xseq = NULL, na.rm = FALSE) {
    data <- remove_missing(data, na.rm, c("x", "y"), name="stat_plsmo")
    if (length(unique(data$x)) < 2) {
      # Not enough data to perform fit
      return(data.frame())
    }

    if (is.null(xseq)) {
      if (is.integer(data$x)) {
        if (fullrange) {
          xseq <- scales$x$dimension()
        } else {
          xseq <- sort(unique(data$x))
        }
      } else {
        if (fullrange) {
          range <- scales$x$dimension()
        } else {
          range <- range(data$x, na.rm = TRUE)
        }
        xseq <- seq(range[1], range[2], length.out = n)
      }
    }

    n_y <- length(unique(data$y[!is.na(data$y)]))
    z <- lowess(data$x, data$y,
                iter = if (n_y < 3) 0 else 3,
                f = span)
    z <- approx(z, xout = xseq)
    z$y <- fun(z$y)
    as.data.frame(z)
  }
)
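## Standalone illustration of the binary-y rule used by compute_group above
## (assumed data; not run):
if (FALSE) {
  set.seed(1)
  x <- runif(100)
  y <- rbinom(100, 1, plogis(2 * x - 1))
  n_y <- length(unique(y))    # 2 distinct values, so y is treated as binary
  z <- lowess(x, y, iter = if (n_y < 3) 0 else 3, f = 2/3)
  plot(x, y); lines(z)
}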
Hmisc/R/summaryP.s

summaryP <- function(formula, data=NULL,
                     subset=NULL, na.action=na.retain,
                     sort=TRUE,
                     asna=c('unknown', 'unspecified'), ...) {

  formula <- Formula(formula)

  Y <- if(length(subset))
         model.frame(formula, data=data, subset=subset, na.action=na.action)
       else
         model.frame(formula, data=data, na.action=na.action)
  X <- model.part(formula, data=Y, rhs=1)
  Y <- model.part(formula, data=Y, lhs=1)

  nY <- NCOL(Y)
  nX <- NCOL(X)
  namY <- names(Y)
  if(nX == 0) X <- data.frame(x=rep(1, NROW(Y)))
  else {
    ## Remove observations with any values of X NA
    i <- apply(is.na(X), 1, any)
    if(any(i)) {
      X <- X[! i,, drop=FALSE]
      Y <- Y[! i,, drop=FALSE]
    }
  }
  ux <- unique(X)
  Z <- NULL
  n <- nrow(X)
  Lev <- character(0)

  if(sort) {
    ## Compute marginal frequencies of all regular variables so can sort
    mfreq <- list()
    for(ny in namY) {
      y <- Y[[ny]]
      if(!inherits(y, 'ynbind') && !inherits(y, 'pBlock')) {
        if(length(asna) && (is.factor(y) || is.character(y)))
          y[y %in% asna] <- NA
        freq <- table(y)
        counts        <- as.numeric(freq)
        names(counts) <- names(freq)
        mfreq[[ny]]   <- - sort(- counts)
      }
    }
  }

  ## Save combinations of var and val to exclude if exclude1 is used in
  ## a function that operates on summaryP result
  ylevels.to.exclude1 <- NULL
  for(ny in namY) {
    y <- Y[[ny]]
    la <- label(y)
    if(la == '') la <- ny
    tab <- table(y)
    tab <- structure(as.numeric(tab), names=names(tab))
    if(length(tab) == 2)
      ylevels.to.exclude1 <-
        rbind(ylevels.to.exclude1,
              data.frame(var=la, val=names(tab)[which.max(tab)]))
  }

  for(i in 1 : nrow(ux)) {
    j <- rep(TRUE, n)
    if(nX > 0)
      for(k in 1 : nX)
        j <- j & (X[[k]] == ux[i, k])
    for(k in 1 : nY) {
      ## y <- yx[[k]] doesn't work as attributes lost by [.data.frame
      y <- Y[[k]]
      y <- if(is.matrix(y)) y[j,, drop=FALSE] else y[j]
      if(inherits(y, 'ynbind') || inherits(y, 'pBlock')) {
        overlab <- attr(y, 'label')
        labs    <- attr(y, 'labels')
        z <- NULL
        for(iy in 1 : ncol(y)) {
          tab <- table(y[, iy])
          no  <- as.numeric(sum(tab))
          if(inherits(y, 'ynbind')) {
            d <- data.frame(var=overlab,
                            val=labs[iy],
                            freq=as.numeric(tab['TRUE']),
                            denom=no)
            Lev <- c(Lev, as.character(labs[iy]))
          } else {
            d <- data.frame(var=overlab,
                            val=names(tab),
                            freq=as.numeric(tab),
                            denom=no)
            Lev <- c(Lev, names(tab))
          }
          z <- rbind(z, d)
        }
      }
      else {   # regular single column
        if(length(asna) && (is.factor(y) || is.character(y)))
          y[y %in% asna] <- NA
        tab <- table(y)
        ny  <- namY[k]
        la  <- label(y)
        if(la == '') la <- ny
        lev <- names(tab)
        no  <- as.numeric(sum(tab))
        if(sort) lev <- reorder(lev, (mfreq[[ny]])[lev])
        Lev <- c(Lev, as.character(lev))
        z <- data.frame(var   = unname(la),
                        val   = lev,
                        freq  = as.numeric(tab),
                        denom = no,
                        stringsAsFactors=TRUE)
      }
      ## Add current X subset settings
      if(nX > 0)
        for(k in 1: nX)
          z[[names(ux)[k]]] <-
            if(is.character(ux[i,k])) factor(ux[i, k]) else ux[i,k]
      Z <- rbind(Z, z)
    }
  }
  Z$val <- factor(Z$val, levels=unique(Lev))
  yl  <- ylevels.to.exclude1
  iex <- integer(0)
  if(length(yl)) {
    for(i in 1 : nrow(Z)) {
      exi <- FALSE
      for(j in 1 : nrow(yl))
        if(as.character(Z$var[i]) == as.character(yl$var[j]) &&
           as.character(Z$val[i]) == as.character(yl$val[j])) exi <- TRUE
      if(exi) iex <- c(iex, i)
    }
  }
  structure(Z, class=c('summaryP', 'data.frame'),
            formula=formula, nX=nX, nY=nY,
            rows.to.exclude1=iex)
}

plot.summaryP <- function(x, formula=NULL, groups=NULL,
                          marginVal=NULL, marginLabel=marginVal,
                          refgroup=NULL, exclude1=TRUE,
                          xlim=c(-.05, 1.05), text.at=NULL,
                          cex.values=0.5,
                          key=list(columns=length(groupslevels),
                                   x=.75, y=-.04, cex=.9,
                                   col=trellis.par.get('superpose.symbol')$col,
                                   corner=c(0,1)),
                          outerlabels=TRUE, autoarrange=TRUE,
                          col=colorspace::rainbow_hcl, ...) {
  ## marginVal: category name indicating addMarginal summaries (usually 'All')
  ## marginLabel: a fuller label for this, e.g. 'All Regions'
  X <- x
  at <- attributes(x)
  Form <- at$formula
  nX   <- at$nX
  nY   <- at$nY

  groupslevels <- if(length(groups)) levels(x[[groups]])
  condvar <- setdiff(names(X), c('val', 'freq', 'denom', groups))
  ## Reorder condvar in descending order of number of levels
  numu <- function(x) if(is.factor(x)) length(levels(x))
                      else
                        length(unique(x[!
is.na(x)])) if(exclude1 && length(at$rows.to.exclude1)) X <- X[- at$rows.to.exclude1, , drop=FALSE] if(autoarrange && length(condvar) > 1) { nlev <- sapply(X[condvar], numu) condvar <- condvar[order(nlev)] } if(grType() == 'plotly') { condvar <- setdiff(condvar, 'var') if(length(condvar) > 1) stop('with options(grType="plotly") does not handle > 1 stratification variable') if(length(condvar) == 1 && length(marginVal)) { X$Big <- X[[condvar]] == marginVal if(marginLabel != marginVal) X[[condvar]] <- ifelse(X$Big, marginLabel, as.character(X[[condvar]])) } X$.gg. <- if(length(groups)) { if(length(condvar) == 1 && length(marginVal)) ifelse(X$Big, as.character(X[[groups]]), paste0(X[[groups]], ' stratified<br>by ', condvar[1])) else X[[groups]] } p <- with(X, dotchartpl(freq / denom, major = var, minor = val, group = if(length(groups)) .gg., mult = if(length(condvar) > 0) X[[condvar]], big = if(length(condvar) == 1 && length(marginVal)) Big, num = freq, denom = denom, refgroup = refgroup, xlim = xlim, col = col, nonbigtracename=if(length(condvar)) paste0('Stratified by\n', condvar[1]) else 'Stratified Estimates', ...) ) return(p) } form <- if(length(formula)) formula else as.formula( paste('val ~ freq', paste(condvar, collapse=' * '), sep=' | ')) pan <- function(x, y, subscripts, groups=NULL, ...) { y <- as.numeric(y) denom <- X$denom[subscripts] panel.dotplot(x/denom, y, subscripts=subscripts, groups=groups, ...) if(length(cex.values) && cex.values > 0) { col <- if(length(groups)) trellis.par.get('superpose.symbol')$col else trellis.par.get('dot.symbol')$col longest.string <- paste(max(x), max(denom), sep='/ ') length.longest <- unit(1, 'strwidth', longest.string) xpos <- unit(1, 'npc') - unit(1, 'mm') txt <- if(length(groups)) { groups <- groups[subscripts] tx <- '' ig <- 0 xpos <- xpos - length(levels(groups)) * length.longest for(g in levels(groups)) { ig <- ig + 1 i <- groups == g fr <- paste(x[i], denom[i], sep='/') xpos <- xpos + length.longest grid.text(fr, xpos, unit(y, 'native') - unit(1, 'mm'), just=c('right','top'), gp=gpar(cex=cex.values, col=col[ig])) } } else { fr <- paste(x, denom, sep='/') grid.text(fr, xpos, unit(y, 'native') - unit(1, 'mm'), gp=gpar(cex=cex.values, col=col[1]), just=c('right','top')) } } } scal <- list(y='free', rot=0) scal$x <- if(length(text.at)) { at <- pretty(xlim) list(limits=range(c(xlim, text.at)), at=at[at >= -0.0001 & at <= 1.0001]) } else list(limits=xlim) d <- if(!length(groups)) dotplot(form, data=X, scales=scal, panel=pan, xlab='Proportion', ...) 
else eval(parse(text= sprintf("dotplot(form, groups=%s, data=X, scales=scal, panel=pan, auto.key=key, xlab='Proportion', ...)", groups) )) # if(outerlabels && ((nX - length(groups) + 1 == 2) || # length(dim(d)) == 2)) d <- useOuterStrips(d) if(length(dim(d)) == 2) d <- latticeExtra::useOuterStrips(d) ## Avoid wasting space for vertical variables with few levels if(condvar[length(condvar)] == 'var') { vars <- levels(X$var) nv <- length(vars) h <- integer(nv) for(i in 1 : nv) h[i] <- length(unique((X$val[X$var == vars[i]]))) d <- latticeExtra::resizePanels(d, h = h + 1) } d } ggplot.summaryP <- function(data, mapping, groups=NULL, exclude1=TRUE, xlim=c(0, 1), col=NULL, shape=NULL, size=function(n) n ^ (1/4), sizerange=NULL, abblen=5, autoarrange=TRUE, addlayer=NULL, ..., environment) { X <- data class(X) <- setdiff(class(X), 'summaryP') at <- attributes(X) Form <- at$formula nX <- at$nX nY <- at$nY groupslevels <- if(length(groups)) levels(X[[groups]]) condvar <- setdiff(names(X), c('val', 'freq', 'denom', groups)) ## Reorder condvar in descending order of number of levels numu <- function(x) if(is.factor(x)) length(levels(x)) else length(unique(x[! is.na(x)])) if(exclude1 && length(at$rows.to.exclude1)) X <- X[- at$rows.to.exclude1, , drop=FALSE] if(autoarrange && length(condvar) > 1) { nlev <- sapply(X[condvar], numu) condvar <- condvar[order(nlev)] } ## Find list of variables that contain only one level but have a ## variable name longer than abblen characters. ## The space devoted ## to one-level variables is not tall enough to print the variable name. ## Replace the name with (1) (2) ... and put the variable names possibly ## in a footnote fnvar <- '' lvar <- levels(X$var) i <- 0 for(v in lvar) { maxlen <- nchar(v) # max(nchar(strsplit(v, split=' ')[[1]])) if(maxlen > abblen) { nlev <- length(unique(X$val[X$var == v])) if(nlev == 1) { i <- i + 1 w <- paste('(', i, ')', sep='') if(i > 1) fnvar <- paste(fnvar, '; ', sep='') fnvar <- paste(fnvar, w, ' ', v, sep='') levels(X$var)[levels(X$var) == v] <- w } } } spl <- function(x) { u <- levels(x) n <- length(u) utrans <- character(n); names(utrans) <- u for(w in u) utrans[w] <- paste(strwrap(w, 10), collapse='\n') factor(x, u, utrans) } X$var <- spl(X$var) if(length(condvar) == 2) { othvar <- setdiff(condvar, 'var') X[[othvar]] <- spl(X[[othvar]]) } N <- X$denom rN <- range(N) ratioN <- rN[2] / rN[1] if(diff(rN) < 10 | (ratioN < 1.2)) size <- NULL ## plotly hover label X$hov <- paste0(round(X$freq / X$denom, 3), ' ', markupSpecs$html$frac(X$freq, X$denom, size=90)) if(length(groups)) X$hov <- paste0(X[[groups]], '<br>', X$hov) k <- 'ggplot(X, aes(x=freq / denom, y=val, text=hov' if(length(groups)) k <- paste(k, sprintf(', color=%s, shape=%s', groups, groups)) k <- paste(k, '))') if(length(size)) { k <- paste(k, if(length(size)) 'geom_point(aes(size = N))' else 'geom_point()', sep=' + ') Ns <- X$denom if(! 
length(sizerange)) { fn <- if(is.function(size)) size else sqrt sizerange <- c(max(0.7, 2.7 / fn(ratioN)), 3.25) } if(is.function(size)) { X$N <- size(Ns) Ns0 <- Ns[Ns > 0] uN <- unique(sort(Ns0)) Nbreaks <- if(length(uN) < 8) uN else unique(round(quantile(Ns0, (0 : 6) / 6, type=1))) Nbreakst <- size(Nbreaks) k <- paste(k, 'scale_size_continuous(breaks=Nbreakst, labels=format(Nbreaks), range=sizerange)', sep=' + ') } else { k <- paste(k, 'scale_size_discrete(range = sizerange)', sep=' + ') X$N <- cut2(Ns, g=size) } } else k <- paste(k, 'geom_point()', sep=' + ') p <- eval(parse(text=k)) if(length(addlayer)) p <- p + addlayer if('var' %nin% condvar) stop('program logic error') if(length(condvar) == 1) p <- p + facet_grid(var ~ . , scales='free_y', space='free_y') else { p <- p + facet_grid(as.formula(sprintf('var ~ %s', othvar)), scales='free_y', space='free_y') } p <- p + xlim(xlim) + xlab('Proportion') + ylab('') if(length(col)) p <- p + scale_color_manual(values=col) if(length(shape)) p <- p + scale_shape_manual(values=shape) if(fnvar != '') attr(p, 'fnvar') <- fnvar p } latex.summaryP <- function(object, groups=NULL, exclude1=TRUE, file='', round=3, size=NULL, append=TRUE, ...) { class(object) <- 'data.frame' rte <- attr(object, 'rows.to.exclude1') if(exclude1 && length(rte)) object <- object[- rte, , drop=FALSE] if(! append) cat('', file=file) p <- ifelse(object$denom == 0, '', format(round(object$freq / object$denom, round))) object$y <- paste(p, ' {\\scriptsize$\\frac{', format(object$freq), '}{', format(object$denom), '}$}', sep='') object$freq <- object$denom <- NULL stratvar <- setdiff(names(object), c('var', 'val', 'y', groups)) svar <- if(! length(stratvar)) as.factor(rep('', nrow(object))) else { if(length(stratvar) == 1) object[[stratvar]] else do.call('interaction', list(object[stratvar], sep=' ')) } object$stratvar <- svar object <- object[, c('var', 'val', 'y', groups, 'stratvar')] nl <- 0 slev <- levels(svar) nslev <- length(slev) for(i in 1 : nslev) { if(nslev > 1) cat('\n\\vspace{1ex}\n\\begin{minipage}{\\linewidth}\n\\textbf{', slev[i], '}\n\\vspace{1ex}\n\n', sep='', file=file, append=TRUE) x <- object[svar == slev[i], colnames(object) != 'stratvar'] if(length(groups)) { ord <- function(v) { v <- as.character(v) un <- unique(v) vn <- 1 : length(un) names(vn) <- un vn[v] } varn <- ord(x$var) valn <- ord(x$val) r <- reshape(x, timevar=groups, direction='wide', idvar=c('var', 'val')) ## reorder rows to be in original order ir <- order(varn[as.character(r$var)], valn[as.character(r$val)]) r <- r[ir, ] ## reshape does not respect order of levels of factors; reorder columns lev <- levels(x[[groups]]) r <- r[c('var', 'val', paste('y', lev, sep='.'))] nl <- length(lev) w <- latex(r[colnames(r) != 'var'], table.env=FALSE, file=file, append=TRUE, rowlabel='', rowname=rep('', nrow(r)), rgroup=levels(r$var), n.rgroup=as.vector(table(r$var)), size=size, colheads=c(' ', lev), center='none') } else { w <- latex(x[colnames(x) != 'var'], table.env=FALSE, file=file, append=TRUE, rowlabel='', rowname=rep('', nrow(x)), rgroup=levels(x$var), n.rgroup=as.vector(table(x$var)), size=size, colheads=c(' ', ' '), center='none') } if(nslev > 1) cat('\\end{minipage}\n', file=file, append=TRUE) } attr(w, 'ngrouplevels') <- nl attr(w, 'nstrata') <- nslev w } 
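## A minimal usage sketch for summaryP and its plot methods, added for
## illustration; the data frame and variable names below are hypothetical
## and not part of the original source.
if(FALSE) {
  set.seed(1)
  d <- data.frame(region = sample(c('North','South'), 100, TRUE),
                  sex    = sample(c('Female','Male'),  100, TRUE),
                  smoker = sample(c('No','Yes'),       100, TRUE))
  s <- summaryP(sex + smoker ~ region, data=d)  # proportions of each level by region
  plot(s)    # lattice dot chart (or plotly when options(grType='plotly'))
  ggplot(s)  # ggplot2 rendering of the same proportions
}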
## Hmisc/R/mtitle.s

## Thanks to Rick Becker for suggestions
mtitle <- function(main,ll,lc, lr=format(Sys.time(),'%d%b%y'),
                   cex.m=1.75, cex.l=.5, ...) {
  out <- any(par()$oma!=0)
  g <- if(out) function(...) mtext(..., outer=TRUE)
       else function(z, adj, cex, side, ...)
         if(missing(side)) title(z, adj=adj, cex=cex)
         else title(sub=z, adj=adj, cex=cex)
  if(!missing(main)) g(main,cex=cex.m,adj=.5)
  if(!missing(lc)) g(lc,side=1,adj=.5,cex=cex.l,...)
  if(!missing(ll)) g(ll,side=1,adj=0,cex=cex.l,...)
  if(lr!="") g(lr,side=1,adj=1,cex=cex.l,...)
  invisible()
}

## Hmisc/R/sys.s

## Improvements by Sebastian Weber <Sebastian.Weber@aventis.com> 26Aug03
sys <- function(command, text=NULL, output=TRUE) {
  cmd <- if(length(text)) paste(command, text) else command
  system(cmd, intern=output)
}

## Hmisc/R/model.frame.default.s

## $Id$
GetModelFrame <- function(formula, specials, default.na.action=NULL) {
  if(missing(formula) || !inherits(formula, "formula"))
    stop("GetModelFrame needs a function argument specified ",
         "as a formula or terms object")
  ## get the function call of the calling function
  fun.call <- match.call(sys.function(sys.parent()),
                         call=sys.call(sys.parent()), expand.dots=FALSE)
  args.needed <- c("formula", "data", "weights", "subset", "na.action")
  m <- structure(match(args.needed, names(fun.call), nomatch=0), names=args.needed)
  ## get the environment of the formula
  env <- environment(formula)
  if (is.null(env)) env <- parent.frame()
  ## If formula is not a terms object then the formula must be
  ## turned into a terms object using the 'terms' function
  if(!inherits(formula, "terms")) {
    ## Check for presence of args needed for terms call
    has.arg <- c(formula=TRUE, data=FALSE)
    if(m["data"]) has.arg["data"] <- TRUE
    new.call <- fun.call[c(1,has.arg)]
    new.call[[1]] <- as.name('terms')
    names(new.call)[2] <- "x"
    if(!missing(specials) && !is.null(specials)) new.call$specials <- specials
    ## convert the formula to a terms object
    formula <- eval(new.call, envir=env)
    # formula <- do.call("terms", args=list(x=formula,
    #                    data=if(m["data"]) fun.call[m["data"]] else NULL,
    #                    specials=specials)[has.arg],
    #                    envir=env)
  }
  new.call <- fun.call[c(1, m)]
  new.call[[1]] <- as.name("model.frame")
  new.call$formula <- formula
  if("na.action" %nin% names(fun.call) && !is.null(default.na.action))
    new.call$na.action <- default.na.action
  return(eval(new.call, env, parent.frame()))
}

## Replaced with one more like default R 3nov02
## With R 1.6 was getting error with ... arguments
## if(FALSE) '[.factor' <- function (x, i, drop = TRUE)
## {
##   y <- NextMethod("[")
##   class(y) <- class(x)
##   attr(y, "contrasts") <- attr(x, "contrasts")
##   attr(y, "levels") <- attr(x, "levels")
##   opt <- .Options$drop.factor.levels
##   if(!length(opt))
##     opt <- .Options$drop.unused.levels
##   if(drop && (!missing(drop) || (length(opt)==0 || opt)))
##     reFactor(y)
##   else y
## }

termsDrop <- function(object, drop, data) {
  trm <- terms(object, data=data)
  if(is.numeric(drop)) {
    vars <- attr(trm, 'term.labels')
    if(any(drop > length(vars))) stop('subscript out of range')
    drop <- vars[drop]
  }
  form <- update(trm, as.formula(paste('~ . ',
                                       paste('-', drop, collapse=''))))
  terms(form, data=data)
}

var.inner <- function(formula) {
  if(!inherits(formula,"formula")) formula <- attr(formula,"formula")
  if(!length(formula)) stop('no formula object found')
  if(length(formula) > 2) formula[[2]] <- NULL  # remove response variable
  av <- all.vars(formula, max.names=1e6)
  ## Thanks to Thomas Lumley <tlumley@u.washington.edu> 28Jul01 :
  unique(sapply(attr(terms(formula),"term.labels"),
                function(term,av) av[match(all.vars(parse(text=term), max.names=1e6),av)][1],
                av=av))
}

## Hmisc/R/event.history.s

## event.history-sim-request.txt: S-Plus code to make event history graphs
## (for distribution, including SIM readers)
## last edited: 09-28-01
## start event.history function
## --> assume data is appropriately pre-processed (e.g., smoothed)
##     prior to function call
event.history <- function(data, survtime.col, surv.col, surv.ind = c(1,0),
                          subset.rows = NULL, covtime.cols = NULL, cov.cols = NULL,
                          num.colors = 1, cut.cov = NULL, colors = 1,
                          cens.density = 10, mult.end.cens = 1.05,
                          cens.mark.right = FALSE, cens.mark = '-',
                          cens.mark.ahead = .5, cens.mark.cutoff = -1e-8,
                          cens.mark.cex = 1.0,
                          x.lab = 'time under observation',
                          y.lab = 'estimated survival probability',
                          title = 'event history graph', ...)
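## Note added (not in original source; inferred from the body below):
## `data` has one row per subject; survtime.col and surv.col are vectors
## aligned with those rows; surv.ind gives the codes for (event, censored);
## covtime.cols and cov.cols are matrices with one row per subject.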
{ ## if covtime.cols was assigned a single zero, then ## make it a one-column matrix of zeroes: if(is.null(covtime.cols)) covtime.cols <- as.matrix(rep(0, dim(data)[1])) ## do necessary subsetting if(!is.null(subset.rows)) { data <- data[subset.rows,] surv.col <- surv.col[subset.rows] survtime.col <- survtime.col[subset.rows] covtime.cols <- covtime.cols[subset.rows,] if(!is.null(cov.cols)) cov.cols <- cov.cols[subset.rows,] } ## put in stops signifying 'illegal' data if(any(is.na(surv.col))) stop('cannot have NA entries in surv.col column \n') if(any(is.na(survtime.col))) stop('cannot have NA entries in survtime.col column \n') if(min(survtime.col) < 0) stop('survtime.col observations cannot be < 0 \n') if(min(covtime.cols, na.rm = TRUE) < 0) stop('covtime.cols observations cannot be < 0 \n') ## create color-covariate cutting based on subset data, as desired if(is.null(cov.cols)) colors.cat <- matrix(1, nrow=dim(data)[1]) else { if(is.null(cut.cov)) colors.cat <- matrix(as.numeric(cut(cov.cols, breaks = num.colors)), ncol=dim(cov.cols)[2]) else colors.cat <- matrix(as.numeric(cut(cov.cols, breaks = cut.cov)), ncol=dim(cov.cols)[2]) } ## order the entire dataframe such that ## time is in descending order and, when tied, then, ## survival comes before censoring if(surv.ind[1] > surv.ind[2]) data <- data[order(unlist(survtime.col), unlist(-surv.col)),] else if(surv.ind[1] < surv.ind[2]) data <- data[order(unlist(survtime.col), unlist(surv.col)),] ## determine vector of upcoming consecutive censored objects if current is censored cens.consec.vec <- rep(NA, dim(data)[1]) cnt <- 0 for(i in dim(data)[1]:1) { if(surv.col[i] == surv.ind[1]) { cnt <- 0 cens.consec.vec[i] <- 0 next } else if(surv.col[i] == surv.ind[2]) { cnt <- cnt + 1 cens.consec.vec[i] <- cnt - 1 } } ## some pre-processing here before plotting: ## determine vector of upcoming events (possibly tied events) following ## any censored time or string of consecutive censored times; ## also, determine upcoming event times (or, by default, ## 5% beyond final censored time if no event times ## eventually follow a censored time) ## --> also, determine string size of censored obs followed by event(s) n <- dim(data)[1] cnt <- 0 seq.events <- (1:n)[surv.col == surv.ind[1]] upcoming.events <- time.ahead <- string <- split <- rep(NA, dim(data)[1]) table.temp <- table(survtime.col[surv.col == surv.ind[1]]) for(i in 1:n) { if(surv.col[i] == surv.ind[2]) { if((n - cens.consec.vec[i]) > i) { cnt <- cnt + 1 upcoming.events[i] <- table.temp[as.numeric(names(table.temp)) > survtime.col[i]][1] time.ahead[i] <- as.numeric(names(table.temp[as.numeric(names(table.temp)) > survtime.col[i]])[1]) seq.event.after <- seq.events[seq.events > i][1] if(i == 1 | (cnt == i)) { string[i] <- table.temp[as.numeric(names(table.temp)) > survtime.col[i]][1] + (seq.event.after - 1) } else { seq.event.before <- rev(seq.events[seq.events < i])[1] string[i] <- table.temp[as.numeric(names(table.temp)) > survtime.col[i]][1] + (seq.event.after - seq.event.before - 1) } split[i] <- cnt if(surv.col[i+1] == surv.ind[1]) cnt <- 0 } else if((n - cens.consec.vec[i]) <= i) { cnt <- cnt + 1 time.ahead[i] <- survtime.col[n] * mult.end.cens split[i] <- cnt seq.event.before <- rev(seq.events[seq.events < i])[1] string[i] <- n - seq.event.before } ## end censored if statement } else if(surv.col[i] == surv.ind[1]) { if(i > 1) { if(surv.col[i-1] == surv.ind[2]) { split[i] <- split[i-1] + 1 string[i] <- string[i-1] } else if((surv.col[i-1] == surv.ind[1]) & (survtime.col[i-1] == 
survtime.col[i]) & !is.na(split[i-1])) { split[i] <- split[i-1] + 1 string[i] <- string[i-1] } } } ## end event if statement } ## end pre-processing for loop ## set up plotting region, axis labels, title, etc. plot(x=c(0, max(survtime.col, na.rm=TRUE) * mult.end.cens), y=c(0,1), type='n', xlab=x.lab, ylab=y.lab, main=title, ...) ## definitions needed in below for loop temp.prob.c <- temp.prob.e <- NA temp.prob.old <- 1 temp.prob.e.old <- 1 cens.cnt <- 0 cumsum.e <- cumsum(surv.col) ## main function for loop to create plotting lines for each patient for(i in 1:n) { len.cov <- sum(!is.na(covtime.cols[i,])) ## number of intervals to draw for patient i if(len.cov < 1) stop('can have only non-NA covariate observations in iteration', i, '\n') if(surv.col[i] == surv.ind[1]) { ## event temp.prob.e <- temp.prob.e.old * (n - i) / (n - i + 1) if(!is.na(split[i])) { upcoming.prob.e <- (n - (i + (string[i] - split[i]))) / (n + upcoming.event.old - (i + (string[i] - split[i]))) * temp.prob.e.old temp.prob.plot <- temp.prob.e.old - ((temp.prob.e.old - upcoming.prob.e) * split[i]/string[i]) } else temp.prob.plot <- temp.prob.e ## perform plotting for uncensored obs i if(len.cov > 1) { for(j in (1:(len.cov - 1))) { color <- switch(colors.cat[i, j], colors[1], colors[2], colors[3], colors[4], colors[5], colors[6], colors[7], colors[8], colors[9], colors[10], colors[11], colors[12], colors[13], colors[14], colors[15], colors[16], colors[17], colors[18], colors[19], colors[20]) polygon(x=c(covtime.cols[i,j], covtime.cols[i,j+1], covtime.cols[i,j+1], covtime.cols[i,j]), y=c(temp.prob.plot, temp.prob.plot, temp.prob.old, temp.prob.old), col=color) } } color <- switch(colors.cat[i, len.cov], colors[1], colors[2], colors[3], colors[4], colors[5], colors[6], colors[7], colors[8], colors[9], colors[10], colors[11], colors[12], colors[13], colors[14], colors[15], colors[16], colors[17], colors[18], colors[19], colors[20]) polygon(x=c(covtime.cols[i,len.cov], survtime.col[i], survtime.col[i], covtime.cols[i,len.cov]), y=c(temp.prob.plot, temp.prob.plot, temp.prob.old, temp.prob.old), col=color) if(!is.na(string[i]) & (split[i] < string[i])) temp.prob.old <- temp.prob.plot else temp.prob.e.old <- temp.prob.old <- temp.prob.plot ## end event if statement for plotting } else if(surv.col[i] == surv.ind[2]) { ## censored if((n - cens.consec.vec[i]) > i) { upcoming.prob.c <- (n - (i + (string[i] - split[i]))) / (n + upcoming.events[i] - (i + (string[i] - split[i]))) * temp.prob.e.old temp.prob.plot <- temp.prob.e.old - ((temp.prob.e.old - upcoming.prob.c) * split[i]/string[i]) upcoming.event.old <- upcoming.events[i] } else if((n - cens.consec.vec[i]) <= i) { temp.prob.plot <- temp.prob.e.old - (temp.prob.e.old * split[i]/string[i]) } ## perform plotting for censored obs i if(len.cov > 1) { for(j in (1:(len.cov - 1))) { color <- switch(colors.cat[i, j], colors[1], colors[2], colors[3], colors[4], colors[5], colors[6], colors[7], colors[8], colors[9], colors[10], colors[11], colors[12], colors[13], colors[14], colors[15], colors[16], colors[17], colors[18], colors[19], colors[20]) polygon(x=c(covtime.cols[i,j], covtime.cols[i,j+1], covtime.cols[i,j+1], covtime.cols[i,j]), y=c(temp.prob.plot, temp.prob.plot, temp.prob.old, temp.prob.old), col=color) } } color <- switch(colors.cat[i, len.cov], colors[1], colors[2], colors[3], colors[4], colors[5], colors[6], colors[7], colors[8], colors[9], colors[10], colors[11], colors[12], colors[13], colors[14], colors[15], colors[16], colors[17], colors[18], colors[19], colors[20]) 
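## Comment added (not in original source): draw the final covariate
## interval for this censored subject; the hatched polygon that follows
## extends the band to the next event time, or just past the last
## observation time when no later event exists.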
polygon(x=c(covtime.cols[i,len.cov], survtime.col[i], survtime.col[i], covtime.cols[i,len.cov]),
              y=c(temp.prob.plot, temp.prob.plot, temp.prob.old, temp.prob.old),
              col=color)
      polygon(x=c(survtime.col[i], time.ahead[i], time.ahead[i], survtime.col[i]),
              y=c(temp.prob.plot, temp.prob.plot, temp.prob.old, temp.prob.old),
              density=cens.density, border=TRUE)
      ## Following was if(cens.mark.right == TRUE) FEH 31jan03
      if(cens.mark.right & temp.prob.plot >= cens.mark.cutoff)
        text(x = time.ahead[i] + cens.mark.ahead, y = temp.prob.old,
             labels = cens.mark, cex = cens.mark.cex)
      temp.prob.c <- temp.prob.old <- temp.prob.plot
      ## end censored if statement for plotting
    }
  ## end of function's major for loop
  }
## end of function itself
}

## Hmisc/R/xy.group.s

## Author: Frank Harrell 24 Jun 91
xy.group <- function(x,y,m=150,g,fun=mean,result="list") {
  k <- !is.na(x+y)
  if(sum(k)<2) stop("fewer than 2 non-missing x and y")
  x <- x[k]
  y <- y[k]
  if(missing(m)) q <- cut2(x,g=g,levels.mean=TRUE,digits=7)
  else q <- cut2(x,m=m,levels.mean=TRUE,digits=7)
  n <- table(q)
  x.mean <- as.single(levels(q))
  y.fun <- as.vector(tapply(y, q, fun))
  if(result=="matrix") {
    z <- cbind(table(q),x.mean,y.fun)
    dimnames(z) <- list(levels(q), c("n","x","y"))
  }
  else z <- list(x=x.mean,y=y.fun)
  z
}

## Hmisc/R/transcan.s

## $Id$
transcan <- function(x, method=c("canonical","pc"),
                     categorical=NULL, asis=NULL, nk, imputed=FALSE, n.impute,
                     boot.method=c('approximate bayesian', 'simple'),
                     trantab=FALSE, transformed=FALSE,
                     impcat=c("score","multinom","rpart"),
                     mincut=40,
                     inverse=c('linearInterp','sample'), tolInverse=.05,
                     pr=TRUE, pl=TRUE, allpl=FALSE, show.na=TRUE,
                     imputed.actual=c('none','datadensity','hist','qq','ecdf'),
                     iter.max=50, eps=.1, curtail=TRUE,
                     imp.con=FALSE, shrink=FALSE, init.cat="mode",
                     nres=if(boot.method=='simple')200 else
400, data, subset, na.action, treeinfo=FALSE, rhsImp=c('mean','random'), details.impcat='', ...) { ##This is a non-.Internal version of the approx function. The ##S-Plus version of approx sometimes bombs with a bus error. asing <- function(x)x call <- match.call() method <- match.arg(method) impcat <- match.arg(impcat) boot.method <- match.arg(boot.method) imputed.actual <- match.arg(imputed.actual) inverse <- match.arg(inverse) rhsImp <- match.arg(rhsImp) if(missing(n.impute)) n.impute <- 0 if(n.impute > 0) { imputed <- TRUE if(impcat == 'rpart') stop('n.impute not supported for impcat="rpart"') warning('transcan provides only an approximation to true multiple imputation.\nA better approximation is provided by the aregImpute function.\nThe MICE and other S libraries provide imputations from Bayesian posterior distributions.') } if(imputed.actual!='none') imputed <- TRUE # if(impcat=='multinom') require(nnet) # if(impcat=='rpart') require(rpart) if(missing(data)) stop('Must specify data= when using R') formula <- nact <- NULL if(inherits(x,"formula")) { formula <- x y <- match.call(expand.dots=FALSE) y$x <- y$method <- y$categorical <- y$asis <- y$nk <- y$imputed <- y$trantab <- y$impcat <- y$mincut <- y$pr <- y$pl <- y$allpl <- y$show.na <- y$iter.max <- y$eps <- y$curtail <- y$imp.con <- y$shrink <- y$init.cat <- y$n.impute <- y$... <- y$nres <- y$boot.method <- y$transformed <- y$treeinfo <- y$imputed.actual <- y$inverse <- y$tolInverse <- y$details.impcat <- y$rhsImp <- NULL y$formula <- x if(missing(na.action)) y$na.action <- na.retain y[[1]] <- as.name("model.frame") y <- eval(y, sys.parent()) nact <- attr(y,"na.action") d <- dim(y) # Error if user is trying to use a non-allowed formula if(length(attr(y, "terms")) > 2) stop('transcan does not support a left hand side variable in the formula') nam <- all.vars(attr(y, "terms")) # Error if user has passed an invalid formula if(length(nam) != d[2]) stop(paste('Formula', formula, 'does not have a dominant inner variable.')) if(!length(asis)) { Terms <- terms(formula, specials='I') asis <- nam[attr(Terms,'specials')$I] ## all.vars will cause I() wrapper to be ignored } x <- matrix(NA,nrow=d[1],ncol=d[2], dimnames=list(attr(y,"row.names"),nam)) for(i in 1:d[2]) { w <- y[[i]] if(is.character(w)) w <- factor(w) if(is.factor(w)) { x[,i] <- unclass(w) categorical <- c(categorical, nam[i]) } else { x[,i] <- w nu <- length(unique(w[!is.na(w)])) if(nu<2) stop(paste("variable",nam[i],"has only one value")) if(nu==2) asis <- c(asis, nam[i]) else if(nu==3) categorical <- c(categorical, nam[i]) } } } nam <- dimnames(x)[[2]] rnam <- dimnames(x)[[1]] if(length(rnam)==0) rnam <- as.character(1:nrow(x)) p <- ncol(x) if(!length(nam)) stop("x must have column names") n <- nrow(x) if(missing(nk)) nk <- 3*(n<30)+4*(n>=30 & n<100)+5*(n>=100) ## Compute constant to multiply canonical variates by to get a variance of 1.0 varconst <- sqrt(n-1) if(length(categorical)) { if(length(categorical)==1 && categorical=="*") categorical <- nam ## oldopts <- options(c('na.action','contrasts')) ## R does not allow multiple options to be spec. 
oldopts <- options() ## names(oldopts) <- c('na.action','contrasts') #windows can mess this up if(impcat == 'rpart') { options(contrasts=c("contr.treatment","contr.poly")) on.exit(options(oldopts)) } } if(length(asis)==1 && asis=="*") asis <- nam R <- parms <- coef <- fill.con <- Imputed <- Trantab <- vector("list",p) fillin <- rep(NA,p); names(fillin) <- nam scale <- rep(1,p); names(scale) <- nam; names(Trantab) <- nam nparm <- shr <- fillin if(n.impute > 0) { Resid <- vector("list",p) names(Resid) <- nam } else Resid <- NULL datad <- list(); datad.ranges <- list() ## For canonical-variate expansions (standardized), use scale of 1 xcoef <- matrix(NA, nrow=p, ncol=p+1, dimnames=list(nam,c("intercept",nam))) usefill <- 1*(is.logical(imp.con) && imp.con)+2*(is.numeric(imp.con)) if(usefill==2 && length(imp.con)!=p) stop("length of imp.con != ncol(x)") for(i in 1:p) { lab <- nam[i] y <- x[,i] na <- is.na(y) w <- y[!na] if(imputed && n.impute==0) Imputed[[i]] <- double(sum(na)) if(lab %in% asis) { fillin[i] <- if(usefill==2) imp.con[i] else median(w) scale[i] <- mean(abs(w-fillin[i])) if(is.na(fillin[i])) stop(paste("fillin value for",lab,"is NA")) coef[[i]] <- c(0,1) nparm[i] <- 1 } else { if(lab %in% categorical) { w <- table(y) z <- as.numeric(names(w)) if(usefill==2) fillin[i] <- imp.con[i] else fillin[i] <- z[w==max(w)][1] #most freq. category assign("Y", as.factor(y)) opold <- options(na.action="na.retain") w <- model.matrix(~Y) # uses contr.treatment (reference cell coding) options(na.action=opold[[1]]) #for some reason Windows needs opt name r <- attr(w,"contrasts")[[1]] attr(r,"codes") <- z parms[[i]] <- r R[[i]] <- w[,-1,drop=FALSE] #kill intercept column nparm[i] <- length(z)-1 if(usefill>0) { fill.con[[i]] <- w[y==imp.con[i],-1,drop=FALSE][1,,drop=FALSE] ##select first hit if(length(fill.con[[i]])==0) stop("imp.con has a code not in the data for a categorical var") } } else { fillin[i] <- if(usefill==2) imp.con[i] else median(y[!is.na(y)]) R[[i]] <- rcspline.eval(y, nk=nk, inclx=TRUE) parms[[i]] <- attr(R[[i]], "knots") if(usefill>0) fill.con[[i]] <- rcspline.eval(fillin[i], parms[[i]], inclx=TRUE) nparm[i] <- length(parms[[i]])-1 } } } xt <- x if(init.cat %in% c("mode","random")) for(i in (1:p)[nam %in% categorical]) xt[,i] <- if(init.cat=="mode") { if(is.na(fillin[i])) stop(paste("fillin value for",nam[i],"is NA")) xt[,i]==fillin[i] } else runif(n) p1 <- p-1 R2 <- R2.adj <- double(p) r2 <- r2.adj <- NA Details.impcat <- NULL last.iter <- FALSE if(pr) cat("Convergence criterion:") milab <- as.character(1:n.impute) predSamp <- function(res, yhat, rnam, allowed.range, n.impute, boot.method) { m <- length(yhat) yhat <- matrix(rep(yhat, n.impute), ncol=n.impute, dimnames=list(rnam, as.character(1:n.impute))) errors <- if(boot.method=='simple') sample(res, m*n.impute, replace=TRUE) else { ## From Jeff Longmate (jlongmat@coh.org): n <- length(res) i <- ceiling(runif(n*n.impute, 0, n)) j <- ceiling(runif(m*n.impute, 0, n)) + rep((0:(n.impute-1))*n, rep(m, n.impute)) res[i[j]] } structure(pmax(pmin(yhat + errors, allowed.range[2]), allowed.range[1]), names=NULL) } anyVarNA <- rep(FALSE, n) for(iter in 1:iter.max) { dmax <- 0 if(last.iter) xtl <- xt for(i in 1:p) { lab <- nam[i] catg <- lab %in% categorical xx <- xt[,-i,drop=FALSE] k.other <- sum(pmax(nparm[-i]-1,0))/(p-1)+p-1 #effective d.f. 
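## Note added (not in original source): k.other approximates the effective
## regression d.f. contributed by the other p-1 variables -- one d.f. each
## for their current transformed values plus the average number of extra
## expansion parameters -- and is used below in adjusted R^2 and shrinkage.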
if(iter==1) { for(j in 1:p1) { if(any(z <- is.na(xx[,j]))) { l <- (nam[-i])[j] if(is.na(fillin[l])) stop(paste("variable",l,"has fillin value of NA")) xx[z,j] <- fillin[l] } } } if(method=="pc") { z <- xx for(k in 1:p1) { y <- z[,k]; z[,k] <- (y-mean(y))/sqrt(var(y)) } P <- prcomp(z)$x[,1] # 1st prin. comp. } j <- is.na(x[,i]) anyVarNA[j] <- TRUE if(lab %in% asis) { y <- x[!j, i] f <- lm.fit.qr.bare(xx[!j,,drop=FALSE], y) newy <- x[,i] names(newy) <- NULL xcof <- f$coef r2 <- f$rsquared nn <- length(y) r2.adj <- max(0,1-(1-r2)*(nn-1)/(nn-k.other-1)) if(shrink) { ybar <- mean(y) shr[i] <- h <- (nn-k.other-1)*r2/(nn-1)/r2.adj xcof <- c(ybar*(1-h)+h*xcof[1],h*xcof[-1]) } if(any(j)) newy[j] <- if(usefill>0) fillin[i] else cbind(1,xx[j,,drop=FALSE]) %*% xcof res <- f$residuals if(last.iter) { ybar <- mean(y) if(imputed & any(j)) { r <- range(newy[!j]) Imputed[[i]] <- if(n.impute==0)structure(pmax(pmin(newy[j],r[2]),r[1]), names=rnam[j]) else predSamp(res, newy[j], rnam[j], r, n.impute, boot.method) NULL } xcoef[i, c("intercept",nam[-i])] <- xcof if(trantab) { rr <- range(y); Trantab[[i]] <- list(x=rr, y=rr); NULL } if(n.impute > 0) Resid[[i]] <- if(length(res) <= nres) asing(res) else asing(sample(res, nres)) ## was f$residuals 3 times } } else { f <- cancor(xx[!j,,drop=FALSE], R[[i]][!j,,drop=FALSE]) r2 <- f$cor[1]^2 xcof <- c(intercept=-sum(f$xcoef[,1] * f$xcenter), f$xcoef[,1])*varconst cof <- if(method=="canonical") c(intercept=-sum(f$ycoef[,1] * f$ycenter), f$ycoef[,1])*varconst else { g <- lm.fit.qr.bare(R[[i]][!j,,drop=FALSE], P[!j]) g$coef } newy <- drop(cbind(1,R[[i]]) %*% cof) if((n.impute > 0 && last.iter) || rhsImp=='random') res <- if(method=='canonical') newy[!j] - cbind(1,xx[!j,,drop=FALSE]) %*% xcof else g$residuals if(n.impute > 0 && last.iter) { Resid[[i]] <- if(length(res) <= nres) asing(res) else asing(sample(res, nres)) } nn <- n - sum(j) k <- nparm[i]-1+k.other r2.adj <- max(0,1-(1-r2)*(nn-1)/(nn-k-1)) if(shrink) { shr[i] <- h <- (nn-k-1)*r2/(nn-1)/r2.adj xcof <- h*xcof #mean of can var=0 } if(any(j)) newy[j] <- if(usefill>0) drop(cbind(1,fill.con[[i]]) %*% cof) else drop(cbind(1,xx[j,,drop=FALSE]) %*% xcof) if(last.iter) { coef[[i]] <- cof xcoef[i,c("intercept",nam[-i])] <- xcof if(trantab || (any(j) && catg && impcat %in% c("score","multinom"))) { xa <- x[!j, i] ya <- newy[!j] tab <- table(paste(as.character(xa), as.character(ya),sep=';')) vals <- names(tab) uvals <- unPaste(vals, ';') names(tab) <- NULL Trantab[[i]] <- list(x=as.numeric(uvals[[1]]), y=as.numeric(uvals[[2]]), frequency=unname(tab)) NULL } if(imputed & any(j)) { if(catg) { if(usefill>0) pred <- rep(fillin[i], sum(j)) else { if(impcat == 'rpart') { y <- as.factor(x[,i]) zdf <- list(xx=xx, y=y) f <- rpart(y ~ xx, control=rpart.control(minsplit=mincut), data=zdf) ## won't work because rpart will not allow matrix x pred <- (t(apply(-predict(f,zdf)[j,,drop=FALSE],1,order)))[,1] if(treeinfo) { cat('\nProbabilities of Category Membership and Category Selected for',lab,'\n\n') print(cbind(round(predict(f,zdf)[j,,drop=FALSE],3), Mode=pred)) } ## Gets level number of most probable category } else if(impcat=='score') { ##Get category code with score closest to pred. 
score ti <- Trantab[[i]] if(n.impute==0) { ##ww <- apply(outer(newy[j], ti$y, ##function(x,y)abs(x-y)),1,order)[1,] ww <- order(ti$y)[round(approx(sort(ti$y), 1:length(ti$y), xout=newy[j], rule=2)$y)] ## Thanks from Alan Zaslavsky <zaslavsk@hcp.med.harvard.edu>: ## "The idea is to interpolate (after arranging in order) and then round the ## index, since the fractional part of the index represents the relative ## distance from the two adjacent values." ##pred <- round(approx(ti$y, ti$x, xout=newy[j], rule=2)$y) pred <- ti$x[ww] } else { sval <- predSamp(0*res, newy[j], rnam[j], c(-Inf,Inf), n.impute, boot.method) ww <- order(ti$y)[round(approx(sort(ti$y), 1:length(ti$y), xout=sval, rule=2)$y)] pred <- matrix(ti$x[ww], ncol=n.impute, dimnames=list(rnam[j],milab)) names(pred) <- NULL if(lab==details.impcat) Details.impcat <- list(pred.trans.na=sval,imputed=pred, pred.trans.nona=cbind(1,xx[!j,]) %*% xcof, obs=x[!j,i],trantab=ti) } } else { ## Multinomial logit zdf <- list(y=as.factor(x[!j,i]), xx=xx[!j,,drop=FALSE]) f <- multinom(y ~ xx, data=zdf, trace=FALSE, maxit=200) ncat <- length(levels(zdf$y)) ## bug in predict.multinom when predictor is a matrix cf <- coef(f) zdf <- cbind(1,xx[j,,drop=FALSE]) %*% (if(is.matrix(cf)) t(cf) else as.matrix(cf)) pred <- exp(cbind(0,zdf))/ (1 + apply(exp(zdf),1,sum)) dimnames(pred)[[2]] <- as.character(1:ncat) pred <- if(n.impute==0) (t(apply(-pred,1,order)))[,1] else rMultinom(pred, n.impute) } } if(n.impute==0) names(pred) <- rnam[j] Imputed[[i]] <- pred NULL } else { if(n.impute == 0) { if(usefill > 0) Im <- rep(fillin[i], sum(j)) else Im <- invertTabulated(x[!j,i], newy[!j], aty=newy[j], name=nam[i], inverse=inverse, tolInverse=tolInverse) names(Im) <- rnam[j] Imputed[[i]] <- Im NULL } else { sval <- predSamp(res, newy[j], rnam[j], c(-Inf,Inf), n.impute, boot.method) sval.orig <- matrix(invertTabulated(x[!j,i], newy[!j], aty=sval, name=nam[i], inverse=inverse, tolInverse=tolInverse), ncol=n.impute, dimnames=list(rnam[j],milab)) names(sval.orig) <- NULL Imputed[[i]] <- sval.orig NULL } } } ##end imputed } ##end last.iter } ##end non-asis if(curtail && any(j)) { r <- range(newy[!j]) newy[j] <- pmax(pmin(newy[j],r[2]),r[1]) } if(iter>1) { jj <- if(rhsImp=='mean')TRUE else TRUE dmax <- max(dmax, min(max(abs(xt[jj,i]-newy[jj]),na.rm=TRUE), max(abs(-xt[jj,i]-newy[jj]),na.rm=TRUE))/scale[i]) ##Allows for arbitrary flips (negation) of current transformation } if(rhsImp=='random') newy[j] <- newy[j] + sample(res, sum(j), replace=TRUE) if(last.iter) xtl[,i] <- newy else xt[,i] <- newy ##don't update working transformations ##during last iteration since recomputing x-coefficients ##on the basis of current transformations, which may flip rapidly if((pl & last.iter) | allpl) { xti <- if(last.iter) xtl[,i] else xt[,i] plot(x[,i], xti, xlab=lab,ylab=paste("Transformed",lab)) title(sub=paste("R2=",format(round(r2,2)),sep=""),cex=.4,adj=0) if(any(j)) title(sub=paste(sum(j),"missing"),cex=.4,adj=1) if(show.na && any(j)) { scat1d(xti[j], 4, ...) if(imputed && last.iter) scat1d(as.numeric(Imputed[[i]]), 3, ...) 
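## Comment added (not in original source): the scat1d calls above draw
## rug plots -- side 4 (right axis) marks transformed values at originally
## missing observations, side 3 (top) their imputed values on the
## original scale.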
} } if(last.iter && imputed.actual!='none' && any(j)) { v1n <- nam[i]; v2n <- paste('Imputed',v1n) datad[[v1n]] <- x[!j,i] datad[[v2n]] <- Imputed[[i]] datad.ranges[[v1n]] <- datad.ranges[[v2n]] <- range(c(x[!j,i], Imputed[[i]]), na.rm=TRUE) } R2[i] <- r2; R2.adj[i] <- r2.adj } #end i if(pr && iter>1) cat(format(round(dmax,3)),"") if(pr && (iter %% 10 == 0)) cat("\n") niter <- iter if(last.iter) break last.iter <- (iter==(iter.max-1)) || (iter>1 && dmax<eps) || (rhsImp=='random' && iter==5) } #end iter if(pr) cat("\n") if(iter.max>3 & niter==iter.max & dmax>=eps) stop(paste("no convergence in",iter.max,"iterations")) ## Use xtl instead of xt, otherwise transformed variables will not ## match ones from predict() or Function() since coefficients have ## been updated if(pr && rhsImp=='mean') cat("Convergence in",niter,"iterations\n") if(imputed.actual=='datadensity') { lab <- names(datad) datadensity.data.frame(datad, ranges=datad.ranges, labels=ifelse((1:length(lab)) %% 2, lab,'Imputed')) } else if(imputed.actual !='none') { namdd <- names(datad) for(i in seq(1,length(datad),by=2)) { if(imputed.actual=='hist') histbackback(datad[i:(i+1)]) else { v1 <- datad[[i]]; v2 <- datad[[i+1]] n1 <- namdd[i]; n2 <- namdd[i+1] if(imputed.actual=='ecdf' && is.numeric(datad[[i]])) Ecdf(c(v1,v2), xlab=n1, group=c(rep('actual',length(v1)), rep('imputed',length(v2)))) else { qqplot(v1, v2, xlab=n1, ylab=n2) abline(a=0, b=1, lty=2) } } } } names(R2) <- nam if(pr) { cat("R-squared achieved in predicting each variable:\n\n") print(round(R2, 3)) } names(R2.adj) <- nam if(pr) { cat("\nAdjusted R-squared:\n\n") print(round(R2.adj, 3)) } if(shrink) { names(shr) <- nam if(pr) { cat("\nShrinkage factors:\n\n") print(round(shr,3)) } } else shr <- NULL names(parms) <- names(coef) <- nam r <- apply(xtl, 2, range) dimnames(r) <- list(c("low","high"), nam) if(imputed) { names(Imputed) <- nam } else Imputed <- NULL structure(list(call=call, formula=formula, niter=niter, imp.con=usefill>0, n.impute=n.impute, residuals=Resid, rsq=R2, rsq.adj=R2.adj, shrinkage=shr, inverse=inverse, tolInverse=tolInverse, categorical=categorical, asis=asis, parms=parms, coef=coef, xcoef=xcoef, fillin=fillin, scale=scale, ranges=r, transformed=if(transformed)xtl, trantab=if(trantab)Trantab, imputed=Imputed, na.action=nact, rhsImp=rhsImp, details.impcat=Details.impcat), class='transcan') } summary.transcan <- function(object, long=FALSE, digits=6, ...) 
{ ## Check for old style object if(!is.list(object)) object <- attributes(object) dput(object$call); cat("\n") if(length(nact <- object$na.action)) naprint(nact) cat("Iterations:",object$niter,"\n\n") cat("R-squared achieved in predicting each variable:\n\n") print(round(object$rsq,3)) cat("\nAdjusted R-squared:\n\n") print(round(object$rsq.adj,3)) if(length(shr <- object$shrink)) { cat("\nShrinkage factors:\n\n") print(round(shr,3)) } cat("\nCoefficients of canonical variates for predicting each (row) variable\n\n") xcoef <- object$xcoef[,-1] g <- format(round(xcoef,2)) g[is.na(xcoef)] <- "" print(g, quote=FALSE) imp <- object$imputed if(length(imp)) { nimp <- TRUE for(nn in names(imp)) { if(length(z <- imp[[nn]])) { if(nimp & !long) cat("\nSummary of imputed values\n\n"); nimp <- FALSE if(long) { cat("\nImputed values for",nn,"\n\n"); print(z) } if(nn %in% object$categorical) { print(describe(as.vector(z), nn)) } else { print(describe(signif(as.vector(z), digits), nn)) } } } } if(object$imp.con) cat("\nImputed values set to these constants:\n\n") else cat("\nStarting estimates for imputed values:\n\n") print(signif(object$fillin, digits)) invisible() } print.transcan <- function(x, long=FALSE, ...) { ## Check for old style if(!is.list(x)) { trans <- x cal <- attr(x, 'call') } else { trans <- x$transformed cal <- x$call } dput(cal); cat("\n") if(length(trans)) { if(long) print(unclass(x)) else print.default(trans) } invisible() } impute.transcan <- function(x, var, imputation, name=as.character(substitute(var)), pos.in, data, list.out=FALSE, pr=TRUE, check=TRUE, ...) { if(!missing(imputation) && length(imputation) > 1) stop('imputation must be a single number') ## Check for old style imp <- if(is.list(x)) x$imputed else attr(x, 'imputed') if(!length(imp)) { if(missing(var) && missing(name)) stop('imputed=TRUE was not specified to transcan') warning("imputed was not specified to transcan") return(if(! missing(var)) var) } if(missing(var) && missing(name)) { nams <- names(imp) if(list.out) { outlist <- vector('list', length(nams)) names(outlist) <- nams } if(missing(data)) { if(missing(pos.in)) pos.in <- find(nams[1])[1] var1 <- get(nams[1], pos=pos.in) } else { if(any(ni <- nams %nin% names(data))) stop(paste('variable', paste(nams[ni],collapse=','), 'not in data')) var1 <- data[[nams[1]]] } namvar <- names(var1) if(! length(namvar) && ! missing(data)) namvar <- row.names(data) if(check && length(namvar)==0) warning(paste('variable', nams[1], 'does not have a names() attribute\nand data does not have row.names. Assuming row names are integers.')) nimp <- integer(length(nams)) names(nimp) <- nams for(nam in nams) { i <- imp[[nam]] if(!length(i)) { if(list.out) outlist[[nam]] <- if(missing(data)) get(nam, pos=pos.in) else data[[nam]] next } d <- dim(i) obsImputed <- if(length(d)) dimnames(i)[[1]] else names(i) ## i[,imputation] drops names if only one obs. imputed if(!missing(imputation)) { if(!length(d)) stop('imputation can only be given when transcan used n.impute') if(imputation < 1 || imputation > d[2]) stop(paste('imputation must be between 1 and',d[2])) i <- i[, imputation] } else if(length(d)) stop('imputation must be specified when transcan used n.impute') v <- if(missing(data)) get(nam, pos=pos.in) else data[[nam]] if(is.character(v)) v <- as.factor(v) ##### ## Below was names(i) instead of match(...) if(length(namvar)) { sub <- match(obsImputed, namvar, nomatch=0) i <- i[sub > 0] sub <- sub[sub > 0] } else { if(! 
all.is.numeric(obsImputed))
          stop(paste('names attribute of ',nam, ' is not all numeric\n',
                     'and original observations did not have names',sep=''))
        sub <- as.integer(obsImputed)
      }
      if(check)
        if((missing(imputation) || imputation == 1) && ! all(is.na(v[sub])))
          stop(paste('variable',nam,
               'does not have same missing values as were present when transcan was run'))
      v[sub] <- if(is.factor(v)) levels(v)[as.integer(i)]
                else if(is.logical(v)) i == 1
                else i
      ## Note: if v was empty before, new v would have arbitrary length
      ## Avoid problem by requiring all variables to be in data
      attr(v,'imputed') <- sub
      attr(v,'class') <- c('impute', attr(v,'class'))
      nimp[nam] <- length(i)
      if(list.out) outlist[[nam]] <- v
    }
    if(pr) {
      cat('\n\nImputed missing values with the following frequencies\n',
          'and stored them in variables with their original names:\n\n')
      print(nimp[nimp > 0])
    }
    if(list.out) {
      z <- sapply(outlist, length)
      if(diff(range(z)) > 0) {
        cat('\n\nLengths of variable vectors:\n\n')
        print(z)
        stop('inconsistent naming of observations led to differing length vectors')
      }
      return(outlist)
    }
    return(invisible(nimp))
  }
  impval <- imp[[name]]
  if(name %nin% names(imp))
    warning(paste('Variable',name, 'was not specified to transcan or had no NAs'))
  if(!length(impval)) return(var)
  d <- dim(impval)
  if(!missing(imputation)) {
    if(!length(d)) stop('imputation can only be given when transcan used n.impute')
    if(imputation < 1 || imputation > d[2])
      stop(paste('imputation must be between 1 and',d[2]))
    impval <- impval[,imputation]
  } else if(length(d)) stop('imputation must be specified when transcan used n.impute')
  namvar <- names(var)
  if(!length(namvar)) {
    if(missing(data))
      stop(paste('variable',name, 'does not have a names() attribute\nand data= was not given.\nAssuming identifiers stored by transcan are integer subscripts'))
    else namvar <- row.names(data)
    if(!length(namvar))
      stop(paste('variable',name, 'does not have a names() attribute\nand data has no row.names'))
  }
  if(length(namvar)) {
    sub <- match(names(impval), namvar, nomatch=0)
    impval <- impval[sub > 0]
    sub <- sub[sub > 0]
  } else {
    if(!all.is.numeric(names(impval)))
      stop(paste('names attribute of ',name, ' is not all numeric\n',
                 'and original observations did not have names',sep=''))
    sub <- as.integer(names(impval))
  }
  ## Now take into account fact that transcan may have been
  ## run on a superset of current data frame
  m <- length(sub)
  if(check)
    if(missing(imputation) || imputation==1)
      if(m!=sum(is.na(var)))
        warning("number of NAs in var != number of imputed values from transcan.")
  if(m == 0) return(var)
  var[sub] <- if(is.factor(var)) levels(var)[as.integer(impval)]
              else if(is.logical(var)) impval == 1
              else impval
  attr(var, 'imputed') <- sub
  attr(var, 'class') <- c("impute", attr(var,'class'))
  var
}

"[.transcan" <- function(x, rows=1:d[1], cols=1:d[2], ..., drop=TRUE) {
  ## Check for old style object
  if(is.list(x)) {
    if(length(x$imputed) && sum(sapply(x$imputed,length))) {
      d <- dim(x$transformed)
      original.rownames <- dimnames(x$transformed)[[1]]
      subset.rownames <- original.rownames[rows]
      for(v in names(x$imputed)) {
        z <- x$imputed[[v]]
        if(length(z)) {
          use <- names(z) %in% subset.rownames
          x$imputed[[v]] <- z[use]
        }
      }
    }
    x$transformed <- x$transformed[rows,cols, drop=drop]
    return(x)
  }
  ats <- attributes(x)
  ats$dimnames <- ats$dim <- ats$names <- NULL
  attr(x, 'class') <- NULL
  y <- x[..., drop = drop]
  attributes(y) <- c(attributes(y), ats)
  if(!length(dim(y))) {
    aty <- attributes(y)
    aty$call <- aty$iter <- aty$rsq <- aty$parms <- aty$coef <-
      aty$xcoef <- aty$rsq.adj <-
aty$shrink <- aty$fillin <- aty$imputed <- aty$class <- aty$ranges <-
      aty$imp.con <- aty$scale <- aty$categorical <- aty$asis <- aty$trantab <- NULL
    attributes(y) <- aty
    if(is.character(z <- list(...)[[1]])) attr(y,"label") <- paste("Transformed",z)
    ## May someday have to use label(y) <- for this ?
  }
  y
}

predict.transcan <- function(object, newdata=NULL, iter.max=50, eps=.01, curtail=TRUE,
                             type=c("transformed","original"),
                             inverse, tolInverse, check=FALSE, ...) {
  type <- match.arg(type)
  if(!is.list(object)) object <- attributes(object)
  parms   <- object$parms
  coef    <- object$coef
  xcoef   <- object$xcoef
  fillin  <- object$fillin
  ranges  <- object$ranges
  scale   <- object$scale
  imp.con <- object$imp.con
  rhsImp  <- object$rhsImp
  trantab <- object$trantab
  categorical <- object$categorical
  formula <- object$formula
  inverse <- if(missing(inverse)) object$inverse
  if(!length(inverse)) inverse <- 'linearInterp'
  tolInverse <- if(missing(tolInverse)) object$tolInverse
  if(!length(tolInverse)) tolInverse <- 0.05
  if(type=="original" & !length(trantab))
    stop('type="original" requires that trantab=TRUE was specified to transcan')
  if(length(formula)) {
    oldop <- options(na.action="na.retain")
    y <- model.frame(formula, data=newdata)
    options(oldop)
    d <- dim(y)
    p <- d[2]
    newdata <- matrix(NA, nrow=d[1], ncol=p, dimnames=list(attr(y,"row.names"), names(y)))
    for(i in 1:p) {
      w <- y[[i]]
      if(is.character(w)) {
        warning("character predictor present.  Assuming levels are the same as in the original fit,\nthat all levels are present in the data, and that levels were in alphabetical order")
        w <- factor(w)
      }
      newdata[,i] <- unclass(w)
    }
  } else {
    if(!length(newdata)) stop("newdata must be given (unless formula was given to transcan)")
    p <- ncol(newdata)
  }
  if(!is.matrix(newdata)) {
    if(!length(names(newdata))) names(newdata) <- dimnames(object)[[2]]
    newdata <- t(as.matrix(newdata))
  }
  if(imp.con || !any(is.na(newdata))) iter.max <- 1  ## only 1 iteration needed if no NAs (imp.con)
  xt <- newdata
  nam <- dimnames(ranges)[[2]]
  if(ncol(ranges)!=p) stop("wrong number of columns in newdata")
  if(!length(dimnames(xt)[[2]])) dimnames(xt) <- list(dimnames(xt)[[1]],nam)
  else if(check && any(dimnames(newdata)[[2]]!=nam))
    warning("column names in newdata do not match column names in object")
  if(length(dimnames(xt)[[1]])==0)
    dimnames(xt) <- list(as.character(1:nrow(xt)), dimnames(xt)[[2]])
  for(iter in 1:iter.max) {
    dmax <- 0
    for(i in 1:p) {
      lab <- nam[i]
      j <- is.na(newdata[,i])
      prm <- parms[[lab]]
      if(length(prm)==0) {
        newy <- newdata[,i]
        if(any(j)) newy[j] <- if(iter==1) fillin[i]
                              else drop(cbind(1,xt[j,-i,drop=FALSE]) %*% xcoef[i,-i-1])
      } else {
        if(is.matrix(prm)) {
          lev <- attr(prm, "codes")
          consec.lev <- match(newdata[,i], lev)  # may give NAs - OK for next line
          R <- prm[consec.lev,, drop=FALSE]
          if(iter==1 && any(match(newdata[!j,i], lev, 0)==0))
            stop("codes for categorical variable not in original list")
        } else R <- rcspline.eval(newdata[,i], prm, inclx=TRUE)
        newy <- drop(cbind(1,R) %*% coef[[i]])
        if(any(j)) newy[j] <- if(iter==1) 0
                              else drop(cbind(1, xt[j,-i,drop=FALSE]) %*% xcoef[i, -i-1])
      }
      if(curtail) newy <- pmax(pmin(newy,ranges[2,i]),ranges[1,i])
      if(iter>1) dmax <- max(dmax, min(max(abs(xt[,i]-newy),na.rm=TRUE),
                                       max(abs(-xt[,i]-newy),na.rm=TRUE))/scale[i])
      xt[,i] <- newy
    } # end i
    niter <- iter
    if(niter>1 && dmax<eps) break
    if(rhsImp=='random' && niter>4) break
  } # end iter
  if(rhsImp=='mean') {
    if(iter.max>3 & niter==iter.max) stop(paste("no convergence in",iter.max,"iterations"))
    cat("Convergence in",niter,"iterations\n")
  }
  if(type=="transformed") return(xt)
  for(i in 1:p) {
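## Comment added (not in original source): for type='original', the loop
## below back-transforms by inverting each variable's tabulated
## transformation at originally missing cells; categorical results are
## rounded to valid codes.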
ft <- trantab[[i]] j <- is.na(newdata[,i]) if(any(j)) { newdata[j,i] <- if(imp.con) fillin[i] else { ww <- invertTabulated(ft, aty=xt[j,i], name=nam[i], inverse=inverse, tolInverse=tolInverse) if(nam[i] %in% categorical) ww <- round(ww) ww } } } newdata } Function <- function(object, ...) UseMethod("Function") Function.transcan <- function(object, prefix=".", suffix="", pos=-1, ...) { at <- if(is.list(object)) object else attributes(object) Nam <- names(at$coef) p <- length(Nam) categorical <- at$categorical asis <- at$asis coef <- at$coef parms <- at$parms fnames <- character(p) for(i in 1:p) { nam <- Nam[i] cof <- coef[[nam]] if(nam %in% asis) f <- function(x) x else if(nam %in% categorical) { codes <- attr(parms[[nam]], "codes") g <- "{x <- unclass(x);" cof[-1] <- cof[-1] + cof[1] #convert from ref cell to cell means model for(j in 1:length(codes)) { if(j>1 && cof[j]>0) g <- paste(g,"+") g <- paste(g, format(cof[j]), "*(x==",format(codes[j]),")",sep="") } g <- paste(g, "}", sep="") f <- function(x) NULL f[[2]] <- parse(text=g)[[1]] } else f <- attr(rcspline.restate(parms[[nam]], cof), "function") fun.name <- paste(prefix,nam,suffix,sep="") cat("Function for transforming",nam,"stored as",fun.name,"\n") assign(fun.name, f, pos=pos) fnames[i] <- fun.name } invisible(fnames) } na.retain <- function(mf) mf plot.transcan <- function(x, ...) { ## check for old style object if(!is.list(x)) x <- attributes(x) trantab <- x$trantab imputed <- x$imputed n.impute <- x$n.impute if(length(trantab)==0) stop('you did not specify trantab=TRUE to transcan()') p <- length(trantab) nam <- names(trantab) for(w in nam) { z <- trantab[[w]] plot(z, xlab=w, ylab=paste('Transformed',w)) title(sub=paste('R2=',format(round(x$rsq[w],2)),sep=''),cex=.4,adj=0) if(length(imputed)) { m <- imputed[[w]] if(L <- length(m)) { title(sub=paste(L / n.impute, 'missing'),cex=.4,adj=1) m.trans <- approx(z, xout=m, rule=2)$y scat1d(m, 3, ...) scat1d(m.trans, 4, ...) } } } } ggplot.transcan <- function(data, mapping, scale=FALSE, ..., environment) { x <- data trantab <- x$trantab imputed <- x$imputed n.impute <- max(1, x$n.impute) rsq <- x$rsq if(length(trantab) == 0) stop('you did not specify trantab=TRUE to transcan()') p <- length(trantab) nam <- names(trantab) data <- adata <- NULL for(w in nam) { z <- trantab[[w]] x <- z[[1]] y <- z[[2]] if(scale) { r <- range(y) y <- (y - r[1]) / (r[2] - r[1]) z <- list(x=x, y=y) } data <- rbind(data, data.frame(type='transform', X=w, x=x, y=y)) loc <- largest.empty(x, y, xlim=range(x), ylim=range(y)) lab <- paste('R^2==', round(rsq[w], 2), sep='') if(length(imputed)) { m <- as.vector(imputed[[w]]) if(L <- length(m)) { lab <- paste('paste(', lab, '," ', L / n.impute, ' missing")', sep='') m.trans <- approx(z, xout=m, rule=2)$y data <- rbind(data, data.frame(type='imputed', X=w, x=m, y=m.trans)) } adata <- rbind(adata, data.frame(X=w, x=loc$x, y=loc$y, lab=lab, type='transform')) } } # scale*manual calls are from David Norris # The implicit (alphabetical) order of 'imputed' and 'transform' # reverses the intended symbol & color assignments. But naming # the vectors corrects this. ggplot(data, aes(x=x, y=y, color=type, shape=type, size=type)) + geom_point() + facet_wrap(~ X, scales=if(scale) 'free_x' else 'free', ...) 
+ xlab(NULL) + ylab('Transformed') + scale_color_manual(values = c(transform="#00000059", imputed="#FF000059")) + scale_shape_manual(values = c(transform=1, imputed=3)) + scale_size_manual(values = c(transform=1.3, imputed=2.25)) + theme(legend.position='none') + geom_text(data=adata, aes(label=lab), parse=TRUE, size=1.65, col='black') } ##The following needed if Design is not in effect, to make anova work vcov.default <- function(object, regcoef.only=FALSE, ...) { vc <- object$Varcov if(length(vc)) { if(regcoef.only) return(object$var) else return(vc(object, which='var')) } cov <- object$var if(!length(cov)) stop("object does not have variance-covariance matrix") if(regcoef.only) { p <- length(object$coef) cov <- cov[1:p, 1:p, drop=FALSE] } cov } if(FALSE) Varcov.lm <- function(object, ...) { cof <- object$coefficients Qr <- object$qr cov <- chol2inv(Qr$qr) cov <- sum(object$residuals^2)*cov/object$df.residual nm <- names(cof) dimnames(cov) <- list(nm, nm) cov } if(FALSE) Varcov.glm <- function(object, ...) { if(length(object$var)) return(object$var) ## for glmD s <- summary.glm(object) s$cov.unscaled * s$dispersion } #Varcov.multinom <- function(object, ...) vcov(object) invertTabulated <- function(x, y, freq=rep(1,length(x)), aty, name='value', inverse=c('linearInterp','sample'), tolInverse=0.05, rule=2) { inverse <- match.arg(inverse) if(is.list(x)) { freq <- x[[3]] y <- x[[2]] x <- x[[1]] } if(inverse=='linearInterp') return(approx(y, x, xout=aty, rule=rule, ties=mean)$y) del <- diff(range(y, na.rm=TRUE)) m <- length(aty) yinv <- double(m) cant <- double(0) for(i in 1:m) { a <- aty[i] s <- abs(y-a) < (tolInverse * del) nclose <- sum(s) if(nclose < 2) { if(nclose==0) cant <- c(cant, a) xest <- approx(y, x, xout=a, rule=rule)$y ## If a outside range of y, approx(rule=2) will return min or max ## x. There may be many x's with y values near this extreme x. ## Take a random draw from them. a <- approx(x, y, xout=xest, rule=rule)$y s <- abs(y - a) < (tolInverse * del) nclose <- sum(s) if(nclose > 1) { maxdist <- max((y[s] - a)^2) wt <- if(maxdist==0) freq[s] else (1 - ((y[s] - a)^2) / maxdist) * freq[s] if(all(wt==0)) wt <- freq[s] # y[s] all the same if(any(wt==0)) wt[wt==0] <- min(wt[wt>0])/2 xest <- x[s][sample(nclose, 1, replace=FALSE, prob=wt/sum(wt))] } } else { maxdist <- max((y[s] - a)^2) wt <- if(maxdist==0) freq[s] else (1 - ((y[s] - a)^2) / maxdist) * freq[s] if(all(wt==0)) wt <- freq[s] # y[s] all the same if(any(wt==0)) wt[wt==0] <- min(wt[wt>0])/2 xest <- x[s][sample(nclose, 1, replace=FALSE, prob=wt/sum(wt))] ## sample(x[s],...) fails if x[s] is scalar; thanks: Bill Dunlap } yinv[i] <- xest } if(length(cant)) warning(paste('No actual ',name, ' has y value within ', format(tolInverse), '* range(y) (',format(del), ') of the following y values:', paste(format(sort(unique(cant))),collapse=' '), '.\nConsider increasing tolInverse. 
', 'Used linear interpolation instead.', sep=''))
  yinv
}

## Trick taken from MICE impute.polyreg
rMultinom <- function(probs, m)
{
  d <- dim(probs)
  n <- d[1]
  k <- d[2]
  lev <- dimnames(probs)[[2]]
  if(!length(lev)) lev <- 1:k
  ran <- matrix(lev[1], ncol=m, nrow=n)
  z <- apply(probs, 1, sum)
  if(any(abs(z-1) > .00001))
    stop('error in multinom: probabilities do not sum to 1')
  U <- apply(probs, 1, cumsum)
  for(i in 1:m) {
    un <- rep(runif(n), rep(k,n))
    ran[,i] <- lev[1 + apply(un > U, 2, sum)]
  }
  ran
}

utils::globalVariables('type')
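## Editorial usage sketch (not run) for rMultinom above.  The
## probability matrix is made up; each row must sum to 1.  Draws m
## samples per row, returning the sampled column (level) names.
if(FALSE) {
  p <- cbind(a=c(.2, .5), b=c(.5, .3), c=c(.3, .2))
  rMultinom(p, m=4)   # 2 x 4 matrix of sampled level names
}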
## Hmisc/R/abs.error.pred.s

abs.error.pred <- function(fit, lp=NULL, y=NULL)
{
  if(!length(y))  y  <- fit$y
  if(!length(lp)) lp <- fit$fitted.values
  if(!length(lp)) lp <- fit$linear.predictors
  if(!(length(y) && length(lp)))
    stop('must specify lp and y or specify y=T in the fit')

  s <- is.na(y + lp)
  if(any(s)) {
    y  <- y[!s]
    lp <- lp[!s]
  }

  my    <- median(y)
  mlp   <- median(lp)
  meanr <- mean(  abs( lp - mlp))
  meant <- mean(  abs( y  - my ))
  meane <- mean(  abs( lp - y  ))
  medr  <- median(abs( lp - mlp))
  medt  <- median(abs( y  - my ))
  mede  <- median(abs( lp - y  ))

  differences <- cbind(c(meanr, meane, meant),
                       c(medr,  mede,  medt))
  dimnames(differences) <- list(c('|Yi hat - median(Y hat)|',
                                  '|Yi hat - Yi|',
                                  '|Yi - median(Y)|'),
                                c('Mean','Median'))
  ratios <- cbind(c(meanr/meant, meane/meant),
                  c( medr/ medt,  mede/ medt))
  dimnames(ratios) <- list(c('|Yi hat - median(Y hat)|/|Yi - median(Y)|',
                             '|Yi hat - Yi|/|Yi - median(Y)|'),
                           c('Mean','Median'))

  structure(list(differences=differences, ratios=ratios),
            class='abs.error.pred')
}

print.abs.error.pred <- function(x, ...)
{
  cat('\nMean/Median |Differences|\n\n')
  print(x$differences)
  cat('\n\nRatios of Mean/Median |Differences|\n\n')
  print(x$ratios)
  invisible()
}

## Hmisc/R/curveRep.s

curveRep <- function(x, y, id, kn=5, kxdist=5, k=5, p=5, force1=TRUE,
                     metric=c('euclidean','manhattan'),
                     smooth=FALSE, extrap=FALSE, pr=FALSE)
{
  metric <- match.arg(metric)
  id <- as.character(id)
  omit <- is.na(x + y)
  missfreq <- NULL; nomit <- sum(omit)
  if(nomit) {
    m <- tapply(omit, id, sum)
    missfreq <- table(m)
    x <- x[!omit]; y <- y[!omit]; id <- id[!omit]
  }
  n <- length(x)
  ns <- table(id)
  nunique <- length(unique(ns))
  if(nunique==1 || nunique <= kn) ncuts <- c(sort(unique(ns)),Inf)
  else {
    grouped.n <- cut2(ns, g=kn)
    ncuts <- cut2(ns, g=kn, onlycuts=TRUE)
    if(force1 && ncuts[2] > 1 && min(ns)==1)
      ncuts <- sort(unique(c(1:2, ncuts)))
  }
  nlev <- length(ncuts)-1
  res <- vector('list', nlev)
  names(res) <- as.character(ncuts[-length(ncuts)])

  clust <- function(x, k)
    if(diff(range(x))==0 || NROW(x) < k+1) rep(1, NROW(x))
    else clara(x, k, metric=metric)$clustering

  interp <- if(extrap)
              function(x, y=NULL, xout) approxExtrap(x, y, xout=xout)$y
            else
              function(x, y=NULL, xout) approx(x, y, xout=xout, rule=2)$y

  ## Cluster by sample size first
  if(pr) cat('Creating',nlev,'sample size groups\n\n')
  for(i in 1:nlev) {
    ## Get list of curve ids in this sample size group
    if(i==nlev) {
      below <- ns <= ncuts[i+1]
      brack <- ']'
    } else {
      below <- ns < ncuts[i+1]
      brack <- ')'
    }
    ids <- names(ns)[ns >= ncuts[i] & below]
    if(pr) cat('Processing sample size [',ncuts[i],',',ncuts[i+1],
               brack,' containing ', length(ids),' curves\n',sep='')
    if(length(ids) < kxdist) res[[i]] <- list(ids)
    else {
      ## Cluster by distribution of x within sample size group
      ## Summarize these ids by clustering on range of x,
      ## plus the largest gap if minimum sample size > 2
      ## Use only the x position if min sample size is 1
      s <- id %in% ids
      ssize <- min(tapply(x[s], id[s], function(w) length(unique(w))))
      z <- tapply((1:n)[s], id[s],
                  function(j) if(ssize==1) x[j][1]
                              else if(ssize==2) range(x[j])
                              else c(range(x[j]),max(diff(sort(x[j])))))
      z <- matrix(unlist(z), nrow=length(z), byrow=TRUE)
      if(kxdist > nrow(z) - 1)
        stop('number of curves to cluster must be >= kxdist+1')
      distclusters <- clust(z, kxdist)
      if(pr) {
        cat(' Number of curves in each x-dist cluster:\n')
        print(table(distclusters))
      }
      resi <- list()
      ## Within x distribution and within sample size interval,
      ## cluster on linearly interpolated y at p equally spaced x points
      ## unless <2 unique x-points for some curve
      for(clus in 1:max(distclusters)) {
        idc <- ids[distclusters==clus]
        if(pr) cat(' Processing x-distribution group', clus,
                   'containing', length(idc),'curves\n')
        s <- id %in% idc
        ssize <- min(tapply(x[s], id[s], function(w) length(unique(w))))
        if(ssize > 1) {
          xrange <- range(x[s])
          xseq <- seq(xrange[1], xrange[2], length.out=p)
        }
        g <- if(ssize==1) function(j) c(mean(x[j]), mean(y[j]))
             else if(smooth && ssize > 2)
               function(j) interp(clowess(x[j],y[j]), xout=xseq)
             else function(j)
interp(x[j], y[j], xout=xseq) z <- tapply((1:n)[s], id[s], g) z <- matrix(unlist(z), nrow=length(idc), byrow=TRUE) yclusters <- clust(z, min(k, max(length(idc)-2,1))) names(yclusters) <- idc resi[[clus]] <- yclusters } res[[i]] <- resi } } structure(list(res=res, ns=table(ns), nomit=nomit, missfreq=missfreq, ncuts=ncuts, kn=kn, kxdist=kxdist, k=k, p=p, smooth=smooth, x=x, y=y, id=id), class='curveRep') } print.curveRep <- function(x, ...) { sm <- if(x$smooth) 'smooth' else 'not smoothed' ncuts <- x$ncuts cat('kn:',x$kn, ' kxdist:',x$kxdist, ' k:',x$k, ' p:',x$p, ' ', sm, '\n\n', sep='') cat('Frequencies of number of non-missing values per curve:\n') print(x$ns) if(length(x$missfreq)) { cat(x$nomit, 'missing values excluded.\n\n') cat('\nFrequency of number of missing values per curve:\n') print(x$missfreq) } cat('\nSample size cuts:', paste(ncuts, collapse=' '),'\n') cat('Number of x distribution groups per sample size group:', paste(sapply(x$res, length), collapse=' '),'\n\n') res <- x$res ng <- length(res) for(i in 1:ng) { ngroup <- res[[i]] maxclus <- max(unlist(ngroup)) w <- matrix(NA, nrow=maxclus, ncol=length(ngroup), dimnames=list(paste('Cluster',1:maxclus), paste('x-Dist', 1:length(ngroup)))) j <- 0 for(xdistgroup in ngroup) { j <- j+1 w[,j] <- tabulate(xdistgroup, nbins=maxclus) } brack <- if(i==ng) ']' else ')' z <- if(is.infinite(ncuts[i+1])) ncuts[i] else paste('[', ncuts[i], ',', ncuts[i+1], brack, sep='') cat('\nNumber of Curves for Sample Size ', z, '\n',sep='') print(w) } invisible() } plot.curveRep <- function(x, which=1:length(res), method=c('all','lattice','data'), m=NULL, probs=c(.5,.25,.75), nx=NULL, fill=TRUE, idcol=NULL, freq=NULL, plotfreq=FALSE, xlim=range(x), ylim=range(y), xlab='x', ylab='y', colorfreq=FALSE, ...) { method <- match.arg(method) retdat <- FALSE if(method == 'data') { retdat <- TRUE method <- 'lattice' } ncuts <- x$ncuts res <- x$res; id <- x$id; y <- x$y; k <- x$k; x <- x$x nng <- length(res) samp <- function(ids) if(!length(m) || is.character(m) || length(ids) <= m) ids else sample(ids, m) if(is.character(m) && (m != 'quantiles' || method != 'lattice')) stop('improper value of m') if(method=='lattice') { if(length(which) != 1) stop('must specify one n range to plot for method="lattice" or "data"') nres <- names(res) nname <- if(length(nres)==1) NULL else if(nres[which]=='1' & nres[which+1]=='2') 'n=1' else { brack <- if(which==length(nres)) ']' else ')' z <- if(is.infinite(ncuts[which+1])) ncuts[which] else paste('[',ncuts[which],',',ncuts[which+1],brack,sep='') paste('n ',z, sep='') } res <- res[[which]] n <- length(x) X <- Y <- xdist <- cluster <- sizecluster <- numeric(n) curve <- character(n) if(length(freq)) { unique.cats <- unique(freq) Freqtab <- matrix(0, nrow=n, length(unique.cats), dimnames=list(NULL, unique.cats)) } st <- 1 for(jx in 1:length(res)) { xgroup <- res[[jx]] ids <- names(xgroup) for(jclus in 1:max(xgroup)) { all.ids.in.cluster <- ids[xgroup==jclus] if(length(freq)) { freqtab <- table(freq[all.ids.in.cluster]) nfreqtab <- names(freqtab) } plotted.ids.in.cluster <- samp(all.ids.in.cluster) for(cur in plotted.ids.in.cluster) { s <- id %in% cur np <- sum(s) i <- order(x[s]) en <- st+np-1 if(en > n) stop('program logic error 1') X[st:en] <- x[s][i] Y[st:en] <- y[s][i] xdist[st:en] <- jx cluster[st:en] <- jclus curve[st:en] <- cur sizecluster[st:en] <- sum(xgroup==jclus) if(length(freq)) Freqtab[st:en, nfreqtab] <- rep(freqtab, each=np) st <- st+np } } } Y <- Y[1:en]; X <- X[1:en] distribution <- xdist[1:en]; cluster <- 
cluster[1:en] curve <- curve[1:en]; sizecluster <- sizecluster[1:en] if(length(freq)) Freqtab <- Freqtab[1:en,,drop=FALSE] textfun <- function(subscripts, groups=NULL) { if(!length(subscripts)) return() txt <- if(length(freq) && length(groups)) { tab <- Freqtab[subscripts[1],] if(plotfreq) { mx <- max(Freqtab, na.rm=TRUE) f <- mx/(.1*plotfreq) y <- 1 fnam <- names(tab) long <- fnam[nchar(fnam)==max(nchar(fnam))][1] lx <- convertX(unit(1, 'strwidth', long), 'npc', valueOnly=TRUE) for(i in 1:length(tab)) { y <- y - .075 grid.text(fnam[i], x=lx-.005, y=y+.025, just=c(1,.5), gp=gpar(fontsize=7, col=gray(.4))) if(tab[i] > 0) grid.polygon(x=c(lx, lx+tab[i]/f, lx+tab[i]/f, lx, lx), y=c(y, y, y+.05, y+.05, y), gp=gpar(fill=gray(.7), col=gray(.7))) if(tab[i]==mx) grid.text(mx, x=lx+mx/f + .01, y=y+.025, just=c(0,.5), gp=gpar(fontsize=7, col=gray(.4))) } return() } txt <- paste(names(tab), tab, sep=':') txt2 <- txt paste(txt, collapse=';') } else { size <- sizecluster[subscripts[1]] paste('N=',size,sep='') } if (!colorfreq | is.null(idcol) | is.null(freq)) #Do same as original { grid.text(txt, x = 0.005, y = 0.99, just = c(0, 1), gp = gpar(fontsize = 9, col = gray(0.25))) } else #color freq text using idcol { mycolors<-data.frame(idcol,freq) mycolors<-unique(mycolors) curtext<-txt2[1] curtrt<-strsplit(curtext,':')[[1]][1] curcol<-as.character( mycolors[ curtrt==as.character( mycolors[,2]) ,1] ) grid.text(curtext, x = 0.005, y = 0.99, just = c(0, 1), gp = gpar(fontsize = 9, col =curcol) ) emspace<-2*strwidth('M', units="figure") for (i in 2:length(txt2)) { curtext<-txt2[i] curtrt<-strsplit(curtext,':')[[1]][1] curcol<-as.character( mycolors[ curtrt==as.character( mycolors[,2]) ,1] ) grid.text(curtext, x = emspace+strwidth(txt2[i-1], units="figure") , y = 0.99, just = c(0, 1), gp = gpar(fontsize = 9, col = curcol) ) } } } pan <- if(length(idcol)) function(x, y, subscripts, groups, type, ...) { groups <- as.factor(groups)[subscripts] textfun(subscripts, groups) for(g in levels(groups)) { idx <- groups == g xx <- x[idx]; yy <- y[idx]; ccols <- idcol[g] if (any(idx)) { switch(type, p = lpoints(xx, yy, col = ccols), l = llines(xx, yy, col = ccols), b = { lpoints(xx, yy, col = ccols) llines(xx, yy, col = ccols) }) } } } else function(x, y, subscripts, groups, ...) { panel.superpose(x, y, subscripts, groups, ...) textfun(subscripts, groups) } if(retdat) return(data.frame(x=X, y=Y, distribution, cluster, curve=curve, ninterval=nname)) if(is.character(m)) print(xYplot(Y ~ X | distribution*cluster, method='quantiles', probs=probs, nx=nx, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, main=nname, as.table=TRUE, panel=function(x, y, subscripts, ...) { if(length(subscripts)) { panel.xYplot(x, y, subscripts, ...) 
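                       ## Editorial note: textfun() annotates the panel
                       ## corner with the cluster size or frequency info.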
                       textfun(subscripts)
                     }
                   }))
    else
      print(xyplot(Y ~ X | distribution*cluster, groups=curve,
                   xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
                   type=if(nres[which]=='1')'b' else 'l',
                   main=nname, panel=pan, as.table=TRUE))
    return(invisible())
  }

  for(jn in which) {
    ngroup <- res[[jn]]
    for(jx in 1:length(ngroup)) {
      xgroup <- ngroup[[jx]]
      ids <- names(xgroup)
      for(jclus in 1:max(xgroup)) {
        rids <- ids[xgroup==jclus]
        nc <- length(rids)
        ids.in.cluster <- samp(rids)
        for(curve in 1:length(ids.in.cluster)) {
          s <- id %in% ids.in.cluster[curve]
          i <- order(x[s])
          type <- if(length(unique(x[s]))==1)'b' else 'l'
          if(curve==1) {
            plot(x[s][i], y[s][i], xlab=xlab, ylab=ylab,
                 type='n', xlim=xlim, ylim=ylim)
            brack <- if(jn==nng) ']' else ')'
            z <- if(is.infinite(ncuts[jn+1])) ncuts[jn]
                 else paste('[', ncuts[jn],',',ncuts[jn+1],brack,sep='')
            title(paste('n ', z, ' x=',jx,
                        ' c=',jclus,' ',nc,' curves', sep=''), cex=.5)
          }
          lines(x[s][i], y[s][i], type=type,
                col=if(length(idcol)) idcol[ids.in.cluster[curve]] else curve)
        }
      }
      if(fill && max(xgroup) < k)
        for(i in 1:(k - max(xgroup)))
          plot(0, 0, type='n', axes=FALSE, xlab='', ylab='')
    }
  }
}

curveSmooth <- function(x, y, id, p=NULL, pr=TRUE)
{
  omit <- is.na(x + y)
  if(any(omit)) {
    x <- x[!omit]; y <- y[!omit]; id <- id[!omit]
  }
  uid <- unique(id)
  m <- length(uid)
  pp <- length(p)
  if(pp) {
    X <- Y <- numeric(p*m)
    Id <- rep(id, length.out=p*m)
  }
  st <- 1
  en <- 0
  ncurve <- 0
  for(j in uid) {
    if(pr) {
      ncurve <- ncurve + 1
      if((ncurve %% 50) == 0) cat(ncurve,'')
    }
    s <- id==j
    xs <- x[s]
    ys <- y[s]
    if(length(unique(xs)) < 3) {
      if(pp) {
        en <- st + length(xs) - 1
        X[st:en] <- xs
        Y[st:en] <- ys
        Id[st:en] <- j
      }
    } else {
      if(pp) {
        uxs <- sort(unique(xs))
        xseq <- if(length(uxs) < p) uxs
                else seq(min(uxs), max(uxs), length.out=p)
        ye <- approx(clowess(xs, ys), xout=xseq)$y
        n <- length(xseq)
        en <- st + n - 1
        X[st:en] <- xseq
        Y[st:en] <- ye
        Id[st:en] <- j
      }
      else y[s] <- approx(clowess(xs, ys), xout=xs)$y
    }
    st <- en + 1
  }
  if(pr) cat('\n')
  if(pp) {
    X <- X[1:en]
    Y <- Y[1:en]
    Id <- Id[1:en]
    list(x=X, y=Y, id=Id)
  }
  else list(x=x, y=y, id=id)
}
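## Editorial usage sketch (not run) for curveRep, on made-up
## longitudinal data.  curveRep needs clara() from the cluster
## package to be available.
if(FALSE) {
  set.seed(1)
  m  <- sample(2:10, 200, replace=TRUE)   # points per curve
  id <- rep(1:200, m)
  x  <- unlist(lapply(m, function(k) sort(runif(k))))
  y  <- rnorm(length(x))
  w  <- curveRep(x, y, id, kn=3, kxdist=2, k=3, p=5)
  w                                        # print.curveRep summary
  plot(w, which=1, method='lattice', m=10) # sample 10 curves/cluster
}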
## Hmisc/R/hist.data.frame.s

hist.data.frame <- function(x, n.unique=3, nclass="compute",
                            na.big=FALSE, rugs=FALSE, freq=TRUE,
                            mtitl=FALSE, ...)
{
  oldmf  <- par('mfrow')
  oldoma <- par('oma')
  oldmar <- par('mar')   # resetting mfrow causes a new mar
  on.exit(par(mfrow=oldmf, oma=oldoma, mar=oldmar))
  mf <- oldmf
  if(length(mf)==0) mf <- c(1,1)

  automf <- FALSE
  if((la <- length(x))>1 & max(mf)==1) {
    mf <- if(la<=4) c(2,2)
          else if(la<=6) c(2,3)
          else if(la<=9) c(3,3)
          else if(la<=12) c(3,4)
          else if(la<=16) c(4,4)
          else c(4,5)
    automf <- TRUE
    par(mfrow=mf)
  }

  if(is.character(mtitl)) par(oma=c(0,0,3,0))

  nam <- names(x)
  i <- 0
  j <- 0
  for(v in x) {
    j <- j+1
    type <- if(is.character(v) || is.factor(v)) 'cat'
            else if(inherits(v,'Date')) 'Date'
            else 'none'
    lab <- attr(v,"label")
    lab <- if(length(lab) && nchar(lab) > 35) nam[j]
           else label(v, units=TRUE, plot=type!='cat', default=nam[j])
    if(type=='cat') {
      tab <- -sort(-table(v))
      dotchart3(tab, xlab=paste('Frequencies for', lab))
    } else {
      type <- if(inherits(v,'Date')) 'Date' else 'none'
      if(type %nin% c('none','Date')) v <- unclass(v)
      w <- v[!is.na(v)]
      n <- length(w)
      if(length(unique(w)) >= n.unique) {
        i <- i+1
        if(is.numeric(nclass)) nc <- nclass
        else if(nclass=="compute")
          nc <- max(2,trunc(min(n/10,25*logb(n,10))/2))
        if(nclass == 'default') {
          if(type == 'Date')
            hist(v, nc, xlab=lab, freq=freq, main='')
          else hist(v, xlab=lab, main='', freq=freq)
        } else {
          if(type == 'Date')
            hist(v, nc, xlab=lab, freq=freq, main='')
          else hist(v, nclass=nc, xlab=lab, freq=freq, main='')
        }
        m <- sum(is.na(v))
        pm <- paste("n:",n," m:",m,sep="")
        title(sub=pm,adj=0,cex=.5)
        if(na.big && m>0) mtext(paste(m,"NAs"),line=-2,cex=1)
        if(rugs) scat1d(v, ...)
        if(automf && interactive() &&
           names(dev.list())!='postscript' &&
           (i %% prod(mf)==0)) {
          if(is.character(mtitl)) mtitle(mtitl)
          cat("click left mouse button to proceed\n")
          locator(1)
        } else if(is.character(mtitl) && i %% prod(mf)==1)
          mtitle(mtitl)
      }
    }
  }
  invisible(ceiling(i / prod(mf)))
}

## Hmisc/R/spower.s

spower <- function(rcontrol, rinterv, rcens, nc, ni,
                   test=logrank, cox=FALSE, nsim=500, alpha=.05,
                   pr=TRUE)
{
  crit <- qchisq(1-alpha, 1)
  group <- c(rep(1,nc), rep(2,ni))
  nexceed <- 0
  if(cox) beta <- numeric(nsim)

  maxfail <- 0; maxcens <- 0
  for(i in 1:nsim) {
    if(pr && i %% 10 == 0) cat(i,'\r')
    yc <- rcontrol(nc)
    yi <- rinterv(ni)
    cens <- rcens(nc+ni)
    y <- c(yc, yi)
    maxfail <- max(maxfail, max(y))
    maxcens <- max(maxcens, max(cens))
    S <- cbind(pmin(y,cens), 1*(y <= cens))
    nexceed <- nexceed + (test(S, group) > crit)
    if(cox) {
      fit <- coxph.fit(as.matrix(group), S, strata=NULL,
                       offset=NULL, init=NULL,
                       control=coxph.control(iter.max=10, eps=.0001),
                       method="efron", rownames=NULL)
      beta[i] <- fit$coefficients
    }
  }
  cat('\n')
  if(maxfail <
0.99*maxcens) stop(paste('Censoring time distribution defined at later times than\nsurvival time distribution. There will likely be uncensored failure times\nstacked at the maximum allowed survival time.\nMaximum simulated failure time:', max(y),'\nMaximum simulated censoring time:', max(cens))) power <- nexceed/nsim if(cox) structure(list(power=power, betas=beta, nc=nc, ni=ni, alpha=alpha, nsim=nsim), class='spower') else power } print.spower <- function(x, conf.int=.95, ...) { b <- x$betas hr <- exp(b) pp <- (1+conf.int)/2 cl <- quantile(hr, c((1-conf.int)/2, pp)) meanbeta <- mean(b) medbeta <- median(b) hrmean <- exp(meanbeta) hrmed <- exp(medbeta) moehi <- cl[2]/hrmed moelo <- hrmed/cl[1] g <- function(w) round(w, 4) mmoe <- max(moehi, moelo) cat('\nTwo-Group Event Time Comparison Simulation\n\n', x$nsim,' simulations\talpha: ', x$alpha, '\tpower: ', x$power, '\t', conf.int, ' confidence interval\n', '\nHazard ratio from mean beta : ', g(hrmean), '\nHazard ratio from median beta : ', g(hrmed), '\nStandard error of log hazard ratio: ', g(sd(b)), '\nConfidence limits for hazard ratio: ', g(cl[1]), ', ', g(cl[2]), '\nFold-change margin of error high : ', g(moehi), '\t(upper CL/median HR)', '\nFold-change margin of error low : ', g(moelo), '\t(median HR/lower CL)', '\nMax fold-change margin of error : ', g(mmoe),'\n\n') cat('The fold change margin of error of', g(mmoe), 'represents the margin of error\n', 'the study is likely to achieve in estimating the intervention:control\n', 'hazard ratio. It is the ratio of a', conf.int, 'confidence limit on the\n', 'hazard ratio to the median hazard ratio obtained over the', x$nsim, 'simulations.\n', 'The confidence limit was obtained by computing the', pp, 'quantile of the\n', x$nsim, 'observed hazard ratios. The standard error is the standard deviation\n', 'of the', x$nsim, 'simulated log hazard ratios.\n\n') res <- c(cl, hrmean, hrmed, sd(b), moelo, moehi, x$power) names(res) <- c('CLlower','CLupper','HRmean','HRmedian','SE', 'MOElower','MOEupper','Power') invisible(res) } Quantile2 <- function(scontrol, hratio, dropin=function(times)0, dropout=function(times)0, m=7500, tmax, qtmax=.001, mplot=200, pr=TRUE, ...) { ## Solve for tmax such that scontrol(t)=qtmax dlist <- list(...) k <- length(dlist) && !is.null(dlist) f <- if(k) function(x, scontrol, qt, ...) scontrol(x, ...) - qt else function(x, scontrol, qt) scontrol(x) - qt if(missing(tmax)) { if(k) tmax <- uniroot(f, c(0,1e9), scontrol=scontrol, qt=qtmax, ...)$root else tmax <- uniroot(f, c(0,1e9), scontrol=scontrol, qt=qtmax)$root } if(pr) cat('\nInterval of time for evaluating functions:[0,', format(tmax),']\n\n') ## Generate sequence of times to use in all approximations and sequence ## to use for plot method times <- seq(0, tmax, length.out=m) tim <- seq(0, tmax, length.out=mplot) tinc <- times[2] ## Approximate hazard function for control group sc <- scontrol(times, ...) 
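  ## Editorial note: the next step converts the survival curve to a
  ## discrete-time hazard, using h(t) = -d log S(t)/dt approximated by
  ## first differences of -log S divided by the grid spacing tinc; the
  ## last value is repeated to keep the vector of length m.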
hc <- diff(-logb(sc)) hc <- c(hc, hc[m-1])/tinc ## to make length=m ## hazard function for intervention group hr <- rep(hratio(times), length.out=m) hi <- hc*hr ## hazard for control group with dropin di <- rep(dropin(times), length.out=m) hc2 <- (1-di)*hc + di*hi ## hazard for intervention group with dropout do <- rep(dropout(times), length.out=m) hi2 <- (1-do)*hi + do*hc ## survival for intervention group si <- exp(-tinc*cumsum(hi)) ## Compute contaminated survival function for control and intervention sc2 <- if(any(di>0))exp(-tinc*cumsum(hc2)) else sc si2 <- exp(-tinc*cumsum(hi2)) ## Store all functions evaluated at shorter times vector (tim), for ## plotting asing <- function(x) x sc.p <- asing(approx(times, sc, xout=tim)$y) hc.p <- asing(approx(times, hc, xout=tim)$y) sc2.p <- asing(approx(times, sc2, xout=tim)$y) hc2.p <- asing(approx(times, hc2, xout=tim)$y) si.p <- asing(approx(times, si, xout=tim)$y) hi.p <- asing(approx(times, hi, xout=tim)$y) si2.p <- asing(approx(times, si2, xout=tim)$y) hi2.p <- asing(approx(times, hi2, xout=tim)$y) dropin.p <- asing(approx(times, di, xout=tim)$y) dropout.p <- asing(approx(times, do, xout=tim)$y) hratio.p <- asing(approx(times, hr, xout=tim)$y) hratio2.p <- hi2.p/hc2.p tim <- asing(tim) plot.info <- list("C Survival" =list(Time=tim,Survival=sc.p), "I Survival" =list(Time=tim,Survival=si.p), "C Survival w/Dropin" =list(Time=tim,Survival=sc2.p), "I Survival w/Dropout" =list(Time=tim,Survival=si2.p), "C Hazard" =list(Time=tim,Hazard=hc.p), "I Hazard" =list(Time=tim,Hazard=hi.p), "C Hazard w/Dropin" =list(Time=tim,Hazard=hc2.p), "I Hazard w/Dropout" =list(Time=tim,Hazard=hi2.p), "Dropin" =list(Time=tim,Probability=dropin.p), "Dropout" =list(Time=tim,Probability=dropout.p), "Hazard Ratio" =list(Time=tim,Ratio=hratio.p), "Hazard Ratio w/Dropin+Dropout"=list(Time=tim,Ratio=hratio2.p)) ## Create S-Plus functions for computing random failure times for ## control and intervention subject to dropin, dropout, and hratio r <- function(n, what=c('control','intervention'), times, csurvival, isurvival) { what <- match.arg(what) approx(if(what=='control')csurvival else isurvival, times, xout=runif(n), rule=2)$y } asing <- function(x) x formals(r) <- list(n=integer(0), what=c('control','intervention'), times=asing(times), csurvival=asing(sc2), isurvival=asing(si2)) structure(r, plot.info=plot.info, dropin=any(di>0), dropout=any(do>0), class='Quantile2') } print.Quantile2 <- function(x, ...) { attributes(x) <- NULL print(x) invisible() } plot.Quantile2 <- function(x, what=c('survival','hazard','both','drop','hratio', 'all'), dropsep=FALSE, lty=1:4, col=1, xlim, ylim=NULL, label.curves=NULL, ...) 
{ what <- match.arg(what) pi <- attr(x, 'plot.info') if(missing(xlim)) xlim <- c(0,max(pi[[1]][[1]])) dropin <- attr(x, 'dropin') dropout <- attr(x, 'dropout') i <- c(1,2, if(dropin)3, if(dropout)4) if(what %in% c('survival','both','all')) { if(dropsep && (dropin|dropout)) { labcurve(pi[1:2], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim, opts=label.curves) labcurve(pi[i[-(1:2)]], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim, opts=label.curves) } else labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim, opts=label.curves) } if(what %in% c('hazard','both','all')) { if(dropsep && (dropin|dropout)) { labcurve(pi[5:6], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim, opts=label.curves) labcurve(pi[4+i[-(1:2)]], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim, opts=label.curves) } else labcurve(pi[4+i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim, opts=label.curves) } if(what=='drop' || (what=='all' && (dropin | dropout))) { i <- c(if(dropin)9, if(dropout)10) if(length(i)==0) i <- 10 labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim, opts=label.curves) } if(what %in% c('hratio','all')) { i <- c(11, if(dropin|dropout) 12) labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim, opts=label.curves) } invisible() } logrank <- function(S, group) { group <- as.factor(group) i <- is.na(S) | is.na(group) if(any(i)) { i <- !i S <- S[i,,drop=FALSE] group <- group[i] } group <- as.integer(group) y <- S[,1] event <- S[,2] i <- order(-y) y <- y[i] event <- event[i] group <- group[i] x <- cbind(group==1, group==2, (group==1)*event, (group==2)*event) if(TRUE) { s <- rowsum(x, y, FALSE) nr1 <- cumsum(s[,1]) nr2 <- cumsum(s[,2]) d1 <- s[,3] d2 <- s[,4] rd <- d1+d2 rs <- nr1+nr2-rd n <- nr1+nr2 oecum <- d1 - rd*nr1/n vcum <- rd * rs * nr1 * nr2 / n / n / (n-1) chisq <- sum(oecum)^2 / sum(vcum,na.rm=TRUE) hr <- sum(d1*(nr1-d1)/n)/sum(d2*(nr2-d2)/n) } else { # non-working code; trying to get stratification to work OE <- v <- hrn <- hrd <- 0 for(strat in unique(strata)) { j <- strata==strat s <- rowsum(x[j,], y[j], FALSE) nr1 <- cumsum(s[,1]) nr2 <- cumsum(s[,2]) d1 <- s[,3] d2 <- s[,4] rd <- d1+d2 rs <- nr1+nr2-rd n <- nr1+nr2 oecum <- d1 - rd*nr1/n vcum <- rd * rs * nr1 * nr2 / n / n / (n-1) OE <- OE + sum(oecum) v <- v + sum(vcum, na.rm=TRUE) hrn <- hrn + sum(d1*(nr1-d1)/n) hrd <- hrd + sum(d2*(nr2-d2)/n) } chisq <- OE^2 / v hr <- hrn/hrd } structure(chisq, hr=hr) } Weibull2 <- function(times, surv) { z1 <- -logb(surv[1]) z2 <- -logb(surv[2]) t1 <- times[1] t2 <- times[2] gamma <- logb(z2/z1)/logb(t2/t1) alpha <- z1/(t1^gamma) g <- function(times, alpha, gamma) { exp(-alpha*(times^gamma)) } formals(g) <- list(times=NULL, alpha=alpha, gamma=gamma) g } ## Function to fit a Gompertz survival distribution to two points ## The function is S(t) = exp[-(1/b)exp(a+bt)] ## Returns a list with components a and b, and a function for ## generating S(t) for a vector of times Gompertz2 <- function(times, surv) { z1 <- logb(-logb(surv[1])) z2 <- logb(-logb(surv[2])) t1 <- times[1] t2 <- times[2] b <- (z2-z1)/(t2-t1) a <- z1 + logb(b)-b*t1 g <- function(times, a, b) { exp(-exp(a+b*times)/b) } formals(g) <- list(times=NULL, a=a, b=b) g } Lognorm2 <- function(times, surv) { z1 <- qnorm(1-surv[1]) z2 <- qnorm(1-surv[2]) sigma <- logb(times[2]/times[1])/(z2-z1) mu <- logb(times[1]) - sigma*z1 g <- function(times, mu, sigma) { pnorm(- (logb(times) - mu) / sigma) } formals(g) <- list(times=NULL, mu=mu, sigma=sigma) g } 
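## Editorial usage sketch (not run); all numbers below are invented.
## Weibull2 solves for a Weibull S(t) passing through two
## (time, survival) points; Quantile2 turns it into a random-variate
## generator suitable for spower.
if(FALSE) {
  scontrol <- Weibull2(c(1, 3), c(.95, .70))      # S(1)=.95, S(3)=.70
  rand     <- Quantile2(scontrol, hratio=function(t) .75)
  rcontrol <- function(n) rand(n, 'control')
  rinterv  <- function(n) rand(n, 'intervention')
  rcens    <- function(n) runif(n, 1, 5)          # uniform censoring
  spower(rcontrol, rinterv, rcens, nc=300, ni=300, nsim=100)
}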
## Hmisc/R/transace.s

# $Id$

transace <- function(x, monotonic=NULL, categorical=NULL, binary=NULL,
                     pl=TRUE)
{
  ## require(acepack)  # provides ace, avas
  if (!requireNamespace("acepack", quietly = TRUE))
    stop("This function requires the 'acepack' package.")

  nam <- dimnames(x)[[2]]
  omit <- is.na(x %*% rep(1,ncol(x)))
  omitted <- (1:nrow(x))[omit]
  if(length(omitted)) x <- x[!omit,]
  p <- ncol(x)
  xt <- x   # binary variables retain original coding
  if(!length(nam)) stop("x must have column names")

  rsq <- rep(NA, p)
  names(rsq) <- nam
  for(i in (1:p)[!(nam %in% binary)]) {
    lab <- nam[-i]
    w <- 1:(p-1)
    im <- w[lab %in% monotonic]
    ic <- w[lab %in% categorical]
    if(nam[i] %in% monotonic) im <- c(0, im)
    if(nam[i] %in% categorical) ic <- c(0, ic)
    m <- 10*(length(im)>0)+(length(ic)>0)
    if(m==11) a <- acepack::ace(x[,-i], x[,i], mon=im, cat=ic)
    else if (m==10) a <- acepack::ace(x[,-i], x[,i], mon=im)
    else if(m==1) a <- acepack::ace(x[,-i], x[,i], cat=ic)
    else a <- acepack::ace(x[,-i], x[,i])
    xt[,i] <- a$ty
    rsq[i] <- a$rsq
    if(pl) plot(x[,i], xt[,i], xlab=nam[i], ylab='')
  }
  cat("R-squared achieved in predicting each variable:\n\n")
  print(rsq)
  attr(xt, "rsq") <- rsq
  attr(xt, "omitted") <- omitted
  invisible(xt)
}

areg.boot <- function(x, data, weights, subset, na.action=na.delete,
                      B = 100, method=c('areg','avas'), nk=4,
                      evaluation=100, valrsq=TRUE,
                      probs=c(.25,.5,.75), tolerance=NULL)
{
  acall <- match.call()
  method <- match.arg(method)
  ## if(method=='avas') require(acepack)
  if(!inherits(x,'formula'))
    stop('first argument must be a formula')

  m <- match.call(expand.dots = FALSE)
  Terms <- terms(x, specials=c('I','monotone'))
  m$formula <- x
  m$x <- m$B <- m$method <- m$evaluation <- m$valrsq <- m$probs <-
    m$nk <- m$tolerance <- NULL
  m$na.action <- na.action
  m[[1]] <- as.name("model.frame")
  x <- eval(m, sys.parent())

  nam <- unique(var.inner(Terms))
  ylab <- names(x)[1]
  k <- length(x)
  p <- k - 1
  nact <- attr(x,"na.action")

  default <- if(nk==0)'l' else 's'
  xtype <- rep(default, p); ytype <- default
  names(xtype) <- nam

  linear <- attr(Terms,'specials')$I
  if(length(linear)) {
    if(any(linear==1)) ytype <- 'l'
    if(any(linear>1 )) xtype[linear-1] <- 'l'
  }

  mono <- attr(Terms,'specials')$monotone
  if(length(mono)) {
    if(method=='avas' && any(mono==1))
      stop('y is always monotone with method="avas"')
    if(method=='areg')
      stop('monotone not implemented by areg')
    xtype[mono-1] <- 'm'
  }
  xbase <- 'x'

  weights <- model.extract(x, weights)
  cat.levels <- values <- vector('list',k)
  names(cat.levels) <- names(values) <- c(ylab,nam)

  for(j in 1:k) {
    typ <- ' '
    xj <- x[[j]]
    if(is.character(xj)) {
      xj <- as.factor(xj)
      cat.levels[[j]] <- lev <- levels(xj)
      x[[j]] <- as.integer(xj)
      typ <- 'c'
      values[[j]] <- 1:length(lev)
    }
    else if(is.factor(xj)) {
      cat.levels[[j]] <- lev <- levels(xj)
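      ## Editorial note: factor levels are saved in cat.levels and the
      ## variable is recoded to consecutive integers; downstream code
      ## (plots, Function methods) uses cat.levels to map back.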
x[[j]] <- as.integer(xj) typ <- 'c' values[[j]] <- 1:length(lev) if(method=='avas' && j==1) stop('categorical y not allowed for method="avas"') } else { xj <- unclass(xj) # 5Mar01 xu <- sort(unique(xj)) nu <- length(xu) if(nu < 3) typ <- 'l' values[[j]] <- if(nu <= length(probs)) xu else quantile(xj,probs) } if(typ != ' ') { if(j==1) ytype <- typ else xtype[j-1] <- typ } } y <- x[,1] x <- x[,-1,drop=FALSE] n <- length(y) if(length(weights)) stop('weights not implemented for areg') else weights <- rep(1,n) if(method=='areg') { f <- areg(x, y, xtype=xtype, ytype=ytype, nk=nk, na.rm=FALSE, tolerance=tolerance) rsquared.app <- f$rsquared } else { if (!requireNamespace("acepack", quietly = TRUE)) stop("The 'avas' method requires the 'acepack' package.") Avas <- function(x, y, xtype, ytype, weights) { p <- ncol(x) types <- c(ytype, xtype) mono <- (0:p)[types == 'm'] lin <- (0:p)[types == 'l'] categ <- (0:p)[types == 'c'] acepack::avas(x, y, weights, cat=categ, mon=mono, lin=lin) } f <- Avas(x, y, xtype, ytype, weights) rsquared.app <- f$rsq } f.orig <- lm.fit.qr.bare(f$tx, f$ty) coef.orig <- f.orig$coefficients names(coef.orig) <- cnam <- c('Intercept',nam) lp <- f$ty - f.orig$residuals trans <- cbind(f$ty,f$tx) Xo <- cbind(y, x) xlim <- apply(Xo, 2, range) xlim[,1] <- range(trans[,1]) nam <- c(ylab, nam) fit <- vector('list',k) names(fit) <- nam neval <- rep(evaluation, k) for(i in 1:k) { iscat <- if(i==1) ytype=='c' else xtype[i-1]=='c' if(iscat) neval[i] <- xlim[2,i] ## Note: approx will return NAs even when rule=3 if x coordinate ## contains duplicates, so sort by x and remove dups (fctn in Misc.s) fit[[i]] <- if(i==1) approxExtrap(trans[,1],y, xout=seq(xlim[1,i],xlim[2,i],length=neval[i])) else approxExtrap(Xo[,i], trans[,i], xout=seq(xlim[1,i],xlim[2,i],length=neval[i])) } if(max(neval) > evaluation) stop('evaluation must be >= # levels of categorical predictors') boot <- array(NA, c(evaluation,B,k), list(NULL,NULL,nam)) coefs <- matrix(NA, nrow=B, ncol=k, dimnames=list(NULL,cnam)) optimism <- 0 nfail <- 0 for(b in 1:B) { cat(b,'\r') s <- sample(n, n, replace = TRUE) g <- if(method=='areg') areg(x[s,,drop=FALSE], y[s], xtype=xtype, ytype=ytype, nk=nk, na.rm=FALSE, tolerance=tolerance) else Avas(x[s,,drop=FALSE], y[s], xtype=xtype, ytype=ytype, weights=weights[s]) if(!all(is.finite(g$tx))) { nfail <- nfail + 1 next } f.ols <- lm.fit.qr.bare(g$tx, g$ty) cof <- f.ols$coefficients coefs[b,] <- cof X <- Xo[s,] trans <- cbind(g$ty, g$tx) for(i in 1:k) boot[1:neval[i],b,i] <- if(i==1) approxExtrap(trans[,1],X[,1], xout=seq(xlim[1,i],xlim[2,i],length=neval[i]))$y else approxExtrap(X[,i], trans[,i], xout=seq(xlim[1,i],xlim[2,i], length=neval[i]))$y if(valrsq) { rsq.boot <- f.ols$rsquared yxt.orig <- matrix(NA,nrow=n,ncol=k) for(i in 1:k) yxt.orig[,i] <- approxExtrap(X[,i],trans[,i],xout=Xo[,i])$y yt.hat <- cbind(1,yxt.orig[,-1]) %*% cof yt <- yxt.orig[,1] resid <- yt - yt.hat yt <- yt[!is.na(resid)] resid <- resid[!is.na(resid)] m <- length(resid) sst <- sum((yt - mean(yt))^2) sse <- sum(resid^2) rsquare <- 1 - sse/sst optimism <- optimism + rsq.boot - rsquare } } cat('\n') if(nfail > 0) warning(paste(method,'failed to converge in', nfail,'resamples')) rsq.val <- if(valrsq) rsquared.app - optimism/(B-nfail) structure(list(call=acall, method=method, coefficients=coef.orig, linear.predictors=lp, fitted.values=approxExtrap(fit[[1]],xout=lp)$y, residuals=f.orig$residuals, na.action=nact, fit=fit, n=n, nk=nk, xtype=xtype, ytype=ytype, xdf=f$xdf, ydf=f$ydf, cat.levels=cat.levels, values=values, 
rsquared.app=rsquared.app,rsquared.val=rsq.val, boot=boot, coef.boot=coefs, nfail=nfail), class='areg.boot') } print.areg.boot <- function(x, ...) { cat("\n") cat(x$method,"Additive Regression Model\n\n") dput(x$call) cat("\n") xinfo <- data.frame(type=x$xtype, row.names=names(x$xtype)) if(length(x$xdf)) xinfo$d.f. <- x$xdf cat('\nPredictor Types\n\n') print(xinfo) cat('\ny type:', x$ytype) if(length(x$ydf)) cat('\td.f.:', x$ydf) cat('\n\n') if(length(x$nfail) && x$nfail > 0) cat('\n',x$method,' failed to converge in ', x$nfail,' resamples\n\n',sep='') if(length(z <- x$na.action)) naprint(z) cat('n=',x$n,' p=',length(x$fit)-1, '\n\nApparent R2 on transformed Y scale:',round(x$rsquared.app,3)) if(length(x$rsquared.val)) cat('\nBootstrap validated R2 :',round(x$rsquared.val,3)) cat('\n\nCoefficients of standardized transformations:\n\n') print(x$coefficients) res <- x$residuals rq <- c(quantile(res), mean(res), sqrt(var(res))) names(rq) <- c("Min", "1Q", "Median", "3Q", "Max", "Mean", "S.D.") cat("\n\nResiduals on transformed scale:\n\n") print(rq) cat('\n') invisible() } summary.areg.boot <- function(object, conf.int=.95, values, adj.to, statistic='median',q=NULL, ...) { scall <- match.call() fit <- object$fit Boot <- object$boot Values <- object$values if(!missing(values)) Values[names(values)] <- values nfail <- object$nfail if(!length(nfail)) nfail <- 0 res <- object$residuals Adj.to <- sapply(Values, function(y)median(1*y)) names(Adj.to) <- names(Values) # median adds .50% in R if(!missing(adj.to)) Adj.to[names(adj.to)] <- adj.to zcrit <- qnorm((1+conf.int)/2) k <- length(fit) p <- k - 1 B <- dim(Boot)[2] nam <- names(fit) coef.orig <- object$coefficients coefs <- object$coef.boot trans.orig.y <- fit[[1]] ytransseq <- trans.orig.y[[1]] ## The next 2 loops are required because it takes an extra step to compute ## the linear predictor at all predictor adjust-to settings, not just jth ## Get predicted transformed y with all variables set to adj. values pred.ty.adj <- double(p) for(j in 2:k) { namj <- nam[j] trans.orig <- fit[[namj]] pred.ty.adj[j-1] <- coef.orig[j] * approxExtrap(trans.orig, xout=Adj.to[namj])$y } ## For each bootstrap rep compute term summarizing the contribution ## of the jth predictor, evaluated at the adj. value, to predicting ## the transformed y, using only transformations from that boot. rep. 
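  ## Editorial note: effects are formed on the transformed-y scale as
  ## coef[j] * g_j(x_j) with the other predictors held at their
  ## adjust-to values; differences between predictor settings are
  ## back-transformed later via smearingEst.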
boot.adj <- matrix(NA, nrow=B, ncol=p) for(j in 2:k) { namj <- nam[j] adjj <- Adj.to[namj] bootj <- Boot[,,j] xt <- fit[[namj]]$x for(i in 1:B) { bootji <- bootj[,i] s <- !is.na(bootji) ## is.na added 3Apr01 if(!is.na(coefs[i,j])) boot.adj[i, j-1] <- coefs[i,j]*approxExtrap(xt[s], bootji[s], xout=adjj)$y } } ## Now for each predictor compute differences in the chosen ## statistical parameter for the original scale of predicted y boot.y <- Boot[,,1] R <- vector('list',p) names(R) <- nam[-1] for(j in 2:k) { namj <- nam[j] xv <- Values[[namj]] trans.orig <- fit[[namj]] pred.term <- coef.orig[j]*approxExtrap(trans.orig, xout=xv)$y pred.ty <- coef.orig[1] + sum(pred.ty.adj[-(j-1)]) + pred.term ## pred.y <- approx(trans.orig.y$y, trans.orig.y$x, xout=pred.ty,rule=3)$y pred.y <- smearingEst(pred.ty, trans.orig.y, res, statistic=statistic, q=q) lab <- attr(pred.y,'label') diff.pred <- pred.y[-1] - pred.y[1] ## For the same variable (j) repeat this over bootstrap reps sumd <- sumd2 <- rep(0, length(xv)-1) bootj <- Boot[,,j] xt <- trans.orig$x b <- 0 bmiss <- 0 for(i in 1:B) { if(is.na(coefs[i,j])) next ## From avas/ace failure bootji <- bootj[,i] s <- !is.na(bootji) pred.term <- coefs[i,j]*approxExtrap(xt[s],bootji[s], xout=xv)$y if(any(is.na(pred.term))) { bmiss <- bmiss+1 next } pred.ty <- coefs[i,1] + sum(boot.adj[i,-(j-1)]) + pred.term s <- !is.na(boot.y[,i]) pred.y <- smearingEst(pred.ty, list(x=ytransseq,y=boot.y[,i]), res, statistic=statistic, q=q) if(any(is.na(pred.y))) { bmiss <- bmiss+1 next } b <- b + 1 dp <- pred.y[-1] - pred.y[1] sumd <- sumd + dp sumd2 <- sumd2 + dp*dp } if(b < B) warning(paste('For',bmiss,'bootstrap samples a predicted value for one of the settings for',namj,'\ncould not be computed. These bootstrap samples ignored.\nConsider using less extreme predictor settings.\n')) sediff <- sqrt((sumd2 - sumd*sumd/b)/(b-1)) r <- cbind(c(0, diff.pred), c(NA, sediff), c(NA, diff.pred-zcrit*sediff), c(NA, diff.pred+zcrit*sediff), c(NA, diff.pred/sediff), c(NA, 2 * pnorm(- abs(diff.pred/sediff)))) cl <- object$cat.levels[[namj]] dimnames(r) <- list(x=if(length(cl))cl else format(xv), c('Differences','S.E',paste('Lower',conf.int), paste('Upper',conf.int),"Z","Pr(|Z|)")) R[[j-1]] <- r } if(nchar(lab) > 10) lab <- substring(lab, 1, 10) structure(list(call=scall, results=R, adj.to=Adj.to, label=lab, B=B, nfail=nfail, bmiss=bmiss), class='summary.areg.boot') } print.summary.areg.boot <- function(x, ...) { R <- x$results adj.to <- x$adj.to nam <- names(R) dput(x$call) cat('\nEstimates based on', x$B-x$nfail-x$bmiss, 'resamples\n\n') cat('\n\nValues to which predictors are set when estimating\neffects of other predictors:\n\n') print(adj.to) cat('\nEstimates of differences of effects on',x$label,'Y (from first X\nvalue), and bootstrap standard errors of these differences.\nSettings for X are shown as row headings.\n') for(j in 1:length(nam)) { cat('\n\nPredictor:',nam[j],'\n') print(R[[j]]) } invisible() } plot.areg.boot <- function(x, ylim, boot=TRUE, col.boot=2, lwd.boot=.15, conf.int=.95, ...) 
{ fit <- x$fit Boot <- x$boot k <- length(fit) B <- dim(Boot)[2] nam <- names(fit) boot <- if(is.logical(boot)) (if(boot) B else 0) else min(boot, B) mfr <- par('mfrow') if(!length(mfr) || max(mfr) == 1) { mf <- if(k<=2)c(1,2) else if(k<=4)c(2,2) else if(k<=6)c(2,3) else if(k<=9)c(3,3) else if(k<=12)c(3,4) else if(k<=16) c(4,4) else c(4,5) oldmfrow <- par('mfrow', 'err') par(mfrow=mf, err=-1) on.exit(par(oldmfrow)) } Levels <- x$cat.levels for(i in 1:k) { fiti <- fit[[i]] if(i==1) fiti <- list(x=fiti[[2]], y=fiti[[1]]) xx <- fiti[[1]] y <- fiti[[2]] lx <- length(xx) booti <- Boot[,,i] yl <- if(!missing(ylim)) ylim else { rbi <- quantile(booti,c(.025,.975),na.rm=TRUE) if(i==1) range(approxExtrap(fiti, xout=rbi)$y) else range(rbi) } levi <- Levels[[i]] plot(xx, y, ylim=yl, xlab=nam[i], ylab=paste('Transformed',nam[i]), type='n', lwd=2, axes=length(levi)==0) if(ll <- length(levi)) { mgp.axis(2, pretty(yl)) mgp.axis(1, at=1:ll, labels=levi) } if(boot>0) for(j in 1:boot) { if(i==1) { if(any(is.na(booti[1:lx,j]))) next lines(xx, approxExtrap(fiti, xout=booti[1:lx,j])$y, col=col.boot, lwd=lwd.boot) } else lines(xx, booti[1:lx,j], col=col.boot, lwd=lwd.boot) } if(!(is.logical(conf.int) && !conf.int)) { quant <- apply(booti[1:lx,],1,quantile, na.rm=TRUE,probs=c((1-conf.int)/2, (1+conf.int)/2)) if(i==1) { lines(xx, approxExtrap(fiti, xout=quant[1,])$y, lwd=1.5) lines(xx, approxExtrap(fiti, xout=quant[2,])$y, lwd=1.5) } else { lines(xx, quant[1,], lwd=1.5) lines(xx, quant[2,], lwd=1.5) } } lines(xx, fiti[[2]], lwd=2) } invisible() } Function.areg.boot <- function(object, type=c('list','individual'), ytype=c('transformed','inverse'), prefix='.', suffix='', pos=-1, ...) { type <- match.arg(type) ytype <- match.arg(ytype) if(missing(type) && !(missing(prefix) & missing(suffix) & missing(pos))) type <- 'individual' fit <- object$fit k <- length(fit) nam <- names(fit) g <- vector('list',k) xtype <- object$xtype typey <- object$ytype catl <- object$cat.levels names(g) <- nam for(i in 1:k) { typ <- if(i==1) typey else xtype[i-1] if(typ=='c') { if(i==1 && ytype=='inverse') stop('currently does not handle ytype="inverse" when y is categorical') h <- function(x, trantab) { if(is.factor(x)) x <- as.character(x) trantab[x] } w <- fit[[i]]$y names(w) <- catl[[nam[i]]] formals(h) <- list(x=numeric(0), trantab=w) } else { h <- function(x, trantab) { s <- !is.na(x) res <- rep(NA, length(x)) res[s] <- approxExtrap(trantab, xout=x[s])$y res } fiti <- fit[[i]] formals(h) <- list(x=numeric(0), trantab=if(i==1 && ytype=='transformed') list(x=fiti[[2]],y=fiti[[1]]) else fiti) } g[[i]] <- h } if(type=='list') return(g) fun.name <- paste(prefix, nam, suffix, sep='') for(i in 1:k) assign(fun.name[i], g[[i]], pos=pos) invisible(fun.name) } predict.areg.boot <- function(object, newdata, statistic=c('lp','median','quantile','mean', 'fitted','terms'), q=NULL, ...) 
{
  if(!is.function(statistic)) statistic <- match.arg(statistic)
  fit <- object$fit
  fity <- fit[[1]]
  res <- object$residuals
  if(missing(newdata)) {
    if(statistic=='terms')
      stop('statistic cannot be "terms" when newdata is omitted')
    lp <- object$linear.predictors
    y <- smearingEst(lp, fity, res, statistic=statistic, q=q)
    nac <- object$na.action
    return(if(length(nac)) naresid(nac, y)   ## FEH30Aug09 was nafitted
           else y)
  }

  cof <- object$coefficients
  Fun <- Function(object)
  nam <- names(fit)
  p <- length(nam)-1
  X <- matrix(NA, nrow=length(newdata[[1]]), ncol=p)
  for(i in 1:p) {
    nami <- nam[i+1]
    X[,i] <- Fun[[nami]](newdata[[nami]])
  }
  if(!is.function(statistic) && statistic=='terms') return(X)
  lp <- matxv(X, cof)
  smearingEst(lp, fity, res, statistic=statistic, q=q)
}

monotone <- function(x)
  structure(x, class = unique(c("monotone", attr(x,'class'))))

Mean <- function(object, ...) UseMethod("Mean")
Quantile <- function(object, ...) UseMethod("Quantile")

Mean.areg.boot <- function(object, evaluation=200, ...)
{
  r <- range(object$linear.predictors)
  lp <- seq(r[1], r[2], length=evaluation)
  res <- object$residuals
  ytrans <- object$fit[[1]]
  asing <- function(x)x
  if(length(lp)*length(res) < 100000)
    means <- asing(smearingEst(lp, ytrans, res, statistic='mean'))
  else {
    means <- double(evaluation)
    for(i in 1:evaluation)
      means[i] <- mean(approxExtrap(ytrans, xout=lp[i]+res)$y)
  }
  g <- function(lp, trantab) approxExtrap(trantab, xout=lp)$y
  formals(g) <- list(lp=numeric(0), trantab=list(x=lp, y=means))
  g
}

Quantile.areg.boot <- function(object, q=.5, ...)
{
  if(length(q) != 1 || is.na(q)) stop('q must be length 1 and not NA')
  g <- function(lp, trantab, residualQuantile)
    approxExtrap(trantab, xout=lp+residualQuantile)$y
  formals(g) <- list(lp=numeric(0), trantab=object$fit[[1]],
                     residualQuantile = quantile(object$residuals, q))
  g
}

smearingEst <- function(transEst, inverseTrans, res,
                        statistic=c('median','quantile','mean',
                                    'fitted','lp'),
                        q=NULL)
{
  if(is.function(statistic))
    label <- deparse(substitute(statistic))
  else {
    statistic <- match.arg(statistic)
    switch(statistic,
           median = {statistic <- 'quantile'; q <- .5
                     label <- 'Median'},
           quantile = {
             if(!length(q))
               stop('q must be given for statistic="quantile"')
             label <- paste(format(q),'quantile')
           },
           mean = {
             statistic <- mean
             label <- 'Mean'
           },
           fitted = {
             label <- 'Inverse Transformation'
           },
           lp = {
             label <- 'Transformed'
           })
  }
  y <- if(is.function(statistic)) {
    if(is.list(inverseTrans))
      apply(outer(transEst, res,
                  function(a, b, ytab) approxExtrap(ytab, xout=a+b)$y,
                  inverseTrans), 1, statistic)
    else
      apply(outer(transEst, res,
                  function(a, b, invfun)invfun(a+b),
                  inverseTrans), 1, statistic)
  }
  else switch(statistic,
              lp = transEst,
              fitted = if(is.list(inverseTrans))
                         approxExtrap(inverseTrans, xout=transEst)$y
                       else inverseTrans(transEst),
              quantile = if(is.list(inverseTrans))
                           approxExtrap(inverseTrans,
                                        xout=transEst+quantile(res,q))$y
                         else inverseTrans(transEst+quantile(res,q)))
  structure(y, class='labelled', label=label)
}
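## Editorial usage sketch (not run); `f` and `dd` are hypothetical.
## Mean() and Quantile() return functions of the linear predictor that
## apply the smearing estimator on the original y scale.
if(FALSE) {
  f <- areg.boot(y ~ x1 + x2, data=dd, B=50)
  meanfun <- Mean(f)
  medfun  <- Quantile(f, q=.5)
  lp <- predict(f, dd, statistic='lp')
  cbind(mean=meanfun(lp), median=medfun(lp))
}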
## Hmisc/R/mApply.s

mApply <- function(X, INDEX, FUN, ..., simplify=TRUE, keepmatrix=FALSE)
{
  ## Matrix tapply
  ## X: matrix with n rows; INDEX: vector or list of vectors of length n
  ## FUN: function to operate on submatrices of x by INDEX
  ## ...: arguments to FUN; simplify: see sapply
  ## Modification of code by Tony Plate <tplate@blackmesacapital.com> 10Oct02
  ## If FUN returns more than one number, mApply returns a matrix with
  ## rows corresponding to unique values of INDEX
  ## X should be either a matrix or a vector
  if((!is.matrix(X) && is.array(X)) || is.list(X)){
    if(is.data.frame(X)) X <- as.matrix(X)
    else stop("X must either be a vector or a matrix")
  }
  km <- if(keepmatrix) function(x)x else function(x)drop(x)
  if(!is.matrix(X)) {
    ## X is a vector
    r <- tapply(X, INDEX, FUN, ..., simplify=simplify)
    if(is.matrix(r)) r <- km(t(r))
    else if(simplify && is.list(r))
      r <- km(matrix(unlist(r), nrow=length(r),
                     dimnames=list(names(r),names(r[[1]])),
                     byrow=TRUE))
  } else {
    idx.list <- tapply(1:NROW(X), INDEX, c)
    r <- sapply(idx.list,
                function(idx,x,fun,...) fun(x[idx,,drop=FALSE],...),
                x=X, fun=FUN, ..., simplify=simplify)
    if(simplify) r <- km(t(r))
  }
  dn <- dimnames(r)
  lengthdn <- length(dn)
  if(lengthdn && !length(dn[[lengthdn]])) {
    fx <- FUN(X,...)
    dnl <- if(length(names(fx))) names(fx) else dimnames(fx)[[2]]
    dn[[lengthdn]] <- dnl
    dimnames(r) <- dn
  }
  if(simplify && is.list(r) && is.array(r)) {
    ll <- sapply(r, length)
    maxl <- max(ll)
    empty <- (1:length(ll))[ll==0]
    for(i in empty) r[[i]] <- rep(NA, maxl)
    ## unlist does not keep a place for NULL entries for nonexistent categories
    first.not.empty <- ((1:length(ll))[ll > 0])[1]
    nam <- names(r[[first.not.empty]])
    dr <- dim(r)
    r <- aperm(array(unlist(r), dim=c(maxl,dr),
                     dimnames=c(list(nam),dimnames(r))),
               c(1+seq(length(dr)), 1))
  }
  r
}
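## Editorial usage sketch (not run): per-group column means of a
## matrix, made-up data.
if(FALSE) {
  X <- cbind(a=1:6, b=7:12)
  g <- c(1,1,2,2,3,3)
  mApply(X, g, function(m) apply(m, 2, mean))   # 3 x 2 result
}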
## Hmisc/R/rcorr.cens.s

## Computes rank correlation measures between a variable X and a possibly
## censored variable Y, with event/censoring indicator EVENT
## Rank correlation is an extension of Somers' Dxy = 2(Concordance Prob - .5)
## See Harrell et al JAMA 1984(?)
## Set outx=T to exclude ties in X from computations (-> Goodman-Kruskal
## gamma-type rank correlation)
rcorr.cens <- function(x, S, outx=FALSE)
{
  if(is.Surv(S)) {
    if(attr(S, 'type') != 'right')
      stop('only handles right censored times')
  } else S <- cbind(S, rep(1, length(S)))

  y <- S[,1]
  event <- S[,2]
  if(length(y)!=length(x))
    stop("y must have same length as x")

  miss <- is.na(x) | is.na(y) | is.na(event)
  nmiss <- sum(miss)
  if(nmiss>0) {
    miss <- !miss
    x <- x[miss]
    y <- y[miss]
    event <- event[miss]
  }
  n <- length(x)
  ne <- sum(event)
  storage.mode(x) <- "double"
  storage.mode(y) <- "double"
  storage.mode(event) <- "logical"

  z <- .Fortran(F_cidxcn,x,y,event,length(x),nrel=double(1),nconc=double(1),
                nuncert=double(1),
                c.index=double(1),gamma=double(1),sd=double(1),
                as.logical(outx))
  r <- c(z$c.index,z$gamma,z$sd,n,nmiss,ne,z$nrel,z$nconc,z$nuncert)
  names(r) <- c("C Index","Dxy","S.D.","n","missing","uncensored",
                "Relevant Pairs", "Concordant","Uncertain")
  r
}
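## Editorial usage sketch (not run): C-index / Dxy of a marker against
## right-censored times; Surv() is from the survival package and the
## data are invented.
if(FALSE) {
  set.seed(2)
  marker <- rnorm(100)
  ftime  <- rexp(100)
  ctime  <- runif(100, 0, 2)
  S <- survival::Surv(pmin(ftime, ctime), ftime <= ctime)
  rcorr.cens(marker, S)
}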
title(sub=paste("n=",sum(s),sep=""),adj=0) if(marginals) { axis(1, at = 1:xp1, labels = c(dx, "All/2"), srt=srtx, adj=if(srtx>0)1 else .5) axis(2, at = 1:yp1, labels = c(dy, "All/2"),adj=1) } else { # if(!orig.scale) { axis(1, at=xp, labels=dx, srt=srtx, adj=if(srtx>0)1 else .5) axis(2, at=yp, labels=dy) } return(invisible()) } zz <- cbind(rep(width,length(zz)), inches*zz/maxn, rep(0,length(zz))) symbols(xx,yy,thermometers=zz,inches=FALSE, axes=FALSE,xlab=xlab,ylab=ylab,...) title(sub=paste("n=",sum(s),sep=""),adj=0) if(marginals) { text(xp1-width, yp1, n, adj=1, cex=.5) axis(1, at = 1:xp1, labels = c(dx, "All/2"), srt=srtx, adj=if(srtx>0)1 else .5) axis(2, at = 1:yp1, labels = c(dy, "All/2"),adj=1) abline(h=yp1-.5, lty=2) abline(v=xp1-.5, lty=2) } else { axis(1, at=xp, labels=dx, srt=srtx, adj=if(srtx>0)1 else .5) axis(2, at=yp, labels=dy) cat("click left mouse button to position legend\n") xy <- locator(1) symbols(xy$x, xy$y, thermometers=cbind(width,inches*n/maxn,0), inches=FALSE,add=TRUE,xlab=xlab,ylab=ylab) text(xy$x-width, xy$y, n,adj=1,cex=.5) } box() invisible() } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/summary.formula.s���������������������������������������������������������������������������0000644�0001762�0000144�00000237747�14253371556�015102� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������summary.formula <- function(formula, data=NULL, subset=NULL, na.action=NULL, fun=NULL, method=c('response','reverse','cross'), overall=method == 'response'|method == 'cross', continuous=10, na.rm=TRUE, na.include=method != 'reverse', g=4, quant = c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 0.975), nmin=if(method == 'reverse') 100 else 0, test=FALSE, conTest=conTestkw, catTest=catTestchisq, ordTest=ordTestpo, ...) { call <- match.call() missmethod <- missing(method) method <- match.arg(method) ## Multiple left hand side variables -> automatically call summaryM if(grepl('.*\\+.*~', paste(deparse(formula), collapse=''))) return(summaryM(formula, data=data, subset=subset, na.action=na.action, overall=overall, continuous=continuous, na.include=na.include, quant=quant, nmin=nmin, test=test, conTest=conTest, catTest=catTest, ordTest=ordTest)) X <- match.call(expand.dots=FALSE) X$fun <- X$method <- X$na.rm <- X$na.include <- X$g <- X$overall <- X$continuous <- X$quant <- X$nmin <- X$test <- X$conTest <- X$catTest <- X$... 
<- NULL if(missing(na.action)) X$na.action <- na.retain Terms <- if(missing(data)) terms(formula,'stratify') else terms(formula,'stratify',data=data) X$formula <- Terms X[[1]] <- as.name("model.frame") X <- eval(X, sys.parent()) Terms <- attr(X,"terms") resp <- attr(Terms,"response") if(resp == 0 && missmethod) method <- 'reverse' if(test && method != 'reverse') stop('test=TRUE only allowed for method="reverse"') if(method != 'reverse' && resp != 1) stop("must have a variable on the left hand side of the formula") nact <- attr(X, "na.action") nvar <- ncol(X)-1 strat <- attr(Terms,'specials')$stratify getlab <- function(x, default) { lab <- attr(x, 'label') if(!length(lab) || lab == '') default else lab } if(length(strat)) { if(method != 'response') stop('stratify only allowed for method="response"') temp <- untangle.specials(Terms,'stratify') strat.name <- var.inner(Terms)[temp$terms] strat <- if(length(temp$vars) == 1) as.factor(X[[temp$vars]]) else stratify(X[,temp$vars]) strat.label <- getlab(X[,temp$vars[1]], strat.name) X[[temp$vars]] <- NULL # remove strata factors } else { strat <- factor(rep('',nrow(X))) strat.name <- strat.label <- '' } nstrat <- length(levels(strat)) if(resp>0) { Y <- X[[resp]] yname <- as.character(attr(Terms,'variables'))[2] ylabel <- getlab(Y, yname) if(!is.matrix(Y)) Y <- matrix(Y, dimnames=list(names(Y),yname)) } else { yname <- ylabel <- NULL } if(method != 'reverse') { if(!length(fun)) { # was missing(fun) 25May01 fun <- function(y) apply(y, 2, mean) uy <- unique(Y[!is.na(Y)]) # fixed 16Mar96 r <- range(uy, na.rm=TRUE) funlab <- if(length(uy) == 2 && r[1] == 0 & r[2] == 1) "Fraction" else "Mean" funlab <- paste(funlab, 'of', yname) } else if(is.character(fun) && fun == '%') { fun <- function(y) { stats <- 100*apply(y, 2, mean) names(stats) <- paste(dimnames(y)[[2]],'%') stats } funlab <- paste('% of', yname) } ## Compute number of descriptive statistics per cell s <- if(inherits(Y,'Surv')) as.vector((1 * is.na(unclass(Y))) %*% rep(1, ncol(Y)) > 0) else ((if(is.character(Y)) Y == ''|Y == 'NA' else is.na(Y)) %*% rep(1,ncol(Y))) > 0 stats <- if(length(dim(Y))) fun(Y[!s,,drop=FALSE]) else fun(Y[!s]) nstats <- length(stats) name.stats <- if(length(dn <- dimnames(stats)) == 2) as.vector(outer(dn[[1]],dn[[2]],FUN=function(a,b)paste(b,a))) else names(stats) if(length(fun)) { if(length(de <- deparse(fun)) == 2) { de <- as.list(fun) de <- as.character(de[[length(de)]]) funlab <- if(de[1] == 'apply') de[length(de)] else de[1] ## 2nd case is for simple function(x)mean(x) function } else funlab <- as.character(substitute(fun)) } if(funlab[1] == '') funlab <- yname if(length(name.stats) == 0) { name.stats <- if(nstats == 1) yname else paste0(yname , 1 : nstats) } } if(method == 'response') { X[[resp]] <- NULL # remove response var s <- if(!na.rm) FALSE else if(inherits(Y,'Surv')) as.vector((1 * is.na(unclass(Y))) %*% rep(1, ncol(Y)) > 0) else ((if(is.character(Y)) Y == ''|Y == 'NA' else is.na(Y)) %*% rep(1,ncol(Y))) > 0 nmissy <- sum(s) if(nmissy) { X <- X[!s,,drop=FALSE] Y <- Y[!s,,drop=FALSE] strat <- strat[!s] } ##Compute total number of columns, counting n nc <- nstrat*(1+nstats) colname <- rep(c('N',name.stats),nstrat) rowname <- vname <- vlabel <- vunits <- res <- NULL dm <- dim(X) nx <- dm[2] n <- dm[1] nlevels <- integer(nx) labels <- character(nx) units <- labels i <- 0 nams <- c(names(X), if(overall)'Overall') for(v in nams) { i <- i+1 x <- if(v == 'Overall') factor(rep('',n)) else X[[v]] if(inherits(x,'mChoice')) x <- as.numeric(x) labels[i] <- getlab(x, 
nams[i]) units[i] <- if(length(l <- attr(x,'units'))) l else '' if(!(ismc <- is.matrix(x))) { s <- is.na(x) if(!is.factor(x)) { xu <- unique(x[!s]); lu <- length(xu) x <- if(lu < continuous) { r <- range(xu) if(lu == 2 && r[1] == 0 && r[2] == 1) factor(x,labels=c('No','Yes')) else factor(x) } else cut2(x, g=g, ...) } if(na.include && any(s)) { x <- na.include(x) levels(x)[is.na(levels(x))] <- 'NA' ## R 1.5 and later has NA as level not 'NA', satisfies is.na } xlev <- levels(x) if(nmin > 0) { nn <- table(x); xlev <- names(nn)[nn >= nmin] } } else { xlev <- dimnames(x)[[2]] if(!length(xlev)) stop('matrix variables must have column dimnames') if(!is.logical(x)) { if(is.numeric(x)) x <- x == 1 else { x <- structure(casefold(x),dim=dim(x)) x <- x == 'present' | x == 'yes' } } if(nmin > 0) { nn <- apply(x, 2, sum, na.rm=TRUE) xlev <- xlev[nn >= nmin] } } nlevels[i] <- length(xlev) for(lx in xlev) { r <- NULL for(js in levels(strat)) { j <- if(ismc) strat == js & x[,lx] else strat == js & x == lx if(!na.include) j[is.na(j)] <- FALSE nj <- sum(j) f <- if(nj) { statz <- unlist(fun(Y[j,,drop=FALSE])) ## 23apr03; had just let matrix replicate to fill ## Thanks: Derek Eder <derek.eder@neuro.gu.se> if(length(statz) != nstats) stop(paste('fun for stratum',lx,js,'did not return', nstats, 'statistics')) matrix(statz, ncol=nstats, byrow=TRUE) } else rep(NA,nstats) r <- c(r, nj, f) } res <- rbind(res, r) } rowname <- c(rowname, xlev) bl <- rep('',length(xlev)-1) vname <- c(vname,v,bl) vlabel <- c(vlabel,labels[i],bl) vunits <- c(vunits,units[i],bl) } rowname[rowname == 'NA'] <- 'Missing' dimnames(res) <- list(rowname,colname) at <- list(formula=formula, call=call, n=n, nmiss=nmissy, yname=yname, ylabel=ylabel, ycolname=if(length(d<-dimnames(Y)[[2]]))d else yname, funlab=funlab, vname=vname, vlabel=vlabel, nlevels=nlevels, labels=labels, units=units, vunits=vunits, strat.name=strat.name, strat.label=strat.label, strat.levels=levels(strat)) attributes(res) <- c(attributes(res), at) attr(res,'class') <- 'summary.formula.response' return(res) } if(method == 'reverse') { quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 0.975)) if(resp) { group <- as.factor(X[[resp]])[,drop=TRUE] group.freq <- table(group) group.freq <- group.freq[group.freq>0] if(overall) group.freq <- c(group.freq, Combined=sum(group.freq)) } else { group <- rep(0,nrow(X)) group.freq <- NULL } nv <- ncol(X)-resp n <- integer(nv) type <- n nams <- names(X) comp <- dat <- vector("list",nv) names(comp) <- names(dat) <- if(resp)nams[-1] else nams labels <- Units <- vector("character",nv) if(test) { testresults <- vector('list', nv) names(testresults) <- names(comp) } for(i in 1:nv) { w <- X[[resp+i]] if(length(attr(w, "label"))) labels[i] <- attr(w, "label") if(length(attr(w, 'units'))) Units[i] <- attr(w, 'units') if(!inherits(w, 'mChoice')) { if(!is.factor(w) && !is.logical(w) && length(unique(w[! 
is.na(w)])) < continuous) w <- as.factor(w) s <- !is.na(w) if(na.include && !all(s) && length(levels(w))) { w <- na.include(w) levels(w)[is.na(levels(w))] <- 'NA' s <- rep(TRUE, length(s)) } n[i] <- sum(s) w <- w[s] g <- group[s, drop=TRUE] if(is.factor(w) || is.logical(w)) { tab <- table(w, g) if(test) { if(is.ordered(w)) testresults[[i]] <- ordTest(g, w) else testresults[[i]] <- catTest(tab) } if(nrow(tab) == 1) { # 7sep02 b <- casefold(dimnames(tab)[[1]],upper=TRUE) pres <- c('1','Y','YES','PRESENT') abse <- c('0','N','NO', 'ABSENT') jj <- match(b, pres, nomatch=0) if(jj > 0) bc <- abse[jj] else { jj <- match(b, abse, nomatch=0) if(jj > 0) bc <- pres[jj] } if(jj) { tab <- rbind(tab, rep(0, ncol(tab))) dimnames(tab)[[1]][2] <- bc } } if(overall) tab <- cbind(tab, Combined=apply(tab,1,sum)) comp[[i]] <- tab type[i] <- 1 } else { sfn <- function(x, quant) { o <- options('digits') options(digits=15) ## so won't lose precision in quantile names on.exit(options(o)) c(quantile(x,quant), Mean=mean(x), SD=sqrt(var(x))) } qu <- tapply(w, g, sfn, simplify=TRUE, quants) if(test) testresults[[i]] <- conTest(g, w) if(overall) qu$Combined <- sfn(w, quants) comp[[i]] <- matrix(unlist(qu),ncol=length(quants)+2,byrow=TRUE, dimnames=list(names(qu), c(format(quants),'Mean','SD'))) if(any(group.freq <= nmin)) dat[[i]] <- lapply(split(w,g),nmin=nmin, function(x,nmin) if(length(x) <= nmin)x else NULL) type[i] <- 2 } } else { w <- as.numeric(w) == 1 ## multiple choice variables n[i] <- nrow(w) g <- as.factor(group) ncat <- ncol(w) tab <- matrix(NA, nrow=ncat, ncol=length(levels(g)), dimnames=list(dimnames(w)[[2]], levels(g))) if(test) { pval <- numeric(ncat) names(pval) <- dimnames(w)[[2]] d.f. <- stat <- pval } for(j in 1:ncat) { tab[j,] <- tapply(w[,j], g, sum, simplify=TRUE, na.rm=TRUE) if(test) { tabj <- rbind(table(g) - tab[j, ], tab[j, ]) st <- catTest(tabj) pval[j] <- st$P stat[j] <- st$stat d.f.[j] <- st$df } } if(test) testresults[[i]] <- list(P = pval, stat = stat, df = d.f., testname = st$testname, statname = st$statname, namefun = st$namefun, latexstat = st$latexstat, plotmathstat = st$plotmathstat) if(overall) tab <- cbind(tab, Combined=apply(tab,1,sum)) comp[[i]] <- tab type[i] <- 3 } } labels <- ifelse(nchar(labels), labels, names(comp)) return(structure(list(stats=comp, type=type, group.name=if(resp)nams[1] else NULL, group.label=ylabel, group.freq=group.freq, labels=labels, units=Units, quant=quant, data=dat, N=sum(!is.na(group)), n=n, testresults=if(test)testresults else NULL, call=call, formula=formula), class="summary.formula.reverse")) } if(method == 'cross') { X[[resp]] <- NULL Levels <- vector("list",nvar) nams <- names(X) names(Levels) <- names(X) labels <- character(nvar) for(i in 1:nvar) { xi <- X[[i]] if(inherits(xi,'mChoice')) xi <- factor(format(xi)) else if(is.matrix(xi) && ncol(xi) > 1) stop('matrix variables not allowed for method="cross"') labels[i] <- getlab(xi, nams[i]) if(is.factor(xi)) xi <- xi[,drop=TRUE] if(!is.factor(xi) && length(unique(xi[!is.na(xi)]))>=continuous) xi <- cut2(xi, g=g, ...) 
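      ## Added note (a hedged sketch, not in the original source): cut2()
      ## bins a continuous variable into up to g quantile groups, e.g.
      ##   table(cut2(c(23, 34, 45, 52, 61, 70), g = 3))
      ## yields a factor with three interval-labeled levels, which is how
      ## continuous variables become cross-classifiable here.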
X[[i]] <- na.include(as.factor(xi)) levels(X[[i]])[is.na(levels(X[[i]]))] <- 'NA' Levels[[i]] <- c(levels(X[[i]]),if(overall)"ALL") } ##Make a data frame with all combinations of values (including those ##that don't exist in the data, since trellis needs them) df <- expand.grid(Levels) nl <- nrow(df) N <- Missing <- integer(nl) na <- is.na(Y %*% rep(1,ncol(Y))) S <- matrix(NA, nrow=nl, ncol=nstats, dimnames=list(NULL,name.stats)) chk <- function(z, nstats) { if(length(z) != nstats) stop(paste('fun did not return',nstats, 'statistics for a stratum')) z } if(nvar == 1) { df1 <- as.character(df[[1]]); x1 <- X[[1]] for(i in 1:nl) { s <- df1[i] == 'ALL' | x1 == df1[i] w <- if(na.rm) s & !na else s N[i] <- sum(w) Missing[i] <- sum(na[s]) S[i,] <- if(any(w))chk(fun(Y[w,,drop=FALSE]),nstats) else rep(NA,nstats) } } else if(nvar == 2) { df1 <- as.character(df[[1]]); df2 <- as.character(df[[2]]) x1 <- X[[1]]; x2 <- X[[2]] for(i in 1:nl) { s <- (df1[i] == 'ALL' | x1 == df1[i]) & (df2[i] == 'ALL' | x2 == df2[i]) w <- if(na.rm) s & !na else s N[i] <- sum(w) Missing[i] <- sum(na[s]) S[i,] <- if(any(w))chk(fun(Y[w,,drop=FALSE]),nstats) else rep(NA,nstats) } } else if(nvar == 3) { df1 <- as.character(df[[1]]); df2 <- as.character(df[[2]]) df3 <- as.character(df[[3]]) x1 <- X[[1]]; x2 <- X[[2]]; x3 <- X[[3]] for(i in 1:nl) { s <- (df1[i] == 'ALL' | x1 == df1[i]) & (df2[i] == 'ALL' | x2 == df2[i]) & (df3[i] == 'ALL' | x3 == df3[i]) w <- if(na.rm) s & !na else s N[i] <- sum(w) Missing[i] <- sum(na[s]) S[i,] <- if(any(w))chk(fun(Y[w,,drop=FALSE]),nstats) else rep(NA,nstats) } } else stop('no more than 3 independent variables allowed') lab <- names(df) lab2 <- if(length(lab)>1) paste(lab,collapse=", ") else lab heading <- paste(funlab,"by",lab2) S <- S[,,drop=TRUE] attr(S,"label") <- yname #funlab df$S <- S df$N <- N df$Missing <- Missing a <- list(heading=heading,byvarnames=lab2,Levels=Levels,labels=labels, na.action=nact,formula=formula,call=call,yname=yname,ylabel=ylabel, class=c("summary.formula.cross","data.frame")) attributes(df) <- c(attributes(df), a) df } } ##The following makes formula(object) work (using especially for update()) formula.summary.formula.cross <- function(x, ...) attr(x,'formula') na.retain <- function(d) d print.summary.formula.response <- function(x, vnames=c('labels','names'), prUnits=TRUE, abbreviate.dimnames=FALSE, prefix.width, min.colwidth, formatArgs=NULL, markdown=FALSE, ...) { stats <- x stats <- unclass(stats) vnames <- match.arg(vnames) ul <- vnames == 'labels' at <- attributes(stats) ns <- length(at$strat.levels) vlabels <- at$labels if(prUnits) { atu <- gsub('*', ' ', at$units, fixed=TRUE) vlabels <- ifelse(atu == '',vlabels, paste0(vlabels,' [',atu,']')) } cap <- paste(at$ylabel, if(ns>1) paste0(' by', if(ul) at$strat.label else at$strat.name), ' N=',at$n, if(at$nmiss) paste0(', ',at$nmiss,' Missing')) if(! markdown) cat(cap, '\n\n') d <- dim(stats) nr <- length(at$nlevels) vlab <- if(ul) vlabels[vlabels != ''] else at$vname[at$vname != ''] z <- matrix('',nrow=nr,ncol=1+d[2],dimnames=list(vlab,NULL)) dz <- dimnames(stats)[[1]] cstats <- matrix('',nrow=d[1],ncol=d[2]) for(j in 1:d[2]) { ww <- c(list(stats[,j]), formatArgs) cstats[,j] <- do.call('format', ww) cstats[is.na(stats[,j]),j] <- '' } if(markdown) { if(! 
requireNamespace("knitr", quietly=TRUE)) stop('markdown=TRUE requires the knitr package to be installed') vlab <- if(ul) at$vlabel else at$vname if(prUnits && any(at$units != '')) vlab[vlab != ''] <- paste0(vlab[vlab != ''], '[', at$units, ']') if(vlab[length(vlab)] == 'Overall') vlab[length(vlab)] <- '**Overall**' z <- cbind(vlab, rownames(x), cstats) colnames(z) <- c('', '', colnames(stats)) return(knitr::kable(z, align=c('l', 'l', rep('r', ncol(cstats))), caption=cap)) } is <- 1 for(i in 1:nr) { ie <- is+at$nlevels[i]-1 z[i,1] <- paste(dz[is:ie],collapse='\n') for(j in 1:d[2]) z[i,j+1] <- paste(cstats[is:ie,j],collapse='\n') is <- ie+1 } if(missing(prefix.width)) prefix.width <- max(nchar(dimnames(z)[[1]])) if(missing(min.colwidth)) min.colwidth <- max(min(nchar(cstats)[nchar(cstats)>0]), min(nchar(dimnames(stats)[[2]]))) z <- rbind(c('',dimnames(stats)[[2]]), z) print.char.matrix(z, col.names=FALSE, ...) invisible() } latex.summary.formula.response <- function(object, title=first.word(deparse(substitute(object))), caption, trios, vnames=c('labels','names'), prn=TRUE, prUnits=TRUE, rowlabel='', cdec=2, ncaption=TRUE, ...) { stats <- object if(!prn) stats <- stats[, dimnames(stats)[[2]] != 'N', drop=FALSE] title <- title # otherwise problem with lazy evaluation 25May01 stats <- unclass(stats) at <- attributes(stats) vnames <- match.arg(vnames) ul <- vnames == 'labels' ns <- length(at$strat.levels) nstat <- ncol(stats)/ns if(!missing(trios)) { if(is.logical(trios)) trios <- at$ycolname ntrio <- length(trios) if(ntrio*3 + prn != nstat) #allow for N stop('length of trios must be 1/3 the number of statistics computed') } if(missing(caption)) caption <- latexTranslate(at$ylabel) if(ns>1) caption <- paste(caption,' by', if(ul)at$strat.label else at$strat.name) if(ncaption) caption <- paste0(caption, '~~~~~N=',at$n, if(at$nmiss) paste0(',~',at$nmiss,' Missing')) dm <- dimnames(stats) dm[[1]] <- latexTranslate(dm[[1]], greek=TRUE) dm[[2]] <- latexTranslate(dm[[2]], greek=TRUE) dimnames(stats) <- dm caption <- sedit(caption, "cbind", "") vn <- if(ul)at$vlabel else at$vname if(prUnits) { atvu <- gsub('*', ' ', at$vunits, fixed=TRUE) vn <- ifelse(atvu == '', vn, paste0(vn,'~\\hfill\\tiny{', atvu, '}')) } vn <- latexTranslate(vn, greek=TRUE) if(missing(trios)) { cdec <- rep(cdec, length = nstat) } else { cdec <- rep(cdec, length = nstat / 3) } cdec <- rep(cdec, ns) isn <- colnames(stats) %in% c('N', 'n') if(any(isn) && prn) cdec[isn] <- 0 if(missing(trios)) { cstats <- unclass(stats) } else { fmt <- function(z, cdec) ifelse(is.na(z), '', format(round(z, cdec))) cstats <- list() k <- m <- 0 for(is in 1 : ns) { if(prn) { k <- k + 1 m <- m + 1 cstats[[k]] <- stats[, m] ## N, numeric mode } for(j in 1 : ntrio) { m <- m + 1 k <- k + 1 cstats[[k]] <- paste0('{\\scriptsize ', fmt(stats[,m], cdec[k]), '~}', fmt(stats[,m + 1], cdec[k]), ' {\\scriptsize ', fmt(stats[,m + 2], cdec[k]), '}') m <- m + 2 } } names(cstats) <- rep(c(if(prn)'N' else NULL, trios), ns) attr(cstats, 'row.names') <- dm[[1]] attr(cstats,'class') <- 'data.frame' if(prn) nstat <- 2 # for n.cgroup below else nstat <- 1 } insert.bottom <- if(missing(trios)) '' else '\\noindent {\\scriptsize $a$\\ } $b$ {\\scriptsize $c$\\ } represent the lower quartile $a$, the median $b$, and the upper quartile $c$.' 
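  ## Added illustration: with trios, each cell is typeset by fmt() above as
  ## {\scriptsize a} b {\scriptsize c}, so a quartile trio such as
  ## (0.12, 0.45, 0.78) renders with the median at normal size flanked by
  ## small-font lower and upper quartiles.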
r <- if(ns > 1) latex(cstats, title=title, caption=caption, rowlabel=rowlabel, n.rgroup=at$nlevels, rgroup=vn[vn != ''], n.cgroup=rep(nstat,ns), cgroup=at$strat.levels, cdec=cdec, col.just=rep('c',ncol(cstats)), rowname=dm[[1]], insert.bottom=insert.bottom, ...) else latex(cstats, title=title, caption=caption, rowlabel=rowlabel, n.rgroup=at$nlevels, rgroup=vn[vn != ''], cdec=cdec, col.just=rep('c',ncol(cstats)), rowname=dm[[1]], insert.bottom=insert.bottom, ...) r } plot.summary.formula.response <- function(x, which = 1, vnames = c('labels', 'names'), xlim, xlab, pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), superposeStrata = TRUE, dotfont=1, add=FALSE, reset.par=TRUE, main, subtitles=TRUE, ...) { stats <- x stats <- unclass(stats) vnames <- match.arg(vnames) ul <- vnames == 'labels' at <- attributes(stats) ns <- length(at$strat.levels) if(ns>1 && length(which)>1) stop('cannot have a vector for which if > 1 strata present') if(ns < 2) superposeStrata <- FALSE vn <- if(ul) at$vlabel else at$vname Units <- at$vunits vn <- ifelse(Units == '', vn, paste0(vn, ' [', Units, ']')) ## dotchart2 groups argument may not be an R plotmath expression vn <- vn[vn != ''] d <- dim(stats) n <- d[1] nstat <- d[2]/ns vnd <- factor(rep(vn, at$nlevels)) dn <- dimnames(stats) if(missing(xlim)) xlim <- range(stats[,nstat*((1:ns)-1)+1+which],na.rm=TRUE) if(missing(main)) main <- at$funlab nw <- length(which) pch <- rep(pch, length.out=if(superposeStrata)ns else nw) dotfont <- rep(dotfont, length.out=nw) opar <- par(no.readonly=TRUE) if(reset.par) on.exit(par(opar)) if(superposeStrata) Ns <- apply(stats[,nstat*((1:ns)-1)+1],1,sum) for(is in 1:ns) { for(w in 1:nw) { js <- nstat*(is-1)+1+which[w] z <- stats[,js] if(missing(xlab)) xlab <- if(nw>1) dn[[2]][js] else at$ylabel dotchart2(z, groups=vnd, xlab=xlab, xlim=xlim, auxdata=if(superposeStrata) Ns else stats[,js-which[w]], auxtitle='N', sort.=FALSE, pch=pch[if(superposeStrata)is else w], dotfont=dotfont[w], add=add | w>1 | (is > 1 && superposeStrata), reset.par=FALSE, ...) if(ns>1 && !superposeStrata) title(paste(paste(main,if(main != '')' '),at$strat.levels[is])) else if(main != '') title(main) if(ns == 1 && subtitles) { title(sub=paste0('N=', at$n), adj=0, cex=.6) if(at$nmiss>0) title(sub=paste0('N missing=', at$nmiss), cex=.6, adj=1) } } } if(superposeStrata) { ##set up for Key() Key1 <- function(x=NULL, y=NULL, lev, pch) { oldpar <- par('usr', 'xpd') par(usr=c(0,1,0,1),xpd=NA) on.exit(par(oldpar)) if(is.list(x)) { y <- x$y; x <- x$x } if(!length(x)) x <- 0 if(!length(y)) y <- 1 ## because of formals() rlegend(x, y, legend=lev, pch=pch, ...) invisible() } formals(Key1) <- list(x=NULL,y=NULL,lev=at$strat.levels, pch=pch) .setKey(Key1) } invisible() } plot.summary.formula.reverse <- function(x, vnames = c('labels', 'names'), what = c('proportion','%'), which = c('both', 'categorical', 'continuous'), xlim = if(what == 'proportion') c(0,1) else c(0,100), xlab = if(what == 'proportion') 'Proportion' else 'Percentage', pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), exclude1 = TRUE, dotfont = 1, main, prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001, conType = c('dot', 'bp', 'raw'), cex.means = 0.5, ...) { obj <- x vnames <- match.arg(vnames) what <- match.arg(what) which <- match.arg(which) conType <- match.arg(conType) ul <- vnames == 'labels' if(is.logical(prtest) && ! 
prtest) prtest <- 'none' test <- obj$testresults if(!length(test)) prtest <- 'none' varNames <- names(obj$stats) vn <- if(ul) obj$labels else varNames Units <- obj$units nw <- if(lg <- length(obj$group.freq)) lg else 1 gnames <- names(obj$group.freq) if(missing(main)) main <- if(nw == 1)'' else paste(if(what == 'proportion')'Proportions' else 'Percentages','Stratified by', obj$group.label) pch <- rep(pch, length.out=nw) dotfont <- rep(dotfont, length.out=nw) lab <- vnd <- z <- nmiss <- vnamd <- NULL type <- obj$type; n <- obj$n opar <- par() on.exit(setParNro(opar)) npages <- 0 if(which != 'continuous' && any(type %in% c(1,3))) { ftstats <- NULL for(i in (1:length(type))[type == 1 | type == 3]) { nam <- vn[i] tab <- obj$stats[[i]] if(nw == 1) tab <- as.matrix(tab) nr <- nrow(tab) denom <- if(type[i] == 1) apply(tab, 2, sum) else obj$group.freq y <- (if(what == 'proportion') 1 else 100) * sweep(tab, 2, denom, FUN='/') lev <- dimnames(y)[[1]] exc <- exclude1 && (nr == 2) jstart <- if(exc) 2 else 1 ## nn <- c(nn, n[i], rep(NA, if(exc) nr-2 else nr-1)) ## k <- 0 rl <- casefold(lev) binary <- type[i] == 1 && exc && (all(rl %in% c("0","1"))|all(rl %in% c("false","true"))| all(rl %in% c("absent","present"))) for(j in jstart:nrow(y)) { if(nw == 1) z <- rbind(z, y[j,]) else { yj <- rep(NA, nw) names(yj) <- gnames yj[names(y[j,])] <- y[j,] z <- rbind(z, yj) } lab <- c(lab, if(binary) '' else lev[j]) vnd <- c(vnd, nam) vnamd <- c(vnamd, varNames[i]) } if(any(prtest != 'none')) { fts <- formatTestStats(test[[varNames[i]]], type[i] == 3, if(type[i] == 1)1 else 1 : nr, prtest=prtest, plotmath=TRUE, pdig=pdig, eps=eps) ftstats <- c(ftstats, fts, if(type[i] == 1 && nr - exc - 1 > 0) rep(expression(''), nr - exc - 1)) } } dimnames(z) <- list(lab, dimnames(z)[[2]]) for(i in 1 : nw) { zi <- z[,i] if(any(prtest == 'none') || i > 1) dotchart2(zi, groups=vnd, xlab=xlab, xlim=xlim, sort.=FALSE, pch=pch[i], dotfont=dotfont[i], add=i > 1, ...) else dotchart2(zi, groups=vnd, auxdata=ftstats, xlab=xlab, xlim=xlim, sort.=FALSE, pch=pch[i], dotfont=dotfont[i], add=i > 1, ...) } if(main != '') title(main) npages <- npages + 1 setParNro(opar) ## Dummy key if only one column, so won't use another Key from an ## earlier run if(nw < 2) { Key1 <- function(...)invisible(NULL) .setKey(Key1) } else { ##set up for key() if > 1 column Key3 <- function(x=NULL, y=NULL, lev, pch) { oldpar <- par('usr', 'xpd') par(usr=c(0,1,0,1),xpd=NA) on.exit(par(oldpar)) if(is.list(x)) { y <- x$y; x <- x$x } ## Even though par('usr') shows 0,1,0,1 after lattice draws ## its plot, it still needs resetting if(!length(x)) x <- 0 if(!length(y)) y <- 1 ## because of formals() rlegend(x, y, legend=lev, pch=pch, ...) 
invisible() } formals(Key3) <- list(x=NULL,y=NULL,lev=names(obj$group.freq), pch=pch) .setKey(Key3) } } ncont <- sum(type == 2) if(which != 'categorical' && ncont) { mf <- par('mfrow') if(length(mf) == 0) mf <- c(1,1) if(ncont > 1 & max(mf) == 1) { mf <- if(ncont <= 4)c(2,2) else if(ncont <= 6)c(2,3) else if(ncont <= 9)c(3,3) else c(4,3) ## if(ncont <= 12)c(4,3) else if(ncont <= 16) c(4,4) else c(5,4) nr <- mf[1] m <- par('mar') par(mfrow=mf) } npages <- npages + ceiling(sum(type == 2) / prod(mf)) for(i in (1:length(type))[type == 2]) { nam <- labelPlotmath(vn[i], Units[i]) st <- obj$stats[[i]] if(nw == 1) st <- as.matrix(st) if(conType == 'dot') { quantile.columns <- dimnames(st)[[2]] %nin% c('Mean','SD') st <- st[,quantile.columns,drop=FALSE] xlim <- range(st) ns <- as.numeric(dimnames(st)[[2]]) l <- 1:length(ns) q1 <- l[abs(ns-.25) < .001] med <- l[abs(ns-.5) < .001] q3 <- l[abs(ns-.75) < .001] st <- st[,c(q1,med,q3),drop=FALSE] for(j in 1:3) { stj <- st[,j] if(nw == 1) names(stj) <- '' dotchart2(stj, xlab=nam, xlim=xlim, sort.=FALSE, pch=c(91, if(FALSE)183 else 16, 93)[j], dotfont=dotfont[1], add=j > 1, ...) } Key2 <- function(x=NULL, y=NULL, quant, ...) { quant <- format(quant) txt <- paste0('(',quant[2],',',quant[3],',',quant[4], ') quantiles shown\nx-axes scaled to (',quant[1],',', quant[5],') quantiles') if(length(x)) { if(is.list(x)) { y <- x$y; x <- x$x } text(x,y,txt, cex=.8, adj=0, ...) } else mtitle(lr=txt, cex.l=.8, line=1, ...) invisible() } formals(Key2) <- list(x=NULL,y=NULL,quant=obj$quant) .setKey2(Key2) } else if(conType == 'bp') bpplt(st, xlab=nam, cex.points=cex.means) else stripChart(obj$data[[i]], xlab=nam) if(all(prtest != 'none')) { fts <- formatTestStats(test[[varNames[i]]], prtest=prtest, plotmath=TRUE, pdig=pdig, eps=eps) title(fts, line=.5) } } } invisible(npages) } #This version of the stardard dotchart function allows a vector of values #to be specified (typically cell sizes) that are written to the right #or horizontal (only) dot charts. New vectors and auxdata and auxgdata and #a label for auxdata, auxtitle. #Also added: sort. parameter, to allow suppression of rearrangements of data, #and added the parameter `add'. Reference lines are always drawn with lwd=1. #There's also a new parameter, groupfont, which specifies a font number for #group headings. #cex.labels is a cex to be used only for category labels. Default is cex. #Added reset.par - set to T to reset par() after making plot. You will #need to set reset.par to T for the last call in a sequence. dotchart2 <- function(data, labels, groups = NULL, gdata = NA, horizontal = TRUE, pch = 16, xlab = "", ylab="", xlim=NULL, auxdata, auxgdata=NULL, auxtitle, lty = 1, lines = TRUE, dotsize = .8, cex = par("cex"), cex.labels = cex, cex.group.labels = cex.labels*1.25, sort.=TRUE, add=FALSE, dotfont=par('font'), groupfont=2, reset.par=add, xaxis=TRUE, width.factor=1.1, lcolor='gray', leavepar=FALSE, axisat=NULL, axislabels=NULL, ...) { if(!add) { plot.new() ## needed for strwidth par(new=TRUE) } ieaux <- if(missing(auxdata)) FALSE else is.expression(auxdata) mtextsrt <- function(..., srt=0) mtext(..., las=1) ndata <- length(data) if(missing(labels)) { if(!is.null(names(data))) labels <- names(data) else labels <- paste("#", seq(along = ndata)) } else labels <- rep(as.character(labels), length = ndata) if(missing(groups)) { glabels <- NULL gdata <- NULL } else { if(!sort.) { ##assume data sorted in groups, but re-number groups ##to be as if groups given in order 1,2,3,... 
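      ## e.g. groups=c('b','b','a') with sort.=FALSE yields factor levels
      ## c('b','a'), preserving the order in which groups appear in the data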
ug <- unique(as.character(groups)) groups <- factor(as.character(groups),levels=ug) } groups <- unclass(groups) glabels <- levels(groups) gdata <- rep(gdata, length = length(glabels)) ord <- order(groups, seq(along = groups)) groups <- groups[ord] data <- data[ord] labels <- labels[ord] if(!missing(auxdata)) auxdata <- auxdata[ord] } alldat <- c(data, gdata) if(!missing(auxdata)) { auxdata <- c(auxdata, auxgdata) if(!ieaux) auxdata <- format(auxdata) } alllab <- paste(c(labels, glabels),'') ## set up margins and user coordinates, draw box tcex <- par('cex') tmai <- par("mai") oldplt <- par("plt") if(reset.par && !leavepar) on.exit(par(mai = tmai, cex = tcex)) par(cex = cex) mxlab <- .1+max(strwidth(labels, units='inches',cex=cex.labels), if(length(glabels)) strwidth(glabels,units='inches',cex=cex.group.labels))* width.factor if(horizontal) { tmai2 <- tmai[3:4] if(!missing(auxdata)) tmai2[2] <- .2+width.factor* max(strwidth(if(ieaux) auxdata else format(auxdata), units='inches',cex=cex.labels)) if(!leavepar) par(mai = c(tmai[1], mxlab, tmai2)) if(!add) plot(alldat, seq(along = alldat), type = "n", ylab = '', axes = FALSE, xlab = '', xlim=xlim, ...) logax <- par("xaxt") == "l" } else { if(!leavepar) par(mai = c(mxlab, tmai[2:4])) if(!add) plot(seq(along = alldat), alldat, type = "n", xlab = "", axes = FALSE, ylab = '', ...) logax <- par("yaxt") == "l" } tusr <- par("usr") if(!add && logax) { if(horizontal) abline(v = 10^tusr[1:2], h = tusr[3:4]) else abline(v = tusr[1:2], h = 10^tusr[3:4]) } else if(!add) abline(v = tusr[1:2], h = tusr[3:4]) den <- ndata + 2 * length(glabels) + 1 if(horizontal) { if(!add && xaxis) mgp.axis(1, axistitle=xlab, at=axisat, labels=axislabels) delt <- ( - (tusr[4] - tusr[3]))/den ypos <- seq(tusr[4], by = delt, length = ndata) } else { if(!add) mgp.axis(2, axistitle=xlab, at=axisat, labels=axislabels) delt <- (tusr[2] - tusr[1])/den ypos <- seq(tusr[1], by = delt, length = ndata) } if(!missing(groups)) { ypos1 <- ypos + 2 * delt * (if(length(groups)>1) cumsum(c(1, diff(groups) > 0)) else 1) diff2 <- c(3 * delt, diff(ypos1)) ypos2 <- ypos1[abs(diff2 - 3 * delt) < abs(0.001 * delt)] - delt ypos <- c(ypos1, ypos2) - delt } ##put on labels and data ypos <- ypos + delt nongrp <- 1:ndata if(horizontal) { xmin <- par('usr')[1] if(!add && lines) abline(h = ypos[nongrp], lty = lty, lwd=1, col=lcolor) points(alldat, ypos, pch = pch, cex = dotsize * cex, font=dotfont, ...) if(!add && !missing(auxdata)) { faux <- if(ieaux) auxdata else format(auxdata) upedge <- par('usr')[4] outerText(faux, ypos[nongrp], cex=cex.labels) if(!missing(auxtitle)) outerText(auxtitle, upedge+strheight(auxtitle,cex=cex.labels)/2, cex=cex.labels) } if(!add) { labng <- alllab[nongrp] ## Bug in sending character strings to mtext or text containing ## [ or ] - they don't right-justify in S+ bracket <- substring(labng,1,1) == '[' | substring(labng,nchar(labng),nchar(labng)) == ']' yposng <- ypos[nongrp] s <- !bracket if(!is.na(any(s)) && any(s)) mtextsrt(paste(labng[s],''), 2, 0, at=yposng[s], srt=0, adj=1, cex=cex.labels) s <- bracket if(!is.na(any(s)) && any(s)) text(rep(par('usr')[1],sum(s)), yposng[s], labng[s], adj=1, cex=cex.labels, srt=0,xpd=NA) if(!missing(groups)) mtextsrt(paste(alllab[ - nongrp],''), 2, 0, at = ypos[ - nongrp], srt = 0, adj = 1, cex = cex.group.labels, font=groupfont) } } else { if(!add && lines) abline(v = ypos[nongrp], lty = lty, lwd=1, col=lcolor) points(ypos, alldat, pch = pch, cex = dotsize * cex, font=dotfont, ...) 
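    ## vertical chart: mirrors the horizontal branch above with the x/y
    ## roles swapped; category labels are drawn rotated 90 degrees below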
if(!add) mtextsrt(alllab[nongrp], 1, 0, at = ypos[nongrp], srt = 90, adj = 1, cex = cex.labels) if(!add && !missing(groups)) mtextsrt(alllab[ - nongrp], 1, 0, at = ypos[ - nongrp], srt = 90, adj = 1, cex = cex.group.labels, font=groupfont) } plt <- par("plt") if(horizontal) { frac <- (oldplt[2] - oldplt[1])/(oldplt[2] - plt[1]) umin <- tusr[2] - (tusr[2] - tusr[1]) * frac tusr <- c(umin, tusr[2:4]) } else { frac <- (oldplt[4] - oldplt[3])/(oldplt[4] - plt[3]) umin <- tusr[4] - (tusr[4] - tusr[3]) * frac tusr <- c(tusr[1:2], umin, tusr[4]) } invisible() } print.summary.formula.reverse <- function(x, digits, prn=any(n != N), pctdig=0, what=c('%', 'proportion'), npct=c('numerator','both','denominator','none'), exclude1=TRUE, vnames=c("labels","names"), prUnits=TRUE, sep="/", abbreviate.dimnames=FALSE, prefix.width=max(nchar(lab)), min.colwidth, formatArgs=NULL, round=NULL, prtest=c('P','stat','df','name'), prmsd=FALSE, long=FALSE, pdig=3, eps=0.001, ...) { npct <- match.arg(npct) vnames <- match.arg(vnames) what <- match.arg(what) if(is.logical(prtest) && !prtest) prtest <- 'none' stats <- x$stats nv <- length(stats) cstats <- lab <- character(0) nn <- integer(0) type <- x$type n <- x$n N <- x$N nams <- names(stats) labels <- x$labels Units <- x$units test <- x$testresults if(! length(test)) prtest <- 'none' nw <- if(lg <- length(x$group.freq)) lg else 1 gnames <- names(x$group.freq) if(!missing(digits)) { oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) } cstats <- NULL for(i in 1:nv) { nn <- c(nn, n[i]) nam <- if(vnames == "names") nams[i] else labels[i] if(prUnits && nchar(Units[i])) nam <- paste0(nam,' [', gsub('*',' ', Units[i], fixed=TRUE),']') tr <- if(length(test) && all(prtest != 'none')) test[[nams[i]]] else NULL if(type[i] == 1 || type[i] == 3) { cs <- formatCats(stats[[i]], nam, tr, type[i], if(length(x$group.freq)) x$group.freq else x$n[i], what, npct, pctdig, exclude1, long, prtest, pdig=pdig, eps=eps) nn <- c(nn, rep(NA, nrow(cs) - 1)) } else cs <- formatCons(stats[[i]], nam, tr, x$group.freq, prmsd, sep, formatArgs, round, prtest, pdig=pdig, eps=eps) cstats <- rbind(cstats, cs) } lab <- dimnames(cstats)[[1]] gl <- names(x$group.freq) gl <- if(length(gl)) paste0(gl," \n(N=",x$group.freq,")") else "" if(length(test) && ! all(prtest == 'none')) gl <- c(gl, if(length(prtest) == 1 && prtest != 'stat') if(prtest == 'P')'P-value' else prtest else ' Test\nStatistic') nc <- nchar(cstats) spaces <- substring(" ", 1, (max(nc)-nc+1)/2) # center strings dc <- dim(cstats) cstats <- paste0(spaces, cstats) dim(cstats) <- dc if(prn) { cnn <- format(nn) cnn[is.na(nn)] <- '' cstats <- cbind(cnn, cstats) gl <- c('N', gl) } cstats <- rbind(gl, cstats) dimnames(cstats) <- list(c('',lab), NULL) cat("\n\nDescriptive Statistics", if(length(x$group.label)) paste(" by",x$group.label) else paste0(" (N=",x$N,")"),"\n\n", sep="") if(missing(min.colwidth)) min.colwidth <- max(min(nchar(gl)),min(nc[nc>0])) print.char.matrix(cstats, col.names=FALSE, col.txt.align='left', ...) 
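  ## Example usage (an added, hedged sketch; variable names are
  ## illustrative only, not from the original file):
  ##   s <- summary(treatment ~ age + sex, method='reverse', test=TRUE)
  ##   print(s, prtest='P', npct='both', prmsd=TRUE)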
invisible(cstats) } ## Function to format subtable for categorical var, for method='reverse' formatCats <- function(tab, nam, tr, type, group.freq, what=c('%', 'proportion'), npct, pctdig, exclude1, long, prtest, lang='plain', testUsed=character(0), npct.size='scriptsize', pdig=3, eps=.001, footnoteTest=TRUE, dotchart=FALSE, mspecs=markupSpecs) { what <- match.arg(what) gnames <- names(group.freq) nr <- nrow(tab) specs <- mspecs[[lang]] spc <- specs$space sspc <- if(lang == 'plain') '' else specs$sspace lspc <- specs$lspace bold <- specs$bold frac <- specs$frac if(lang != 'latex' && ! is.function(npct.size)) npct.size <- function(x) x else if(! is.function(npct.size)) { npctsize <- npct.size npct.size <- function(x) paste0('{\\', npctsize, ' ', x, '}') } ## If there was a missing column of tab because e.g. the variable was ## always NA for one (or more) of the groups, add columns of NAs if(ncol(tab) < length(group.freq)) { tabfull <- matrix(NA, nrow=nr, ncol=length(group.freq), dimnames=list(dimnames(tab)[[1]], gnames)) tabfull[,dimnames(tab)[[2]]] <- tab tab <- tabfull } denom <- if(type == 1) apply(tab, 2, sum) else group.freq pct <- if(ncol(tab) > 1) sweep(tab, 2, denom, FUN='/') else tab / denom pct <- pct * (if(what == '%') 100 else 1) cpct <- paste0(format(round(pct, pctdig)), if(lang == 'latex' && what == '%') '\\%' else if(what == '%') "%") denom.rep <- matrix(rep(format(denom), nr), nrow=nr, byrow=TRUE) if(npct != 'none') cpct <- paste(cpct, switch(npct, numerator = npct.size(paste0(' (', format(tab), ')')), denominator = npct.size(paste0(' of ', denom.rep)), both = npct.size(paste0(frac(format(tab), denom.rep))), slash = npct.size(paste0(spc, format(tab), sspc, '/', sspc, denom.rep)) ) ) if(lang == 'latex') cpct <- sedit(cpct, ' ', spc) dim(cpct) <- dim(pct) dimnames(cpct) <- dimnames(pct) cpct[is.na(pct)] <- "" lev <- dimnames(pct)[[1]] exc <- exclude1 && (nr == 2) && (type == 1) rl <- casefold(dimnames(pct)[[1]]) binary <- type == 1 && exc && (all(rl %in% c("0","1")) | all(rl %in% c("false","true")) | all(rl %in% c("absent","present"))) if(binary) long <- FALSE jstart <- if(exc) 2 else 1 nw <- if(lg <- length(group.freq)) lg else 1 lab <- if(binary) nam else if(long) c(nam, paste(spc, spc, lev[jstart : nr])) else c(paste(nam, ':', lev[jstart]), if(nr > jstart) paste(lspc, lev[(jstart + 1) : nr])) cs <- matrix('', nrow=long + (if(exc) nr - 1 else nr), ncol = nw + (length(tr) > 0), dimnames = list(lab, c(gnames, if(length(tr)) '' else NULL))) if(nw == 1) cs[(long + 1) : nrow(cs), 1] <- cpct[jstart : nr, ] else cs[(long + 1) : nrow(cs), 1 : nw] <- cpct[jstart : nrow(cpct), gnames] if(lang == 'latex' && dotchart && ncol(pct) <= 3) { locs <- c(3,-3,5,-5,7,-7,9,-9) points <- c("\\circle*{4}","\\circle{4}","\\drawline(0,2)(-1.414213562,-1)(1.414213562,-1)(0,2)") point.loc <- sapply(jstart:nrow(pct), function(i) { paste(ifelse(is.na(pct[i,]), "", paste0("\\put(", pct[i,], ",0){", points[1:ncol(pct)],"}")), collapse='') }) error.loc <- character(nrow(tab) - exc) k <- 0 for(i in jstart:ncol(tab)) { if(i > jstart) { p1prime <- (tab[,i] + 1)/(denom[i] + 2) d1 <- p1prime*(1-p1prime)/denom[i] for(j in jstart:(i-1)) { k <- k + 1 p2prime <- (tab[,j] + 1)/(denom[j] + 2) error <- 196 * sqrt(d1 + p2prime * (1 - p2prime)/denom[j]) bar <- ifelse(is.na(error), "", paste0("\\put(", (pct[,i] + pct[,j])/2 - error, ",", locs[k],"){\\line(1,0){",error*2,"}}")) error.loc <- paste0(error.loc, bar) } } } scale <- character(nrow(tab) - exc) scale[1] <- 
"\\multiput(0,2)(25,0){5}{\\color[gray]{0.5}\\line(0,-1){4}}\\put(-5,0){\\makebox(0,0){\\tiny 0}}\\put(108,0){\\makebox(0,0){\\tiny 1}}" cl <- paste0("\\setlength\\unitlength{1in/100}\\begin{picture}(100,10)(0,-5)", scale,"\\put(0,0){\\color[gray]{0.5}\\line(1,0){100}}", point.loc, error.loc, "\\end{picture}") cs[(long + 1) : nrow(cs), ncol(cs)] <- cl } if(length(tr)) { ct <- formatTestStats(tr, type == 3, if(type == 1) 1 else 1 : nr, prtest, lang=lang, testUsed=testUsed, pdig=pdig, eps=eps, footnoteTest=footnoteTest, mspecs=mspecs) if(length(ct) == 1) cs[1, ncol(cs)] <- ct else cs[(long + 1) : nrow(cs), ncol(cs)] <- ct } cs } ## Function to format subtable for continuous var, for method='reverse' formatCons <- function(stats, nam, tr, group.freq, prmsd, sep='/', formatArgs=NULL, round=NULL, prtest, lang='plain', testUsed=character(0), middle.bold=FALSE, outer.size=NULL, msdsize=NULL, brmsd=FALSE, pdig=3, eps=.001, footnoteTest=TRUE, prob=c(0.25, 0.5, 0.75), prN=FALSE, mspecs=markupSpecs) { specs <- mspecs[[lang]] spc <- specs$space bold <- if(middle.bold) specs$bold else function(x) x lspc <- specs$lspace sup <- specs$sup br <- specs$br plminus <- specs$plminus math <- specs$math if(lang != 'latex' || ! length(msdsize)) msdsize <- function(x) x if(! is.function(msdsize)) { Msdsize <- msdsize msdsize <- function(x) paste0('{\\', Msdsize, ' ', x, '}') } if(lang == 'plain') outer.size <- function(x) x if(! is.function(outer.size)) { Outer.size <- outer.size outer.size <- function(x) paste0('{\\', Outer.size, ' ', x, '}') } nw <- if(lg <- length(group.freq)) lg else 1 ns <- dimnames(stats)[[2]] ns <- ifelse(ns %in% c('Mean','SD','N'), '-1', ns) ns <- as.numeric(ns) l <- 1:length(ns) if(length(prob) == 3) { qs <- numeric(3) for(i in seq_along(qs)) { qs[i] <- l[abs(ns - prob[i]) < .001] } } else { q1 <- l[abs(ns - .25) < .001] med <- l[abs(ns - .5 ) < .001] q3 <- l[abs(ns - .75) < .001] qs <- c(q1, med, q3) } qu <- stats[,qs,drop=FALSE] if(prmsd) qu <- cbind(qu, stats[, c('Mean', 'SD'), drop=FALSE]) if(length(round) && round == 'auto') { r <- max(abs(stats[, colnames(stats) %nin% c('N', 'SD')]), na.rm=TRUE) round <- if(r == 0) 2 else max(0, min(5, 3 - round(log10(r)))) } if(length(round)) qu <- round(qu, round) ww <- c(list(qu), formatArgs) cqu <- do.call('format', ww) if(prN) cqu <- cbind(cqu,stats[,'N',drop=FALSE]) cqu[is.na(qu)] <- '' if(lang != 'plain') { st <- character(nrow(cqu)) names(st) <- dimnames(qu)[[1]] for(j in 1:nrow(cqu)) { st[j] <- paste0(outer.size(cqu[j, 1]), ' ', bold( cqu[j, 2]), ' ', outer.size(cqu[j, 3])) if(prmsd) { z <- if(brmsd) paste0(br, msdsize(paste0(cqu[j, 4], spc, plminus, spc, cqu[j, 5]))) else paste0(spc, spc, msdsize(paste0('(', cqu[j, 4], spc, plminus, cqu[j, 5], ')'))) st[j] <- paste0(st[j], z) } if(prN) st[j] <- paste0(st[j], outer.size(paste0(spc, math(paste0('N=', cqu[j, ncol(cqu)]))))) } } else { if(prmsd) { st <- apply(cqu, 1, function(x,sep) paste(x[1], sep, x[2], sep,x[3], ' ', x[4], '+/-', x[5], sep=''), sep=sep) } else { st <- apply(cqu[,seq(3),drop=FALSE], 1, paste, collapse=sep) } if(prN) { st <- setNames(sprintf("%s N=%s", st, cqu[, ncol(cqu), drop=FALSE]), names(st)) } } ### if(any(is.na(qu))) st <- "" # Why was this here? 
if(nw == 1) yj <- st else { yj <- rep('', nw) names(yj) <- names(group.freq) yj[names(st)] <- st } if(length(tr)) { ct <- formatTestStats(tr, prtest=prtest, lang=lang, testUsed=testUsed, pdig=pdig, eps=eps, footnoteTest=footnoteTest, mspecs=mspecs) yj <- c(yj, ct) } matrix(yj, nrow=1, dimnames=list(nam,names(yj))) } formatTestStats <- function(tr, multchoice=FALSE, i=if(multchoice) NA else 1, prtest, lang='plain', testUsed=character(0), pdig=3, eps=.001, plotmath=FALSE, footnoteTest=TRUE, mspecs=markupSpecs) { ## tr=an element of testresults (created by summary.formula method='reverse') ## or summaryM if(any(i > 1) && ! multchoice) stop('logic error') ## was i > 1; length mismatch specs <- mspecs[[lang]] spc <- specs$space sup <- specs$sup math <- specs$math pval <- tr$P[i] teststat <- tr$stat[i] testname <- tr$testname if(any(is.na(pval)) || any(is.na(teststat))) { res <- rep('', length(pval)) if(lang == 'latex' && length(testUsed)) res <- if(footnoteTest) rep(paste0(sup(match(testname, testUsed))), length(pval)) else rep('', length(pval)) return(res) } ## Note: multchoice tests always have only one type of d.f. deg <- if(multchoice) tr$df[i] else tr$df dof <- if(multchoice) as.character(deg) else paste(deg, collapse=',') namefun <- specs[[tr$namefun]] ## function for typesetting stat name statmarkup <- if(lang == 'latex') tr$latexstat else if(plotmath) tr$plotmathstat else tr$statname if(length(prtest) > 1 && 'stat' %in% prtest && (lang != 'plain' || plotmath)) { if(plotmath) { ## replace "df" inside statmarkup with actual d.f. if(length(grep('df', statmarkup))) statmarkup <- sedit(statmarkup, 'df', if(lang == 'latex' || length(deg) == 1) dof else paste0('list(', dof, ')')) } else statmarkup <- namefun(deg) } pval <- format.pval(pval, digits=pdig, eps=eps) plt <- substring(pval,1,1) == '<' if(any(plt) && lang == 'latex') pval <- sub('<', '\\\\textless ', pval) if(lang != 'plain') { if(length(prtest) == 1) paste0( switch(prtest, P = pval, stat = format(round(teststat, 2)), df = dof, name = statmarkup), if(footnoteTest && length(testUsed)) paste0(sup(match(testname, testUsed)))) else paste0( if('stat' %in% prtest) paste0(statmarkup, '=', format(round(teststat, 2))), if(all(c('stat', 'P') %in% prtest)) (if(lang == 'html') ', ' else paste0(',', spc)), if('P' %in% prtest) paste0('P',ifelse(plt,'','='), pval), if(footnoteTest && length(testUsed)) paste0(sup(match(testname, testUsed)))) } else if(plotmath) { if(length(prtest) == 1) parse(text=switch(prtest, P = ifelse(plt, paste0('~', 'P', pval), paste0('~', 'P==', pval)), stat = format(round(teststat, 2)), dof = format(dof), name = statmarkup)) else parse(text=paste(if('stat' %in% prtest) paste0('~list(',statmarkup,'==', format(round(teststat,2))), if(all(c('stat','P') %in% prtest)) ', ', if('P' %in% prtest)paste0(ifelse(plt,'~P','~P=='), pval,')'))) } else { if(length(prtest) == 1) switch(prtest, P = pval, stat = format(round(teststat, 2)), df = dof, name = statmarkup) else paste(if('stat' %in% prtest) paste0(statmarkup, '=', format(round(teststat, 2))), if('df' %in% prtest) paste0('d.f.=', dof), if('P' %in% prtest) paste0('P', ifelse(plt,'','='), pval)) } } latex.summary.formula.reverse <- function(object, title=first.word(deparse(substitute(object))), digits, prn = any(n != N), pctdig=0, what=c('%', 'proportion'), npct=c('numerator','both','denominator','slash','none'), npct.size='scriptsize', Nsize='scriptsize', exclude1=TRUE, vnames=c("labels","names"), prUnits=TRUE, middle.bold=FALSE, outer.size="scriptsize", caption, rowlabel="", 
insert.bottom=TRUE, dcolumn=FALSE, formatArgs=NULL, round=NULL, prtest=c('P','stat','df','name'), prmsd=FALSE, msdsize=NULL, long=dotchart, pdig=3, eps=.001, auxCol=NULL, dotchart=FALSE, ...) { x <- object npct <- match.arg(npct) vnames <- match.arg(vnames) what <- match.arg(what) if(is.logical(prtest) && ! prtest) prtest <- 'none' stats <- x$stats nv <- length(stats) cstats <- lab <- character(0) nn <- integer(0) type <- x$type n <- x$n N <- x$N nams <- names(stats) labels <- x$labels Units <- x$units nw <- if(lg <- length(x$group.freq)) lg else 1 gnames <- names(x$group.freq) test <- x$testresults if(! length(test)) prtest <- 'none' mspecs <- markupSpecs gt1.test <- if(all(prtest == 'none')) FALSE else length(unique(sapply(test, function(a)a$testname))) > 1 if(!missing(digits)) { oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) } if(missing(caption)) caption <- paste0("Descriptive Statistics", if(length(x$group.label)) paste(" by",x$group.label) else paste0(" $(N=",x$N,")$")) bld <- if(middle.bold) '\\bf ' else '' cstats <- NULL testUsed <- auxc <- character(0) for(i in 1:nv) { if(length(auxCol)) auxc <- c(auxc, auxCol[[1]][i]) nn <- c(nn, n[i]) nam <- if(vnames == "names") nams[i] else labels[i] if(prUnits && nchar(Units[i]) > 0) nam <- paste0(nam, '~\\hfill\\tiny{', gsub('*',' ', Units[i], fixed=TRUE),'}') tr <- if(length(test) && all(prtest != 'none')) test[[nams[i]]] else NULL if(length(test) && all(prtest != 'none')) testUsed <- unique(c(testUsed, tr$testname)) if(type[i] %in% c(1, 3)) { cs <- formatCats(stats[[i]], nam, tr, type[i], if(length(x$group.freq)) x$group.freq else x$n[i], what, npct, pctdig, exclude1, long, prtest, lang='latex', testUsed=testUsed, npct.size=npct.size, pdig=pdig, eps=eps, footnoteTest=gt1.test, dotchart=dotchart, mspecs=mspecs) nn <- c(nn, rep(NA, nrow(cs)-1)) } else cs <- formatCons(stats[[i]], nam, tr, x$group.freq, prmsd, prtest=prtest, formatArgs=formatArgs, round=round, lang='latex', testUsed=testUsed, middle.bold=middle.bold, outer.size=outer.size, msdsize=msdsize, pdig=pdig, eps=eps, footnoteTest=gt1.test, mspecs=mspecs) cstats <- rbind(cstats, cs) if(length(auxc) && nrow(cstats) > 1) auxc <- c(auxc, rep(NA, nrow(cs)-1)) } lab <- dimnames(cstats)[[1]] gl <- names(x$group.freq) if(!length(gl)) gl <- " " lab <- latexTranslate(lab, c(" "), c("~"), greek=TRUE) gl <- latexTranslate(gl, greek=TRUE) extracolheads <- if(any(gl != " ")) c(if(prn)'', paste0('$N=', x$group.freq, '$')) else NULL # 21jan03 if(length(test) && !all(prtest == 'none')) { gl <- c(gl, if(length(prtest) == 1 && prtest != 'stat') if(prtest == 'P') 'P-value' else prtest else 'Test Statistic') if(length(extracolheads)) extracolheads <- c(extracolheads,'') # 21jan03 } dimnames(cstats) <- list(NULL,gl) cstats <- data.frame(cstats, check.names=FALSE, stringsAsFactors=FALSE) col.just <- rep("c",length(gl)) if(dcolumn && all(prtest != 'none') && gl[length(gl)] %in% c('P-value','Test Statistic')) col.just[length(col.just)] <- '.' if(prn) { cstats <- data.frame(N=nn, cstats, check.names=FALSE, stringsAsFactors=FALSE) col.just <- c("r",col.just) } if(!insert.bottom) legend <- NULL else { legend <- character() if(any(type == 2)) { legend <- paste0("\\noindent {\\", outer.size, " $a$\\ }{", bld, "$b$\\ }{\\", outer.size, " $c$\\ } represent the lower quartile $a$, the median $b$, and the upper quartile $c$\\ for continuous variables.", if(prmsd) '~~$x\\pm s$ represents $\\bar{X}\\pm 1$ SD.' 
else '') } if(prn) { legend <- c(legend, '$N$', '~is the number of non--missing values.') } if(any(type == 1) && npct == 'numerator') { legend <- c(legend, 'Numbers after percents are frequencies.') } if(length(testUsed)) legend <-c(legend, if(length(testUsed) == 1)'\\noindent Test used:' else '\\indent Tests used:', if(length(testUsed) == 1) paste(testUsed,'test') else paste(paste0('\\textsuperscript{\\normalfont ', 1:length(testUsed),'}',testUsed, ' test'),collapse='; ')) } if(length(auxc)) { if(length(auxc) != nrow(cstats)) stop(paste0('length of auxCol (',length(auxCol[[1]]), ') is not equal to number or variables in table (', nv,').')) auxcc <- format(auxc) auxcc[is.na(auxc)] <- '' cstats <- cbind(auxcc, cstats) nax <- names(auxCol) heads <- get2rowHeads(nax) names(cstats)[1] <- heads[[1]] if(length(col.just)) col.just <- c('r', col.just) if(length(extracolheads)) extracolheads <- c(heads[2], extracolheads) } resp <- latex.default(cstats, title=title, caption=caption, rowlabel=rowlabel, col.just=col.just, numeric.dollar=FALSE, insert.bottom=legend, rowname=lab, dcolumn=dcolumn, extracolheads=extracolheads, extracolsize=Nsize, ...) if(dotchart) resp$style <- unique(c(resp$style, 'calc', 'epic', 'color')) resp } print.summary.formula.cross <- function(x, twoway=nvar == 2, prnmiss=any(stats$Missing>0), prn=TRUE, abbreviate.dimnames=FALSE, prefix.width=max(nchar(v)), min.colwidth, formatArgs=NULL, ...) { stats <- x a <- attributes(stats) cat("\n",a$heading,"\n\n") attr(stats,'class') <- NULL ylab <- attr(stats$S,"label") nvar <- length(a$Levels) vnames <- names(a$Levels) nam <- c(vnames, if(prn)"N", if(prnmiss) "Missing", "S") stats <- stats[nam] S <- stats$S ars <- length(dim(S)) # may always be TRUE attr(stats,"row.names") <- rep("",length(a$row.names)) if(twoway && nvar == 2) { V <- stats[[vnames[1]]] H <- stats[[vnames[2]]] v <- levels(V) h <- levels(H) z <- dimnames(stats$S)[[2]] if(!length(z)) z <- ylab z <- c(if(prn)"N", if(prnmiss)"Missing", z) # 5Oct00 header <- matrix(paste(z,collapse="\n"),1,1) print.char.matrix(header, col.names=FALSE) d <- c(length(v),length(h),length(z)) st <- array(NA, dim=d, dimnames=list(v,h,z)) cstats <- array("", dim=d, dimnames=list(v,h,z)) for(i in 1:length(V)) { j <- V == V[i,drop=FALSE] & H == H[i,drop=FALSE] st[V[i,drop=FALSE],H[i,drop=FALSE],] <- c(if(prn)stats$N[j], if(prnmiss)stats$Missing[j], if(ars)S[j,] else S[j]) } for(k in 1:d[3]) { ww <- c(list(st[,,k]), formatArgs) cstats[,,k] <- ifelse(is.na(st[,,k]),"",do.call('format',ww)) } dimn <- dimnames(cstats)[1:2] names(dimn) <- vnames cstats2 <- matrix("", nrow=d[1], ncol=d[2], dimnames=dimn) for(i in 1:d[1]) { for(j in 1:d[2]) { cstats2[i,j] <- paste(cstats[i,j,], collapse="\n") } } if(missing(min.colwidth)) min.colwidth <- max(min(nchar(dimnames(cstats2)[[2]])), min(nchar(cstats)[nchar(cstats)>0])) return(invisible(print.char.matrix(cstats2, col.names=TRUE, ...))) } ##print.data.frame messes up matrix names (here prefixing by S) if(ars) { stats$S <- NULL snam <- dimnames(S)[[2]] for(i in 1:ncol(S)) stats[[snam[i]]] <- S[,i] } else names(stats)[length(stats)] <- ylab stats <- as.data.frame(stats, stringsAsFactors=FALSE) invisible(print(stats, ...)) } latex.summary.formula.cross <- function(object, title=first.word(deparse(substitute(object))), twoway=nvar == 2, prnmiss=TRUE, prn=TRUE, caption=attr(object,"heading"), vnames=c('labels','names'), rowlabel="", ...) 
{ stats <- object vnames <- match.arg(vnames) ul <- vnames == 'labels' stats <- unclass(stats) a <- attributes(stats) ylab <- attr(stats$S,"label") nvar <- length(a$Levels) nam <- c(names(a$Levels), if(prn)"N", if(prnmiss)"Missing", "S") ##Force lazy evaluation since stats about to change caption <- caption; title <- title stats <- stats[nam] S <- stats$S ars <- length(dim(S)) inn <- c('cbind','c(','ALL', 'NA') out <- c('', '(' ,'Total','Missing') caption <- latexTranslate(caption, inn, out, pb=TRUE, greek=TRUE) if(twoway) rowlab <- if(ul) latexTranslate(a$labels[1],inn,out,pb=TRUE,greek=TRUE) else names(stats)[1] rvar <- stats[[1]] cvar <- stats[[2]] lev1 <- levels(rvar) lev2 <- levels(cvar) if(!twoway) { for(i in 1:nvar) stats[[i]] <- latexTranslate(as.character(stats[[i]]),inn, out,pb=TRUE,greek=TRUE) if(ars) { stats$S <- NULL snam <- latexTranslate(dimnames(S)[[2]],inn,out,pb=TRUE,greek=TRUE) for(i in 1:ncol(S)) stats[[snam[i]]] <- S[,i] } else names(stats)[length(stats)] <- ylab stats <- structure(stats, row.names=rep("",length(stats$N)), class="data.frame") if(hasArg("col.just")) { return(latex(stats, title=title, caption=caption, rowlabel=rowlabel, ...)) } else return(latex(stats, title=title, caption=caption, rowlabel=rowlabel, col.just=c("l","l",rep("r",length(stats)-2)), ...)) } ##Two-way S <- cbind(N=if(prn)stats$N, Missing=if(prnmiss && any(stats$Missing)) stats$Missing, #5Oct00 stats$S) nr <- length(lev1) nc <- length(lev2) ns <- ncol(S) snam <- dimnames(S)[[2]] snam <- latexTranslate(snam, inn, out, pb=TRUE,greek=TRUE) dn <- if(ns > 1) rep(snam, nc) else latexTranslate(lev2,inn,out,pb=TRUE,greek=TRUE) # 5Oct00 st <- matrix(NA, nrow=nr, ncol=nc*ns, dimnames=list(NULL,dn)) for(i in 1:nr) { l <- 0 for(j in 1:nc) { w <- rvar == lev1[i] & cvar == lev2[j] if(any(w)) for(k in 1:ns) { l <- l+1 st[i,l] <- S[w,k] } } } latex(st, title=title, caption=caption, rowlabel=if(rowlabel == '') rowlab else rowlabel, n.rgroup=c(nrow(st)-1,1), n.cgroup=if(ns>1) rep(ns,nc), # ns>1 5Oct00 cgroup =if(ns>1) latexTranslate(lev2,inn,out,pb=TRUE,greek=TRUE), check.names=FALSE, rowname=latexTranslate(lev1,inn,out,pb=TRUE,greek=TRUE), ...) } ##stratify is a modification of Therneau's survival4 strata function ##Saves label attributute and defaults shortlabel to T stratify <- function(..., na.group = FALSE, shortlabel = TRUE) { words <- as.list((match.call())[-1]) if(!missing(na.group)) words$na.group <- NULL if(!missing(shortlabel)) words$shortlabel <- NULL allf <- list(...) 
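  ## Added usage sketch (hypothetical variables): stratify(sex, race)
  ## crosses the two factors into a single stratification factor whose
  ## levels look like "male, white" when shortlabel=TRUE, and which
  ## carries a combined label attribute such as "sex and race".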
if(length(allf) == 1 && is.list(ttt <- unclass(allf[[1]]))) { allf <- ttt words <- names(ttt) } xlab <- sapply(allf, function(x){lab <- valueLabel(x); if(is.null(lab)) NA else lab}) xname <- sapply(allf, function(x){name <- valueName(x); if(is.null(name)) NA else name}) xname <- ifelse(is.na(xname), words, xname) xlab <- paste(ifelse(is.na(xlab), xname, xlab), collapse=' and ') xname <- paste(xname, collapse = ' and ') nterms <- length(allf) what <- allf[[1]] if(is.null(levels(what))) what <- factor(what) levs <- unclass(what) - 1 wlab <- levels(what) if(na.group && any(is.na(what))) { levs[is.na(levs)] <- length(wlab) wlab <- c(wlab, "NA") } if(shortlabel) labs <- wlab else labs <- paste(words[1], wlab, sep = "=") for(i in (1:nterms)[-1]) { what <- allf[[i]] if(is.null(levels(what))) what <- factor(what) wlab <- levels(what) wlev <- unclass(what) - 1 if(na.group && any(is.na(wlev))) { wlev[is.na(wlev)] <- length(wlab) wlab <- c(wlab, "NA") } if(!shortlabel) wlab <- format(paste(words[i], wlab, sep = "=")) levs <- wlev + levs * (length(wlab)) labs <- paste(rep(labs, rep(length(wlab), length(labs))), rep(wlab, length(labs)), sep = ", ") } levs <- levs + 1 ulevs <- sort(unique(levs[!is.na(levs)])) levs <- match(levs, ulevs) labs <- labs[ulevs] levels(levs) <- labs class(levs) <- "factor" if(length(xlab)) valueLabel(levs) <- xlab #FEH 2Jun95 if(length(xname)) valueName(levs) <- xname levs } '[.summary.formula.response' <- function(x,i,j,drop=FALSE) { at <- attributes(x) at$dim <- at$dimnames <- NULL if(!missing(j)) { x <- unclass(x)[,j,drop=FALSE] at$ycolname <- at$ycolname[j] attributes(x) <- c(attributes(x), at) } if(missing(i)) return(x) if(is.character(i)) { vn <- at$vname[at$vname != ''] k <- match(i, vn, nomatch=0) if(any(k == 0)) stop(paste('requested variables not in object:', paste(i[k == 0],collapse=' '))) i <- k } j <- integer(0) nl <- at$nlevels is <- 1 for(m in 1:length(nl)) { ie <- is+nl[m]-1 if(any(i == m)) j <- c(j,is:ie) is <- ie+1 } at$vname <- at$vname[j] at$vlabel <- at$vlabel[j] at$nlevels <- at$nlevels[i] at$labels <- at$labels[i] x <- unclass(x)[j,,drop=FALSE] attributes(x) <- c(attributes(x), at) x } cumcategory <- function(y) { if(!is.factor(y)) y <- factor(y) lev <- levels(y) y <- unclass(y) Y <- matrix(NA, nrow=length(y), ncol=length(lev)-1, dimnames=list(NULL, paste0('>=', lev[-1]))) storage.mode(Y) <- 'integer' for(i in 2:length(lev)) Y[,i-1] <- 1*(y >= i) Y } summarize <- function(X, by, FUN, ..., stat.name=deparse(substitute(X)), type=c('variables','matrix'), subset=TRUE, keepcolnames=FALSE) { type <- match.arg(type) if(missing(stat.name) && length(stat.name) > 1) stat.name <- 'X' if(!is.list(by)) { nameby <- deparse(substitute(by)) bylabel <- label(by) by <- list(by[subset]) names(by) <- if(length(nameby) == 1) nameby else 'by' } else { bylabel <- sapply(by, label) if(!missing(subset)) by <- lapply(by, function(y, subset) y[subset], subset=subset) } nby <- length(by) bylabel <- ifelse(bylabel == '', names(by), bylabel) typical.computation <- FUN(X, ...) 
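  ## Added usage sketch (hypothetical data, not from the original file):
  ##   d <- data.frame(g=rep(c('a','b'), each=5), y=rnorm(10))
  ##   with(d, summarize(y, llist(g), smean.sd))
  ## returns one row per level of g; the statistics computed by FUN become
  ## columns (the first is renamed to stat.name by default).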
nc <- length(typical.computation) xlabel <- deparse(substitute(X)) if(length(xlabel) != 1) xlabel <- 'X' if(length(xlab <- attr(X,'label'))) xlabel <- xlab if(!missing(subset)) X <- if(is.matrix(X)) X[subset,,drop=FALSE] else X[subset] byc <- do.call('paste', c(by, sep='|')) ## split does not handle matrices ## msplit <- function(x, group) { ## if(is.matrix(x)) { ## group <- as.factor(group) ## l <- levels(group) ## res <- vector('list', length(l)) ## names(res) <- l ## for(j in l) res[[j]] <- x[group==j,,drop=FALSE] ## res ## } else split(x, group) ## } ## Following was streamlined 10oct02 using the new mApply ## if(nc==1) r <- sapply(msplit(X, byc), FUN, ..., simplify=TRUE) else { ## r <- sapply(msplit(X, byc), FUN, ..., simplify=TRUE) ## r <- matrix(unlist(r), nrow=nc, dimnames=dimnames(r)) ## 2Mar00: added unlist because sapply was creating an array of ## lists in S+2000 ## } r <- mApply(X, byc, FUN, ..., keepmatrix=nc > 1) rdimn <- dimnames(r)[[1]] ## someday can use unpaste defined in Misc.s ans <- strsplit(if(nc == 1) names(r) else rdimn, '\\|') ans <- sapply(ans, function(x)if(length(x)) x else '') ## strsplit returns list "transpose" of unpaste bb <- matrix(unlist(ans), nrow=nby) ans <- vector('list', nby) for(jj in 1:nby) ans[[jj]] <- bb[jj,] names(ans) <- names(by) if(nc>1 && (nc != ncol(r))) stop('program logic error') snames <- names(typical.computation) if(! length(snames)) snames <- paste0(stat.name, 1 : nc) if(! keepcolnames) { if(length(stat.name) == 1) snames[1] <- stat.name else if(length(stat.name)) snames <- stat.name } oldopt <- options('warn') options(warn = -1) on.exit(options(oldopt)) notna <- rep(TRUE, length(ans[[1]])) for(i in 1:length(by)) { byi <- by[[i]] ansi <- ans[[i]] if(is.factor(byi)) { if(!is.character(ansi)) stop('program logic error:ansi not character') ansi <- factor(ansi, levels(byi)) } else if(is.numeric(byi)) ansi <- as.numeric(ansi) names(ansi) <- NULL label(ansi) <- bylabel[i] ans[[i]] <- ansi notna <- notna & !is.na(ansi) } if(type == 'matrix' || nc == 1) { ans[[stat.name]] <- if(nc == 1) structure(r, names=NULL) else structure(r, dimnames=list(NULL, snames), names=NULL) label(ans[[stat.name]]) <- xlabel } else { snames <- make.names(snames) for(i in 1:length(snames)) { ans[[snames[i]]] <- structure(r[, i], names=NULL) label(ans[[snames[i]]]) <- xlabel } } notna <- notna & !is.na(if(nc == 1) r else (r %*% rep(1,nc))) ans <- structure(ans, class='data.frame', row.names=1 : length(ans[[1]])) ## removed [notna,] from end of above line; not sure why this was needed iorder <- do.call('order', structure(unclass(ans)[1 : nby], names=NULL)) ## order can bomb if data frame given (preserves names) ans[iorder,] } ##Following code is based on tapply instead if(FALSE) { r <- as.array(tapply(x, by, FUN, ...)) dn <- dimnames(r) wrn <- .Options$warn .Options$warn <- -1 for(i in 1:length(by)) { byi <- by[[i]] if(is.numeric(byi) && !is.factor(byi)) dn[[i]] <- as.numeric(dn[[i]]) } .Options$warn <- wrn names(dn) <- names(by) ans <- expand.grid(dn) typical.computation <- FUN(x, ...) 
nc <- length(typical.computation) snames <- names(typical.computation) if(length(snames)) snames <- paste(stat.name, snames) else snames <- if(nc == 1) stat.name else paste(stat.name, 1 : nc) for(i in 1 : length(r)) if(!length(r[[i]])) r[[i]] <- rep(NA, nc) ## unlist will skip positions where calculations not done (NULLs) S <- matrix(unlist(r), ncol=length(snames), dimnames=list(NULL, snames), byrow=TRUE) if(type == 'matrix') { ans$S <- S if(stat.name != 'S') names(ans)[length(ans)] <- stat.name } else ans <- cbind(ans, S) ans } smean.cl.normal <- function(x, mult=qt((1+conf.int)/2,n-1), conf.int=.95, na.rm=TRUE) { if(na.rm) x <- x[!is.na(x)] n <- length(x) if(n < 2) return(c(Mean=mean(x),Lower=NA,Upper=NA)) xbar <- sum(x)/n se <- sqrt(sum((x - xbar)^2) / n / (n-1)) c(Mean=xbar, Lower=xbar - mult*se, Upper=xbar + mult*se) } smean.sd <- function(x, na.rm=TRUE) { if(na.rm) x <- x[!is.na(x)] n <- length(x) if(n == 0) return(c(Mean=NA, SD=NA)) xbar <- sum(x)/n sd <- sqrt(sum((x - xbar)^2)/(n-1)) c(Mean=xbar, SD=sd) } smean.sdl <- function(x, mult=2, na.rm=TRUE) { if(na.rm) x <- x[!is.na(x)] n <- length(x) if(n == 0) return(c(Mean=NA, Lower=NA, Upper=NA)) xbar <- sum(x)/n sd <- sqrt(sum((x - xbar)^2)/(n-1)) c(Mean=xbar, Lower=xbar - mult * sd, Upper=xbar + mult * sd) } #S-Plus gives a parse error for R's .Internal() #Might try not using an else to see if S still parses smean.cl.boot <- function(x, conf.int=0.95, B=1000, na.rm=TRUE, reps=FALSE) { if(na.rm) x <- x[!is.na(x)] n <- length(x) xbar <- mean(x) if(n < 2L) return(c(Mean=xbar, Lower=NA, Upper=NA)) z <- unlist(lapply(seq_len(B), function(i, x, N) sum(x[sample.int(N, N, TRUE, NULL)]), x=x, N=n)) / n quant <- quantile(z, c((1 - conf.int)/2, (1 + conf.int)/2)) names(quant) <- NULL res <- c(Mean=xbar, Lower=quant[1L], Upper=quant[2L]) if(reps) attr(res, "reps") <- z res } smedian.hilow <- function(x, conf.int=.95, na.rm=TRUE) { quant <- quantile(x, probs=c(.5,(1-conf.int)/2,(1+conf.int)/2), na.rm=na.rm) names(quant) <- c('Median','Lower','Upper') quant } asNumericMatrix <- function(x) { a <- attributes(x) k <- length(a$names) at <- vector('list', k); names(at) <- a$names for(i in 1:k) { xi <- x[[i]] type <- storage.mode(xi) A <- attributes(xi) if(type == 'character') { xi <- factor(xi) A <- c(A, attributes(xi)) x[[i]] <- xi } A$dim <- A$names <- A$dimnames <- NULL A$.type. <- type at[[i]] <- A } resp <- matrix(unlist(x), ncol=k, dimnames=list(a$row.names, a$names)) attr(resp, 'origAttributes') <- at resp } matrix2dataFrame <- function(x, at=attr(x, 'origAttributes'), restoreAll=TRUE) { d <- dimnames(x) k <- length(d[[2]]) w <- vector('list',k) nam <- names(w) <- d[[2]] for(i in 1 : k) { a <- at[[nam[i]]] type <- a$.type. a$.type. <- NULL xi <- x[, i] names(xi) <- NULL lev <- a$levels if(restoreAll) { if(type == 'character') { xi <- as.character(factor(xi, 1 : length(lev), lev)) a$levels <- NULL if(length(a$class)) a$class <- setdiff(a$class, 'factor') } storage.mode(xi) <- type ## R won't let something be assigned class factor by brute ## force unless it's an integer object attributes(xi) <- a } else { if(length(l <- a$label)) label(xi) <- l if(length(u <- a$units)) units(xi) <- u if(length(lev)) { xi <- factor(xi, 1 : length(lev), lev) if(type == 'character') xi <- as.character(xi) } } w[[i]] <- xi } rn <- d[[1]] if(! 
stripChart <- function(x, xlim, xlab='', pch=1,
                       cex.labels=par('cex'), cex.points=.5,
                       lcolor='gray', grid=FALSE)
{
  groups <- names(x)
  if(missing(xlim)) xlim <- range(unlist(x), na.rm=TRUE)
  i <- integer(0)
  if(grid) {
    lines <- llines; points <- lpoints; segments <- lsegments
  }
  plot.new()
  mai <- omai <- par('mai')
  on.exit(par(mai=omai))
  mxlab <- .3 + max(strwidth(groups, units='inches', cex=cex.labels))
  mai[2] <- mxlab
  par(mai=mai, new=TRUE)
  plot(xlim, c(.5, length(groups)+.5), xlim=xlim,
       xlab='', ylab='', axes=FALSE, type='n')
  box()
  mgp.axis(1, axistitle=xlab)
  mtext(paste(groups,''), 2, 0, at=length(groups):1,
        adj=1, las=1, cex=cex.labels)
  y <- 0
  abline(h = 1:length(groups), lty = 1, lwd=1, col=lcolor)
  for(Y in length(groups):1) {
    y <- y + 1
    X <- x[[y]]
    if(length(X)) points(X, rep(Y, length(X)), pch=pch)
  }
}

conTestkw <- function(group, x) {
  st <- spearman2(group, x)
  list(P            = st['P'],
       stat         = st['F'],
       df           = st[c('df1','df2')],
       testname     = if(st['df1'] == 1) 'Wilcoxon' else 'Kruskal-Wallis',
       statname     = 'F',
       namefun      = 'fstat',
       latexstat    = 'F_{df}',
       plotmathstat = 'F[df]')
}

catTestchisq <- function(tab) {
  st <- if(!is.matrix(tab) || nrow(tab) < 2 || ncol(tab) < 2)
          list(p.value=NA, statistic=NA, parameter=NA)
        else {
          rowcounts <- tab %*% rep(1, ncol(tab))
          tab <- tab[rowcounts > 0,]
          if(!is.matrix(tab))
            list(p.value=NA, statistic=NA, parameter=NA)
          else chisq.test(tab, correct=FALSE)
        }
  list(P            = st$p.value,
       stat         = st$statistic,
       df           = st$parameter,
       testname     = 'Pearson',
       statname     = 'Chi-square',
       namefun      = 'chisq',
       latexstat    = '\\chi^{2}_{df}',
       plotmathstat = 'chi[df]^2')
}

ordTestpo <- function(group, x) {
  if (!requireNamespace("rms", quietly = TRUE))
    stop("This function requires the 'rms' package.")
  f <- rms::lrm(x ~ group)$stats
  list(P            = f['P'],
       stat         = f['Model L.R.'],
       df           = f['d.f.'],
       testname     = 'Proportional odds likelihood ratio',
       statname     = 'Chi-square',
       namefun      = 'chisq',
       latexstat    = '\\chi^{2}_{df}',
       plotmathstat = 'chi[df]^2')
}
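## Added illustration (hedged, invented data): each test helper returns a
## list (P, stat, df, testname, ...) in the common format consumed by the
## Hmisc summary functions that call them.
if(FALSE) {
  set.seed(3)
  group <- sample(c('a','b','c'), 100, TRUE)
  x     <- rnorm(100)
  conTestkw(group, x)$P                  # Kruskal-Wallis-type p-value
  catTestchisq(table(group, x > 0))$P    # Pearson chi-square, no correction
}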
## ----- Hmisc/R/ciapower.s -----

## tref    time at which mortalities estimated
## n1      total sample size, stratum 1
## n2      total sample size, stratum 2
## m1c     tref-year mortality, stratum 1 control
## m2c     tref-year mortality, stratum 2 control
## r1      % reduction in m1c by intervention, stratum 1
## r2      % reduction in m2c by intervention, stratum 2
## accrual duration of accrual period
## tmin    minimum follow-up time
## alpha   type I error
## pr      set to T to print intermediate results

ciapower <- function(tref, n1, n2, m1c, m2c, r1, r2,
                     accrual, tmin, alpha=.05, pr=TRUE)
{
  ## Find mortality in intervention groups
  if(m1c > 1 | m2c > 1)
    stop("m1c and m2c must be fractions")

  m1i <- (1-r1/100)*m1c
  m2i <- (1-r2/100)*m2c

  if(pr) {
    cat("\nAccrual duration:", accrual, "y  Minimum follow-up:", tmin, "y\n")
    cat("\nSample size Stratum 1:", n1, " Stratum 2:", n2, "\n")
    cat("\nAlpha=", alpha, "\n")
    d <- list(c("Stratum 1","Stratum 2"), c("Control","Intervention"))
    m <- cbind(c(m1c,m2c), c(m1i,m2i))
    dimnames(m) <- d
    cat("\n", tref, "-year Mortalities\n", sep=""); print(m)
  }

  ## Find exponential hazards for all groups
  lam1c <- -logb(1-m1c)/tref
  lam2c <- -logb(1-m2c)/tref
  lam1i <- -logb(1-m1i)/tref
  lam2i <- -logb(1-m2i)/tref
  if(pr) {
    lam <- cbind(c(lam1c,lam2c), c(lam1i,lam2i))
    dimnames(lam) <- d
    cat("\nHazard Rates\n"); print(lam)
  }

  ## Find probability that a subject will have her event observed during
  ## the study, for all groups
  tmax <- tmin+accrual
  p1c <- 1-1/accrual/lam1c*(exp(-tmin*lam1c)-exp(-tmax*lam1c))
  p2c <- 1-1/accrual/lam2c*(exp(-tmin*lam2c)-exp(-tmax*lam2c))
  p1i <- 1-1/accrual/lam1i*(exp(-tmin*lam1i)-exp(-tmax*lam1i))
  p2i <- 1-1/accrual/lam2i*(exp(-tmin*lam2i)-exp(-tmax*lam2i))
  if(pr) {
    p <- cbind(c(p1c,p2c), c(p1i,p2i))
    dimnames(p) <- d
    cat("\nProbabilities of an Event During Study\n")
    print(p)
  }

  ## Find expected number of events, all groups
  m1c <- p1c*n1/2
  m2c <- p2c*n2/2
  m1i <- p1i*n1/2
  m2i <- p2i*n2/2
  if(pr) {
    m <- cbind(c(m1c,m2c), c(m1i,m2i))
    dimnames(m) <- d
    cat("\nExpected Number of Events\n")
    print(round(m,1))
  }

  ## Find expected value of observed log hazard ratio
  delta <- logb((lam1i/lam1c)/(lam2i/lam2c))
  if(pr) cat("\nRatio of hazard ratios:", format(exp(delta)), "\n")

  ## Find its variance
  v <- 1/m1c + 1/m2c + 1/m1i + 1/m2i
  sd <- sqrt(v)
  if(pr) cat("Standard deviation of log ratio of ratios:", format(sd), "\n")

  z <- -qnorm(alpha/2)
  ## if(pr) cat("\nCritical value:", format(z), "\n")
  c(Power = 1 - (pnorm(z - abs(delta)/sd) - pnorm(-z - abs(delta)/sd)))
}
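## Added illustration (hedged): an example call with invented design values:
## 5-year control mortalities of .20 and .30, 25% and 20% intervention
## reductions, 3 years of accrual, 5 years minimum follow-up.
if(FALSE) {
  ciapower(tref=5, n1=1000, n2=800, m1c=.20, m2c=.30,
           r1=25, r2=20, accrual=3, tmin=5)
}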
## ----- Hmisc/R/score.binary.s -----

score.binary <- function(..., fun=max, points=1:p,
                         na.rm=funtext=='max', retfactor=TRUE)
{
  x <- list(...)
  p <- length(x)
  nam <- (as.character(sys.call())[-1])[1:p]
  x <- matrix(unlist(x), ncol=p)
  if(!missing(points)) {
    if(length(points)==1) points <- rep(points, p)
    if(length(points)!=p) stop('wrong length for points')
  }

  x <- x * rep(points, rep.int(nrow(x), p))
  funtext <- as.character(substitute(fun))
  if(funtext=='max' && !missing(points) && retfactor)
    warning('points do not matter for fun=max with retfactor=T\nas long as they are in ascending order')

  if(!missing(retfactor) && retfactor && funtext!='max')
    stop('retfactor=T only applies to fun=max')

  xna <- apply(x, 1, function(x) any(is.na(x)))
  funargs <- as.list(args(fun))
  funargs <- funargs[-length(funargs)]

  if(any(names(funargs) == "na.rm")) {
    x <- apply(x, 1, fun, na.rm=na.rm)
  } else {
    x <- apply(x, 1, fun)
  }

  if(!na.rm) x[x==0 & xna] <- NA
  if(retfactor && funtext=='max')
    factor(x, c(0,points), c("none",nam))
  else x
}
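## Added illustration (hedged, invented indicators): with the default
## fun=max and retfactor=TRUE, the result is a factor giving each subject's
## highest-points positive item, or "none" when no item is positive.
if(FALSE) {
  fever <- c(0, 1, 0, 0)
  rash  <- c(0, 0, 1, 0)
  score.binary(fever, rash)   # levels: none, fever, rash
}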
## ----- Hmisc/R/plotCorrM.r -----

##' Plot Correlation Matrix and Correlation vs. Time Gap
##'
##' Constructs two `ggplot2` graphics.  The first is a half matrix of
##' rectangles where the height of the rectangle is proportional to the
##' absolute value of the correlation coefficient, with positive and
##' negative coefficients shown in different colors.  The second graphic
##' is a variogram-like graph of correlation coefficients on the y-axis
##' and absolute time gap on the x-axis, with a `loess` smoother added.
##' The times are obtained from the correlation matrix's row and column
##' names if these are numeric.  If any names are not numeric, the times
##' are taken as the integers 1, 2, 3, ...  The two graphics are
##' `ggplotly`-ready if you use `plotly::ggplotly(..., tooltip='label')`.
##' @title plotCorrM
##' @param r correlation matrix
##' @param what specifies whether to return plots or the data frame used in making the plots
##' @param type specifies whether to use bottom-aligned rectangles (the default) or centered circles
##' @param xlab x-axis label for correlation matrix
##' @param ylab y-axis label for correlation matrix
##' @param maxsize maximum circle size if `type='circle'`
##' @param xangle angle for placing x-axis labels, defaulting to 0.  Consider using `xangle=45` when labels are long.
##' @return a list containing two `ggplot2` objects if `what='plots'`, or a data frame if `what='data'`
##' @author Frank Harrell
##' @md
##' @export
##' @examples
##' set.seed(1)
##' r <- cor(matrix(rnorm(100), ncol=10))
##' g <- plotCorrM(r)
##' g[[1]]  # plot matrix
##' g[[2]]  # plot correlation vs gap time
##' # ggplotlyr(g[[2]])
##' # ggplotlyr uses ggplotly with tooltip='label' then removes
##' # txt: from hover text
plotCorrM <- function(r, what=c('plots', 'data'),
                      type=c('rectangle', 'circle'),
                      xlab='', ylab='', maxsize=12, xangle=0) {
  what <- match.arg(what)
  type <- match.arg(type)
  p  <- dim(r)[1]
  v  <- dimnames(r)[[1]]
  if(! length(v)) v <- as.character(1 : p)
  vn <- as.numeric(v)
  if(any(is.na(vn))) vn <- 1 : p
  mn <- min(abs(r))
  R  <- as.vector(r)
  x  <- as.vector(col(r))
  y  <- as.vector(row(r))
  txt <- paste0(v[x], ' vs. ', v[y], '<br>r=', round(R, 3))
  d  <- subset(data.frame(x, y, delta=abs(vn[x] - vn[y]), r=R, txt), x < y)
  if(what == 'data') return(d)
  mx <- max(abs(d$r))
  g1 <- switch(type,
    rectangle = ggplot(d, aes(x=x, y=y, color=ifelse(r > 0, '+', '-'),
                              label=txt)) +
                geom_segment(aes(x=x, y=y, xend=x,
                                 yend=y + 0.9 * abs(r) / mx), size=3),
    circle    = ggplot(d, aes(x=x, y=y, color=ifelse(r > 0, '+', '-'),
                              label=txt, size=abs(r))) +
                geom_point() + scale_size(range = c(0, maxsize))
  )
  g1 <- g1 +
    scale_x_continuous(breaks = 1 : p, labels=v) +
    scale_y_continuous(breaks = 1 : p, labels=v) +
    guides(color = guide_legend(title=''),
           size  = guide_legend(title='r')) +
    xlab(xlab) + ylab(ylab) +
    theme(axis.text.x=element_text(angle=xangle,
                                   hjust=if(xangle != 0) 1)) +
    labs(subtitle=paste0('max |r|:', round(mx, 3),
                         ' min |r|:', round(mn, 3)))
  ## Would not run loess if use text=txt
  ## Need to run ggplotly with tooltip='label'
  g2 <- ggplot(d, aes(x=delta, y=r, label=txt)) + geom_point() +
    geom_smooth(method=loess) +
    xlab('Absolute Time Difference') + ylab('Correlation')
  list(g1, g2)
}

utils::globalVariables('delta')

## ----- Hmisc/R/bystats.s -----

bystats <- function(y, ..., fun, nmiss, subset)
{
  ## Fri, 16 Sep 2005 - Shawn@ori.org removed left argument to
  ## interaction
  x <- interaction(..., drop=TRUE, sep=" ")
  l <- levels(x)
  if(any(is.na(x))) {
    l <- c(l, "NA")
    attr(x,"class") <- NULL
    x[is.na(x)] <- length(l)
    levels(x) <- l
    attr(x,'class') <- "factor"
  }

  y <- as.matrix(y)
  if(!missing(subset)) {
    x <- x[subset]
    y <- y[subset,,drop=FALSE]
  }

  if(missing(fun)) {
    fun <- function(y) apply(y, 2, mean)
    r <- range(y, na.rm=TRUE)
    uy <- unique(y[!is.na(y)])  # fixed 1Jun95, 16Mar96
    funlab <- if(length(uy)==2 && r[1]==0 & r[2]==1) "Fraction"
              else "Mean"
  } else {
    funlab <- as.character(substitute(fun))
    funlab <- funlab[length(funlab)]  # handles fun=function(x)mean(x)
  }

  lab <- as.character(sys.call())[-1]
  m <- (!missing(fun)) + (!missing(nmiss)) + (!missing(subset))
  lab <- lab[1:(length(lab)-m)]
  if(length(lab) > 2) lab2 <- paste(lab[-1], collapse=", ")
  else lab2 <- lab[-1]

  heading <- if(funlab=="") paste(lab[1], "by", lab2)
             else paste(funlab, "of", lab[1], "by", lab2)

  nna <- !is.na(y %*% rep(1,ncol(y)))
  N <- sum(nna)
  stats <- fun(y[nna,,drop=FALSE])
  nstats <- length(stats)
  name.stats <-
    if(length(dn <- dimnames(stats)))
      as.vector(outer(dn[[1]], dn[[2]], FUN=function(a,b)paste(b, a)))
    else names(stats)

  if(length(name.stats)) funlab <- name.stats
  if(nstats > 1 && length(name.stats)==0)
    funlab <- rep(" ", nstats)

  s <- matrix(NA, nrow=length(l) + 1, ncol=2 + nstats,
              dimnames=list(c(l, "ALL"), c("N", "Missing", funlab)))
  j <- 0
  for(i in l) {
    j <- j+1
    w <- y[x==i,,drop=FALSE]
    nna <- !is.na(w %*% rep(1,ncol(w)))
    n <- sum(nna)
    s[j,] <- c(n, nrow(w)-n,
               if(n) fun(w[nna,,drop=FALSE]) else rep(NA,nstats))
  }

  s[j+1,] <- c(N, nrow(y)-N, stats)
  if((!missing(nmiss) && !nmiss) ||
     (missing(nmiss) && all(s[,"Missing"]==0)))
    s <- s[,-2]

  attr(s, "heading")    <- heading
  attr(s, "byvarnames") <- lab2
  attr(s,'class')       <- "bystats"
  s
}

print.bystats <- function(x, ...)
{
  cat("\n", attr(x,"heading"), "\n\n")
  attr(x,"heading")    <- NULL
  attr(x,"byvarnames") <- NULL
  attr(x,'class')      <- NULL
  invisible(print(x, ...))
}

latex.bystats <- function(object,
                          title=first.word(expr=substitute(object)),
                          caption=attr(object,"heading"),
                          rowlabel=attr(object,"byvarnames"), ...)
{
  dm <- dimnames(object)
  ## inn <- c("%","<=","<",">=",">","\\[")
  ## out <- c("\\\\%","$\\\\leq$","$<$","$\\\\geq$","$>$","\\\\verb|[|")
  ## dm[[1]] <- translate(dm[[1]],inn,out)
  ## dm[[2]] <- translate(dm[[2]],inn,out)
  inn <- c("%","<=","<",">=",">","[")
  out <- c("\\%","$\\leq$","$<$","$\\geq$","$>$","\\verb|[|")
  dimnames(object) <- dm
  caption <- sedit(caption, "cbind", "")
  latex(unclass(object), title=title, caption=caption, rowlabel=rowlabel,
        n.rgroup=c(nrow(object)-1,1), ...)
}
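## Added illustration (hedged, invented data): with fun missing, bystats
## reports column means (or fractions for 0/1 y) plus N and Missing for
## each combination of the stratification variables and an ALL row.
if(FALSE) {
  set.seed(4)
  chol <- rnorm(40, 200, 25)
  sex  <- sample(c('female','male'), 40, TRUE)
  tx   <- sample(c('drug','placebo'), 40, TRUE)
  bystats(chol, sex, tx)
}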
bystats2 <- function(y, v, h, fun, nmiss, subset)
{
  y <- as.matrix(y)
  if(!missing(subset)) {
    y <- y[subset,,drop=FALSE]
    v <- v[subset]
    h <- h[subset]
  }

  v <- factor(v, exclude=NULL)
  h <- factor(h, exclude=NULL)
  lv <- levels(v)
  lh <- levels(h)
  nv <- length(lv)
  nh <- length(lh)

  if(missing(fun)) {
    fun <- function(y) apply(y, 2, mean)
    r <- range(y, na.rm=TRUE)
    funlab <- if(length(r)==2 && r[1]==0 & r[2]==1) "Fraction"
              else "Mean"
  } else {
    funlab <- as.character(substitute(fun))
    funlab <- funlab[length(funlab)]  # handles fun=function(x)mean(x)
  }

  lab <- as.character(sys.call())[-1]
  m <- (!missing(fun)) + (!missing(nmiss)) + (!missing(subset))
  lab <- lab[1:(length(lab)-m)]
  if(length(lab) > 2) lab2 <- paste(lab[-1], collapse=", ")
  else lab2 <- lab[-1]

  heading <- if(funlab=="") paste(lab[1], "by", lab2)
             else paste(funlab, "of", lab[1], "by", lab2)

  nna <- !is.na(y %*% rep(1,ncol(y)))
  N <- sum(nna)
  stats <- fun(y[nna,,drop=FALSE])
  nstats <- length(stats)
  name.stats <-
    if(length(dn <- dimnames(stats)))
      as.vector(outer(dn[[1]], dn[[2]], FUN=function(a,b)paste(b,a)))
    else names(stats)

  if(length(name.stats)) funlab <- name.stats
  if(nstats > 1 && length(name.stats)==0) funlab <- rep(" ", nstats)

  s <- array(NA, dim=c(nv+1, nh+1, 2+nstats),
             dimnames=list(c(lv,"ALL"), c(lh,"ALL"),
                           c("N","Missing",funlab)))

  for(xv in c(lv,"ALL")) {
    for(xh in c(lh,"ALL")) {
      if(xv=="ALL" && xh=="ALL")
        st <- c(N, nrow(y)-N, stats)
      else {
        if(xv=="ALL") u <- h==xh
        else if(xh=="ALL") u <- v==xv
        else u <- h==xh & v==xv
        if(any(u)) {
          w <- y[u,,drop=FALSE]
          nna <- !is.na(w %*% rep(1,ncol(w)))
          n <- sum(nna)
          st <- c(n, nrow(w)-n, fun(w[nna,,drop=FALSE]))
        } else st <- c(0, n, rep(NA, length(stats)))
      }
      s[xv,xh,] <- st
    }
  }

  if((!missing(nmiss) && !nmiss) ||
     (missing(nmiss) && all(s[,,"Missing"]==0)))
    s <- s[,,-2,drop=FALSE]

  attr(s, "heading")    <- heading
  attr(s, "byvarnames") <- lab[-1]
  attr(s,'class')       <- "bystats2"
  s
}

print.bystats2 <- function(x, abbreviate.dimnames=FALSE,
                           prefix.width=max(nchar(dimnames(x)[[1]])), ...)
{
  cat("\n", attr(x,"heading"), "\n\n")
  if(!exists("print.char.matrix")) {  # Vanilla S
    attr(x, "heading") <- attr(x, "byvarnames") <- attr(x, "class") <- NULL
    return(invisible(print(x)))
  }

  d <- dim(x)
  cstats <- array("", dim=d[1:3])
  header <- matrix(paste(dimnames(x)[[3]], collapse="\n"), 1, 1)
  print.char.matrix(header)
  for(k in 1:d[3]) cstats[,,k] <- format(x[,,k])
  dimn <- dimnames(x)[1:2]
  names(dimn) <- attr(x,"byvarnames")
  cstats2 <- matrix("", nrow=d[1], ncol=d[2], dimnames=dimn)
  for(i in 1:d[1]) {
    for(j in 1:d[2]) {
      cstats2[i,j] <- paste(cstats[i,j,], collapse="\n")
    }
  }
  invisible(print.char.matrix(cstats2, ...))
}

latex.bystats2 <- function(object,
                           title=first.word(expr=substitute(object)),
                           caption=attr(object, "heading"),
                           rowlabel="", ...)
{
  dm <- dimnames(object)
  inn <- c("%", "<=", "<", ">=", ">", "[")
  out <- c("\\%", "$\\leq$","$<$", "$\\geq$","$>$", "\\verb|[|")
  dm[[1]] <- sedit(dm[[1]], inn, out)
  dm[[2]] <- sedit(dm[[2]], inn, out)
  dm[[3]] <- sedit(dm[[3]], inn, out)
  dimnames(object) <- dm
  caption <- sedit(caption, "cbind", "")
  d <- dim(object)
  dn <- rep(dimnames(object)[[3]], d[2])
  st <- matrix(NA, nrow=d[1], ncol=d[2]*d[3],
               dimnames=list(dimnames(object)[[1]], dn))
  for(i in 1:d[1]) {
    l <- 0
    for(j in 1:d[2]) {
      for(k in 1:d[3]) {
        l <- l+1
        st[i,l] <- object[i,j,k]
      }
    }
  }
  latex(st, title=title, caption=caption, rowlabel=rowlabel,
        n.rgroup=c(nrow(st)-1,1),
        cgroup=dimnames(object)[[2]], n.cgroup=rep(d[3],d[2]), ...)
}
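## Added illustration (hedged, invented data): bystats2 cross-classifies by
## a vertical (v) and a horizontal (h) factor and returns a 3-dimensional
## array with margins "ALL" in each direction.
if(FALSE) {
  set.seed(5)
  y <- rnorm(60)
  v <- sample(c('low','high'), 60, TRUE)
  h <- sample(c('a','b','c'), 60, TRUE)
  bystats2(y, v, h)
}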
## ----- Hmisc/R/rcspline.eval.s -----

## rcspline.eval - function to create design matrix for restricted cubic
##  spline function of Stone & Koo, given an input vector and optionally
##  a vector of knots.  If knots are not given, knots are set using
##  default algorithm.  If the number of knots is not given, 5 are used.
##  Terms are normalized by (outer-inner knot)^2.
##  Can optionally return antiderivative of spline functions if
##  type="integral".
##  norm=0 : no normalization of constructed variables
##  norm=1 : divide by cube of difference in last 2 knots
##           makes all variables unitless
##  norm=2 : (default) divide by square of difference in outer knots
##           makes all variables in original units of x
##
##  Returns:
##  x - design matrix for derived spline variables
##      (includes original x in first column if inclx=T or
##      type="integral")
##  attribute knots - input or derived vector of knots
##  If knots.only=T, returns instead the vector of estimated or given
##  knots.
##  If rpm is not null, replaces missing x with rpm before evaluating
##  but after estimating knots.
##
##  F. Harrell 13 Feb 90
##  Modified   28 Mar 90 - improved default knot computation
##             22 Aug 90 - put knots as attribute, return matrix
##             20 Sep 90 - added knots.only argument
##             16 Oct 90 - added rpm argument
##             11 Dec 91 - added type argument
##             27 Dec 91 - added norm argument
##             26 Jun 93 - added evasive action if <3 knots
##              1 Oct 13 - added logic to handle excessive ties at start or end x
##              8 Mar 14 - refined that logic, added logic for low # uniques

rcspline.eval <- function(x, knots=NULL, nk=5, inclx=FALSE, knots.only=FALSE,
                          type="ordinary", norm=2, rpm=NULL, pc=FALSE,
                          fractied=0.05)
{
  if(! length(knots)) {  ## knot locations unspecified
    xx <- x[!is.na(x)]
    n <- length(xx)
    if(n < 6)
      stop('knots not specified, and < 6 non-missing observations')
    if(nk < 3)
      stop('nk must be >= 3')

    xu  <- sort(unique(xx))
    nxu <- length(xu)

    if((nxu - 2) <= nk) {
      warning(sprintf('%s knots requested with %s unique values of x.
knots set to %s interior values.', nk, nxu, nxu - 2))
      knots <- xu[- c(1, length(xu))]
    }
    else {
      outer <- if(nk > 3) .05 else .1
      if(nk > 6) outer <- .025

      knots <- numeric(nk)
      overrideFirst <- overrideLast <- FALSE
      nke <- nk
      firstknot <- lastknot <- numeric(0)

      if(fractied > 0 && fractied < 1) {
        f <- table(xx) / n
        if(max(f[- c(1, length(f))]) < fractied) {
          if(f[1] >= fractied) {
            firstknot <- min(xx[xx > min(xx)])
            xx <- xx[xx > firstknot]
            nke <- nke - 1
            overrideFirst <- TRUE
          }
          if(f[length(f)] >= fractied) {
            lastknot <- max(xx[xx < max(xx)])
            xx <- xx[xx < lastknot]
            nke <- nke - 1
            overrideLast <- TRUE
          }
        }
      }

      if(nke == 1) knots <- median(xx)
      else {
        if(nxu <= nke) knots <- xu
        else {
          p <- if(nke == 2) seq(.5, 1.0 - outer, length=nke)
               else seq(outer, 1.0 - outer, length=nke)
          knots <- quantile(xx, p)
          if(length(unique(knots)) < min(nke, 3)) {
            knots <- quantile(xx, seq(outer, 1.0 - outer, length=2 * nke))
            if(length(firstknot) && length(unique(knots)) < 3) {
              midval <- if(length(firstknot) && length(lastknot))
                          (firstknot + lastknot) / 2.
                        else median(xx)
              knots <- sort(c(firstknot, midval,
                              if(length(lastknot)) lastknot
                              else quantile(xx, 1.0 - outer)))
            }
            if((nu <- length(unique(knots))) < 3) {
              cat("Fewer than 3 unique knots.  Frequency table of variable:\n")
              print(table(x))
              stop()
            }
            warning(paste("could not obtain", nke,
                          "interior knots with default algorithm.\n",
                          "Used alternate algorithm to obtain",
                          nu, "knots"))
          }
        }

        if(length(xx) < 100) {
          xx <- sort(xx)
          if(! overrideFirst) knots[1]   <- xx[5]
          if(! overrideLast)  knots[nke] <- xx[length(xx) - 4]
        }
      }
      knots <- c(firstknot, knots, lastknot)
    }
  }  ## end knot locations not specified

  knots <- sort(unique(knots))
  nk <- length(knots)

  if(nk < 3) {
    cat("fewer than 3 unique knots.  Frequency table of variable:\n")
    print(table(x))
    stop()
  }

  if(knots.only) return(knots)

  if(length(rpm)) x[is.na(x)] <- rpm
  xx <- matrix(1.1, length(x), nk - 2)
  knot1   <- knots[1]
  knotnk  <- knots[nk]
  knotnk1 <- knots[nk - 1]
  kd <- if(norm == 0) 1
        else if(norm == 1) knotnk - knotnk1
        else (knotnk - knot1) ^ (2 / 3)

  power <- if(type=="integral") 4 else 3

  for(j in 1 : (nk - 2)) {
    xx[,j] <- pmax((x - knots[j]) / kd, 0) ^ power +
      ((knotnk1 - knots[j]) * pmax((x - knotnk) / kd, 0) ^ power -
       (knotnk - knots[j]) * (pmax((x - knotnk1) / kd, 0) ^ power)) /
      (knotnk - knotnk1)
  }

  if(power == 4) xx <- cbind(x, x * x / 2, xx * kd / 4)
  else if(inclx) xx <- cbind(x, xx)

  if(pc) {
    p <- prcomp(xx, scale=TRUE, center=TRUE)
    pcparms <- p[c('center', 'scale', 'rotation')]
    xx <- p$x
    attr(xx, 'pcparms') <- pcparms
  }

  attr(xx, 'knots') <- knots
  xx
}

## ----- Hmisc/R/mChoice.s -----

mChoice <- function(..., label='',
                    sort.levels=c('original','alphabetic'),
                    add.none=FALSE, drop=TRUE, ignoreNA=TRUE)
{
  sort.levels <- match.arg(sort.levels)
  dotlist <- list(...)
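  ## (Added explanatory comment) Each element of dotlist is one source of
  ## choices per subject.  Below, all values are matched against the pooled
  ## level set and each subject's selections are packed into a ';'-separated
  ## string of level indexes, e.g. levels c('a','b','c') with choices a and
  ## c stored as "1;3".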
if (drop) lev <- unique(as.character(unlist(dotlist))) else lev <- unique(unlist(lapply(dotlist, function(x) levels(as.factor(x))))) if(ignoreNA) lev <- setdiff(lev, NA) if(sort.levels=='alphabetic') lev <- sort(lev) lev <- setdiff(lev,'') vcall <- as.character(sys.call())[-1] dotlist <- lapply(dotlist, FUN=match, table=lev) #, nomatch=0) g <- function(...) { set <- c(...) set <- set[!is.na(set)] if(!length(set)) return('') paste(sort(unique(set)), collapse=';') } Y <- do.call(mapply, c(list(FUN=g, SIMPLIFY=TRUE, USE.NAMES=FALSE, MoreArgs=NULL), dotlist)) if(add.none && any(Y=='') && 'none' %nin% lev) { lev <- c(lev, 'none') Y[Y==''] <- as.character(length(lev)) } if(label == '') label <- attr(dotlist[[1]],'label') if(!length(label)) { label <- vcall[1] if(length(nn <- names(dotlist)[1])) label <- nn } structure(Y, label=label, levels=lev, class=c('mChoice','labelled')) } Math.mChoice <- function(x, ...) { stop(.Generic, " not meaningful for mChoice") } Summary.mChoice <- function(..., na.rm) { .NotYetImplemented() } Ops.mChoice <- function(e1, e2) { ok <- switch(.Generic, "=="=, "!="=TRUE, FALSE) if(!ok) { warning(.Generic, " not meaningful for mChoice") return(rep.int(NA, max(length(e1), if(!missing(e2))length(e2)))) } nas <- is.na(e1) | is.na(e2) if (nchar(.Method[1])) { l1 <- levels(e1) e1 <- l1[e1] } if (nchar(.Method[2])) { l2 <- levels(e2) e2 <- l2[e2] } if (all(nchar(.Method)) && (length(l1) != length(l2) || !all(sort.int(l2) == sort.int(l1)))) stop("level sets of factors are different") value <- NextMethod(.Generic) value[nas] <- NA value } format.mChoice <- function(x, minlength=NULL, sep=";", ...) { lev <- attr(x, 'levels') if(length(minlength)) lev <- abbreviate(lev, minlength) w <- strsplit(x, ';') sapply(w, function(x, lev, sep) paste(lev[as.numeric(x)], collapse=sep), lev=lev, sep=sep) } '[.mChoice' <- function(x, ..., drop=FALSE) { if(drop) stop('drop=TRUE not implemented') atr <- attributes(x) atr$names <- NULL x <- NextMethod('[') consolidate(attributes(x)) <- atr x } print.mChoice <- function(x, quote=FALSE, max.levels=NULL, width = getOption("width"), ...) { if (length(x) <= 0) cat("mChoice", "(0)\n", sep = "") else { xx <- x class(xx) <- NULL levels(xx) <- NULL xx[] <- as.character(x) print(xx, quote=quote, ...) } maxl <- if (is.null(max.levels)){ TRUE }else max.levels if (maxl) { n <- length(lev <- encodeString(levels(x), quote = ifelse(quote, "\"", ""))) colsep <- " " T0 <- "Levels: " if(is.logical(maxl)) maxl <- { width <- width - (nchar(T0, "w") + 3 + 1 + 3) lenl <- cumsum(nchar(lev, "w") + nchar(colsep, "w")) if(n <= 1 || lenl[n] <= width) n else max(1, which(lenl > width)[1] - 1) } drop <- n > maxl cat(if(drop) paste(format(n), ""), T0, paste(if(drop) {c(lev[1:max(1, maxl - 1)], "...", if (maxl > 1) lev[n]) }else lev, collapse = colsep), "\n", sep = "") } invisible(x) } as.character.mChoice <- function(x, ...) { lev <- levels(x) sapply(strsplit(x=x, split=';'), function(z) paste(lev[as.integer(z)], collapse=';')) } as.double.mChoice <- function(x, drop=FALSE, ...) { lev <- attr(x,'levels') X <- matrix(0, nrow=length(x), ncol=length(lev), dimnames=list(names(x), lev)) unused <- numeric(0) for(i in 1:length(lev)) { xi <- 1*inmChoice(x, i) if(sum(xi)==0) unused <- c(unused, i) X[,i] <- xi } if(drop && length(unused)) X <- X[, -unused, drop=FALSE] X } nmChoice <- function(object) { y <- gsub('[^;]', '', object) nchoices <- nchar(y) + 1 nchoices[object == ''] <- 0 nchoices } summary.mChoice <- function(object, ncombos=5, minlength=NULL, drop=TRUE, ...) 
{ nunique <- length(unique(object)) y <- gsub('[^;]', '', object) nchoices <- nchar(y) + 1 nchoices[object == ''] <- 0 nchoices <- table(nchoices) X <- as.numeric(object, drop=drop) if(length(minlength)) dimnames(X)[[2]] <- abbreviate(dimnames(X)[[2]],minlength) crosstab <- crossprod(X) combos <- table(format(object, minlength)) i <- order(-combos) combos <- combos[i[1:min(ncombos,length(combos))]] structure(list(nunique=nunique, nchoices=nchoices, crosstab=crosstab, combos=combos, label=label(object)), class='summary.mChoice') } print.summary.mChoice <- function(x, prlabel=TRUE, ...) { if(prlabel) cat(x$label, ' ', x$nunique, ' unique combinations\n', sep='') cat('Frequencies of Numbers of Choices Per Observation\n\n') print(x$nchoices) crosstab <-format(x$crosstab) crosstab[lower.tri(crosstab)] <- '' cat('\nPairwise Frequencies (Diagonal Contains Marginal Frequencies)\n') print(crosstab, quote=FALSE) s <- if(length(x$combos)==x$nunique) 'Frequencies of All Combinations' else paste('Frequencies of Top', length(x$combos), 'Combinations') cat('\n', s, '\n') print(x$combos) invisible() } match.mChoice <- function(x, table, nomatch = NA, incomparables = FALSE) { if (!is.logical(incomparables) || incomparables) { .NotYetUsed("incomparables != FALSE") } lev <- attr(table, 'levels') if(is.factor(x) || is.character(x)) { x <- match(as.character(x), lev, nomatch=0) } return(.Call("do_mchoice_match", as.integer(x), table, as.integer(nomatch))) } # inmChoice <- function(x, values) { # match.mChoice(values, x, nomatch=0) > 0 # } inmChoice <- function(x, values, condition=c('any', 'all')) { condition <- match.arg(condition) lev <- attr(x, 'levels') if(is.character(values)) { v <- match(values, lev) if(any(is.na(v))) stop(paste('values not in levels:', paste(values[is.na(v)],collapse=';'))) values <- v } x <- paste(';', unclass(x), ';', sep='') values <- paste(';', values, ';', sep='') res <- rep(condition != 'any', length(x)) for(j in 1:length(values)) { i <- grep(values[j], x) if(length(i)) { if(condition == 'any') res[i] <- TRUE else res[-i] <- FALSE } else if(condition == 'all') res[] <- FALSE } res } inmChoicelike <- function(x, values, condition=c('any', 'all'), ignore.case=FALSE, fixed=FALSE) { condition <- match.arg(condition) if(! 
is.character(values)) stop('values must be a character vector')
  x <- as.character(x)
  res <- rep(condition != 'any', length(x))
  for(j in 1 : length(values)) {
    i <- grep(values[j], x, ignore.case=ignore.case, fixed=fixed)
    if(length(i)) {
      if(condition == 'any') res[i] <- TRUE
      else res[-i] <- FALSE
    }
    else if(condition == 'all') res[] <- FALSE
  }
  res
}

is.mChoice <- function(x) inherits(x, 'mChoice')

## ----- Hmisc/R/showPsfrag.s -----

showPsfrag <- function(filename)
{
  file <- paste(as.character(substitute(filename)), 'ps', sep='.')
  out <- "TEMPltx"
  cat('\\documentclass{article}',
      '\\usepackage{graphics}',
      '\\usepackage[scanall]{psfrag}',
      '\\begin{document}',
      paste('\\includegraphics{', file, '}', sep=''),
      '\\end{document}', sep='\n',
      file=paste(out, 'tex', sep='.'))
  sys(paste('latex "\\scrollmode\\input" ', out,
            ';dvips -o ', out, '.ps ', out,
            '; gv ', out, '.ps  &', sep=''))
  unlink(paste(out, c('tex','log','dvi','ps','aux','pfg'), sep='.'))
  invisible()
}

## ----- Hmisc/R/scat1d.s -----

## Improvements due to Martin Maechler <maechler@stat.math.ethz.ch>

scat1d <- function(x, side=3, frac=.02, jitfrac=.008, tfrac,
                   eps=ifelse(preserve,0,.001),
                   lwd=0.1, col=par('col'),
                   y=NULL, curve=NULL,
                   bottom.align=FALSE,
                   preserve=FALSE, fill=1/3, limit=TRUE,
                   nhistSpike=2000, nint=100,
                   type=c('proportion','count','density'),
                   grid=FALSE, ...)
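## (Added explanatory comment, hedged) scat1d draws a one-dimensional
## rug/scatter display along a plot side; when length(x) >= nhistSpike it
## delegates to histSpike() defined later in this file, and near-duplicate
## values are either randomly jittered (jitfrac) or spread
## deterministically via jitter2 when preserve=TRUE.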
{ type <- match.arg(type) if(length(x) >= nhistSpike) return(histSpike(x, side=side, type=type, frac=2.5 * frac, col=col, y=y, curve=curve, bottom.align=if(type == 'density') TRUE else bottom.align, add=TRUE, nint=nint, grid=grid, ...)) gfun <- ordGridFun(grid) if(side == 1 || side == 3 || length(y) || length(curve)) { l <- 1:2 ax <- 1 } else { l <- 3:4 ax <- 2 } pr <- parGrid(grid) usr <- pr$usr; pin <- pr$pin; uin <- pr$uin u <- usr[ l] u.opp <- usr[- l] w <- u[2] - u[1] ## Start JOA 12.8.97 : handle xy missings parallel if (length(y) > 1) { ## length=1 special case needed for datadensity if (length(x) != length(y)) stop("y must have same length as x (or length(y)=1)") selector <- ! (is.na(x) | is.na(y)) x <- unclass(x[selector]) y <- unclass(y[selector]) } else x <- unclass(x[! is.na(x)]) ## Stop JOA 12.8.97 if(length(curve)) y <- approx(curve, xout=x, rule=2)$y n <- length(x) if(missing(tfrac)) tfrac <- if(n < 125) 1 else max(.1, 125 / n) else if (tfrac < 0 || tfrac > 1) stop("must have 0 <= tfrac <= 1") ## Start JOA 19.8.97 if(jitfrac > 0 && anyDuplicated( if(eps > 0) round(x / w / eps) else x )) if (preserve) x <- jitter2(x, fill=fill, limit=limit, eps=w * eps) else ## Stop JOA 19.8.97 x <- x + runif(n, -w * jitfrac, w * jitfrac) h <- min(pin) * frac / uin[- ax] h2 <- h / 2 if(grid && length(y) && inherits(y, 'unit')) { h <- unit(frac, 'npc') h2 <- unit(frac/2, 'npc') } if(length(y)) { a <- y - h2 b <- y + h2 } else { a <- if(side < 3) u.opp[1] else u.opp[2] - h b <- if(side < 3) u.opp[1] + h else u.opp[2] } if(tfrac < 1) { l <- tfrac * (b - a) a <- a + runif(n) * (b - l - a) ##runif(n, a, b-l) if frac>0 b <- a + l } if(ax == 1 && bottom.align) { a <- a + h2 b <- b + h2 } if(ax == 1) gfun$segments(x, a, x, b, lwd=lwd, xpd=frac < 0, col=col) else gfun$segments(a, x, b, x, lwd=lwd, xpd=frac < 0, col=col) invisible() } jitter2 <- function(x,...) UseMethod("jitter2") jitter2.default <- function(x, fill=1/3, limit=TRUE, eps=0, presorted=FALSE, ...) { x2 <- x[!is.na(x)] if (!presorted){ o <- order(x2) x2 <- x2[o] } r <- if (eps > 0) rle(round(x2 / eps) * eps) else rle(x2) if ( length(r$length) < 2 || max(r$length) < 2 ) return(x) d <- abs(diff(r$values)) d <- pmin( c(d[1], d), c(d, d[length(d)]) ) who <- rep(r$lengths > 1, r$lengths) d <- d[r$lengths > 1] * fill / 2 if (is.logical(limit) && limit) limit <- min(d) if (limit) d <- pmin(d,limit) r$values <- r$values[r$lengths > 1] - d r$lengths <- r$lengths[r$lengths > 1] d <- d * 2 / (r$lengths - 1) k <- length(r$lengths) n <- sum(who) val <- rep(r$values, r$lengths) add <- (0 : (n - 1)) - rep(c(0, cumsum(r$lengths[-k])), r$lengths) add <- add[order(rep(1 : k, r$lengths), runif(n))] add <- add * rep(d, r$lengths) val <- val + add x2[who] <- val if (!presorted) x2[o]<-x2 x[!is.na(x)] <- x2 x } jitter2.data.frame <- function(x, ...) { as.data.frame(lapply(x, function(z,...) { if (is.numeric(z)) jitter2.default(z,...) else z }, ...)) } datadensity <- function(object, ...) { if(!length(class(object))) class(object) <- data.class(object) UseMethod('datadensity') } datadensity.data.frame <- function(object, group, which=c('all','continuous','categorical'), method.cat=c('bar','freq'), col.group=1:10, n.unique=10, show.na=TRUE, nint=1, naxes, q, bottom.align=nint > 1, cex.axis=sc(.5,.3), cex.var=sc(.8,.3), lmgp=NULL, tck=sc(-.009,-.002), ranges=NULL, labels=NULL, ...) 
{ which <- match.arg(which) method.cat <- match.arg(method.cat) maxna <- 0 mgroup <- missing(group) # before R changes it z <- sapply(object, function(x, n.unique) { xp <- x[!is.na(x)] nu <- if(length(xp)) length(unique(xp)) else 0 if(nu < 2) c(0,0) else c(type=if(is.factor(x) || is.character(x) || nu < n.unique) 1 else 2, na=sum(is.na(x))) }, n.unique=n.unique) types <- c('nil','cat','cont')[z[1,]+1] numna <- z[2,] fnumna <- format(numna) maxna <- max(numna) w <- switch(which, all = types != 'nil', continuous = types == 'cont', categorical= types == 'cat') if(missing(naxes)) naxes <- sum(w) ## Function to scale values such that when naxes<=3 get hi, >=50 get ## lo, otherwise linearly interpolate between 3 and 50 sc <- function(hi,lo,naxes) approx(c(50,3),c(lo,hi),xout=naxes,rule=2)$y formals(sc) <- list(hi=NA,lo=NA,naxes=naxes) nams <- names(object) max.length.name <- max(nchar(nams)) if(!length(lmgp)) lmgp <- sc(0,0) oldpar <- oPar() # in Hmisc Misc.s mgp <- c(0,lmgp,0) mai <- oldpar$mai plot.new(); par(new=TRUE) ## enables strwidth mxlb <- .1 + max(strwidth(nams, units='inches', cex=cex.var)) mai[2] <- mxlb if(!show.na) maxna <- 0 max.digits.na <- if(maxna == 0) 0 else trunc(log10(maxna)) + 1 if(maxna > 0) mai[4] <- .1 + strwidth('Missing', units='inches', cex=cex.var) par(mgp=mgp, mai=mai,tck=tck) on.exit(setParNro(oldpar)) if(!mgroup) group <- as.factor(group) else { group <- factor(rep(1,length(object[[1]]))) ngroup <- 0 } ngroup <- length(levels(group)) col.group <- rep(col.group, length.out=ngroup) y <- 0 for(i in (1:length(nams))[w]) { if(y < 1) { plot(c(0,1),c(1,naxes),xlim=c(.02,.98),ylim=c(1,naxes), xlab='',ylab='',type='n',axes=FALSE) usr <- par('usr') y <- naxes + 1 if(maxna > 0) { outerText('Missing', y=naxes+strheight('Missing',units='user',cex=cex.var), cex=cex.var) } charheight <- strheight('X',units='user',cex=.6) ## par('cxy')[2] } y <- y - 1 x <- object[[i]] if(types[i] == 'cont' ) { ## continuous variable x <- unclass(x) ## handles dates isna <- is.na(x) nna <- sum(isna) N <- length(x) - nna r <- if(length(ranges) && length(ranges[[nams[i]]])) ranges[[nams[i]]] else range(x, na.rm=TRUE) p <- pretty(r, if(nint == 1)5 else nint) if(nint < 2) p <- c(p[1],p[length(p)]) ##bug in pretty for nint=1 xmin <- p[1] xmax <- p[length(p)] cex <- par(cex=cex.axis) # Bug in R: cex= ignored in # axis( ) axis(side=1, at=(p-xmin)/(xmax-xmin), labels=format(p), pos=y, cex=cex.axis) par(cex=cex) if(mgroup) scat1d((x-xmin)/(xmax-xmin), y=y, bottom.align=bottom.align, minf=.075, frac=sc(.02,.005), ...) else for(g in 1:ngroup) { j <- group == levels(group)[g] scat1d((x[j]-xmin)/(xmax-xmin), y=y, bottom.align=bottom.align, col=col.group[g], tfrac=if(N<125) 1 else max(.1, 125/N), minf=.075, frac=sc(.02,.005), ...) 
} if(!missing(q)) { quant <- quantile(x, probs=q, na.rm=nna>0) points((quant-xmin)/(xmax-xmin), rep(y-.5*charheight,length(q)), pch=17, cex=.6) } } else { ## character or categorical or discrete numeric if(is.character(x)) x <- as.factor(x) isna <- is.na(x) nna <- sum(isna) if(length(group) != length(x)) { if(is.data.frame(object)) stop('length of group must equal length of variables in data frame') group <- rep(1, length(x)) } tab <- table(group,x) lev <- dimnames(tab)[[2]] nl <- length(lev) if(is.numeric(x)) { xx <- as.numeric(lev) xx <- (xx - min(xx)) / (max(xx) - min(xx)) } else { if(sum(nchar(lev)) > 200) lev <- substring(lev, 1, max(1, round(200 / length(lev)))) xx <- (0 : (nl - 1)) / (nl - 1) } cex <- par(cex=cex.axis) axis(side=1, at=xx, labels=lev, pos=y, cex=cex.axis, tick=FALSE) par(cex=cex) lines(c(0,1), c(y,y)) maxfreq <- max(tab) for(g in if(ngroup == 0) 1 else 1 : ngroup) { tabg <- tab[g,] if(method.cat == 'bar') symbols(xx, y + .4 * tabg / maxfreq / 2, add=TRUE, rectangles=cbind(.02, .4 * tabg / maxfreq), inches=FALSE, col=col.group[g]) else text(xx, rep(y + .1, nl), format(tabg), cex=cex.axis * sqrt(tab / maxfreq), adj=.5) } } mtext(if(length(labels))labels[i] else nams[i], 2, 0, at = y, srt = 0, cex = cex.var, adj = 1, las=1) ## las=1 for R (also 3 lines down) if(show.na && nna > 0) outerText(fnumna[i], y, cex=cex.var) } invisible() } histSpike <- function(x, side=1, nint=100, bins=NULL, frac=.05, minf=NULL, mult.width=1, type=c('proportion','count','density'), xlim=range(x), ylim=c(0, max(f)), xlab=deparse(substitute(x)), ylab=switch(type, proportion='Proportion', count ='Frequency', density ='Density'), y=NULL, curve=NULL, add=FALSE, minimal=FALSE, bottom.align=type == 'density', col=par('col'), lwd=par('lwd'), grid=FALSE, ...) { type <- match.arg(type) if(! minimal) { if(! add && side != 1) stop('side must be 1 if add=FALSE') if(add && type == 'count') warning('type="count" is ignored if add=TRUE') } if(length(y) > 1) { if(length(y) != length(x)) stop('lengths of x and y must match') if(length(curve)) warning('curve ignored when y specified') i <- ! is.na(x + y) curve <- list(x=x[i], y=y[i]) } if(length(curve) && !missing(bottom.align) && bottom.align) warning('bottom.align=T specified with curve or y; ignoring bottom.align') gfun <- ordGridFun(grid) x <- x[!is.na(x)] x <- x[x >= xlim[1] & x <= xlim[2]] if(type != 'density') { if(is.character(nint) || length(x) <= 10) { f <- table(x) x <- as.numeric(names(f)) } else { ncut <- nint + 1 if(! length(bins)) bins <- seq(xlim[1], xlim[2], length = ncut) delta <- (bins[2] - bins[1]) / 2 f <- table(cut(x, c(bins[1] - delta, bins))) x <- bins j <- f > 0 x <- x[j] f <- f[j] } if(type == 'proportion') f <- f / sum(f) } else { nbar <- logb(length(x), base = 2) + 1 width <- diff(range(x)) / nbar * .75 * mult.width den <- density(x, width=width, n=200, from=xlim[1], to=xlim[2]) x <- den$x f <- den$y } if(! minimal && ! 
add) { if(grid) stop('add=FALSE not implemented for lattice') plot(0, 0, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, type='n') } if(side == 1 || side == 3) { l <- 1:2; ax <- 1 } else { l <- 3:4; ax <- 2 } f <- f / max(f) if(length(minf)) f <- pmax(f, minf) if(minimal) { plot.new() # sets (0,0) to (1,1) del <- diff(xlim) usr <- c(xlim[1] - 0.04 * del, xlim[2] + 0.04 * del, 0, 1) par(mar=c(3.5, 0.5, 0.5, 0.5), mgp=c(2, 0.5, 0), usr=usr) axis(1, line=0.25) title(xlab=xlab) segments(x, rep(0, length(x)), x, f, lwd=lwd, col=col) return(invisible(xlim)) } pr <- parGrid(grid) usr <- pr$usr; pin <- pr$pin; uin <- pr$uin u <- usr[l] u.opp <- usr[- l] h <- min(pin) * frac / uin[- ax] * f h2 <- h / 2 if(length(y) && inherits(y, 'unit')) { h <- unit(frac, 'npc') h2 <- unit(frac / 2, 'npc') } if(length(curve) || length(y)) { if(length(curve)) y <- approx(curve, xout=x, rule=2)$y a <- y - h2; b <- y + h2 } else { a <- if(side < 3) u.opp[1] else u.opp[2] - h b <- if(side < 3) u.opp[1] + h else u.opp[2] } if(ax == 1 && bottom.align && type != 'density') { a <- a + h2 b <- b + h2 } if(type == 'density') { lll <- gfun$lines ## Problem in S+ getting right value of lwd if(ax == 1) do.call('lll',list(x, if(side == 1)b else a, lwd=lwd, col=col)) else do.call('lll',list(if(side == 2)b else a, x, lwd=lwd, col=col)) } else { lll <- gfun$segments if(ax == 1) do.call('lll',list(x, a, x, b, lwd=lwd, xpd=frac < 0, col=col)) else do.call('lll',list(a, x, b, x, lwd=lwd, xpd=frac < 0, col=col)) } invisible(xlim) } if(FALSE) histSpikep <- function(p, x, y, z, group=NULL, color=NULL, hovertext=NULL, colors=NULL, bottom.align=TRUE, tracename='Proportion', ...) { d <- data.frame(x=rep(x, each=3), y=rep(y, each=3), z=rep(z, each=3)) origcolor <- color if(length(group)) d$group <- rep(group, each=3) if(length(hovertext)) d$hovertext <- rep(hovertext, each=3) if(length(color)) d$color <- rep(color, each=3) n <- nrow(d) j <- seq(1, n, by=3) if(length(hovertext)) d$hovertext[j] <- '' if(! bottom.align) d$y[j] <- d$y[j] - d$z[j] / 2 j <- seq(3, n, by=3) d$x[j] <- NA if(length(hovertext)) d$hovertext[j] <- '' j <- seq(2, n, by=3) d$y[j] <- d$y[j] + d$z[j] / ifelse(bottom.align, 1, 2) plotly::plot_ly(d, x=~ x, y=~ y, mode='lines', type='scatter', line=list(color=d$color, width=1.4), # ... text=~ hovertext, hoverinfo=if(length(hovertext)) 'text' else 'none') } histboxp <- function(p=plotly::plot_ly(height=height), x, group=NULL, xlab=NULL, gmd=TRUE, sd=FALSE, bins=100, wmax=190, mult=7, connect=TRUE, showlegend=TRUE) { if (!requireNamespace("plotly")) stop("This function requires the 'plotly' package.") if(! length(xlab)) xlab <- label(x, html=TRUE, plot=TRUE, default=deparse(substitute(x))) if(! length(group)) group <- rep(1, length(x)) if(length(x) != length(group)) stop('x and group must be same length') distinct <- unique(x) distinct <- distinct[! 
is.na(distinct)] xmin <- min(distinct) xr <- x ## Still do slight rounding if < bins distinct values because ## values extremely close to each other won't show otherwise if((length(distinct) > bins) || min(diff(sort(distinct))) < range(distinct) / (5 * bins)) { pret <- pretty(x, if(length(distinct) > bins) bins else 5 * bins) dist <- pret[2] - pret[1] r <- range(pret) xr <- r[1] + dist * round((x - r[1]) / dist) } mu <- markupSpecs$html fmt <- function(x) htmlSN(x, digits=5) y <- 0 dh <- dm <- dq1 <- dq2 <- dq3 <- dgmd <- dsd <- levs <- NULL group <- as.factor(group) mis <- is.na(x) levs <- levels(group) ng <- length(levs) Qu <- matrix(NA, nrow=ng, ncol=5) j <- 0 for(g in levels(group)) { i <- group == g j <- j + 1 miss <- sum(mis[i]) if(miss > 0) i <- i & ! mis if(! any(i)) next u <- x[i] ur <- xr[i] tab <- as.data.frame(table(ur)) z <- as.numeric(as.character(tab$ur)) prop <- tab$Freq / length(ur) y <- y - 1 dh <- rbind(dh, data.frame(x=z, prop=prop, freq=tab$Freq, txt=paste0(fmt(z), '<br>', round(prop, 3), '<br>n=', tab$Freq), y=y)) dm <- rbind(dm, data.frame(Mean=mean(u), n=length(u), miss=miss, y=y)) if(gmd) { Gmd <- GiniMd(u) dgmd <- rbind(dgmd, data.frame(Gmd, x=xmin, txt=paste0('Gini mean difference:', fmt(Gmd)), y=y)) } if(sd) { Sd <- sd(u) dsd <- rbind(dsd, data.frame(sd=Sd, x=xmin, txt=paste0('SD:', fmt(Sd)), y=y)) } probs <- c(0.05, 0.25, 0.5, 0.75, 0.95) qu <- quantile(u, probs) Qu[j, ] <- qu nam <- paste0('Q', mu$sub(probs)) txt <- paste0(nam, ':', fmt(qu)) dq1 <- rbind(dq1, data.frame(Median=qu[3], txt=txt[3], y=y)) dq2 <- rbind(dq2, data.frame(quartiles=qu[c(2,4)], txt=txt[c(2,4)], y=y)) dq3 <- rbind(dq3, data.frame(outer=qu[c(1,5)], txt=txt[c(1,5)], y=y)) } height <- plotlyParm$heightDotchart(1.2 * ng) + 50 * (gmd & sd) if(length(.Options$plotlyauto) && .Options$plotlyauto) height <- NULL dh$prop <- 0.6 * dh$prop / max(dh$prop) p <- plotly::add_segments(p, data=dh, x = ~ x, y = ~ y, xend = ~ x, yend = ~ y + prop, text = ~ txt, hoverinfo = 'text', color = I('black'), name = 'Histogram', legendgroup = 'Histogram', showlegend=showlegend) dm$txt <- with(dm, paste0('Mean:', fmt(Mean), '<br>n=', n, '<br>', miss, ' missing')) a <- 0.05 b <- 0.4 k <- (a + b) / 2 w <- (b - a) / 2 p <- plotly::add_markers(p, data=dm, mode='markers', color=I('black'), x = ~ Mean, y = ~ y - k, text = ~ txt, hoverinfo = 'text', size=I(5), name='Mean', legendgroup='Mean', showlegend=showlegend) segs <- function(p, x, y, yend, text, data, color, name, width=2) { plotly::add_segments(p, data=data, x=x, y=y, xend=x, yend=yend, text=text, hoverinfo='text', name=name, legendgroup=name, showlegend=showlegend, color=color, line=list(width=width)) } p <- segs(p, x=~Median, y=~y-k-w, yend=~y-k+w, text=~txt, data=dq1, color=I('black'), name='Median', width=3) p <- segs(p, x=~quartiles, y=~y-k-w*.8, yend=~y-k+w*.8, text=~txt, data=dq2, color=I('blue'), name='Quartiles') onam <- '0.05, 0.95<br>Quantiles' p <- segs(p, x=~outer, y=~y-k-w*.64, yend=~y-k+w*.64, text=~txt, data=dq3, color=I('red'), name=onam) if(connect) { ys <- -(1 : ng) - k qs <- function(p, x, xend, color, lg) plotly::add_segments(p, x=x, xend=xend, y=~ys, yend=~ys, hoverinfo='none', showlegend=FALSE, alpha=0.3, color=color, legendgroup=lg, name='ignored') p <- qs(p, x= ~ Qu[,1], xend=~ Qu[,2], color=I('red'), lg=onam) p <- qs(p, x= ~ Qu[,2], xend=~ Qu[,4], color=I('blue'), lg='Quartiles') p <- qs(p, x= ~ Qu[,4], xend=~ Qu[,5], color=I('red'), lg=onam) } gnam <- paste0('Gini ', mu$overbar(paste0('|', htmlGreek('Delta'), '|'))) if(gmd) p <- 
plotly::add_segments(p, data=dgmd, x = ~ x, y = ~ y - 0.19, xend = ~ x + Gmd, yend = ~ y - 0.19, text = ~ txt, hoverinfo = 'text', color = I('light gray'), name = gnam, legendgroup=gnam, visible='legendonly', showlegend=showlegend) if(sd) p <- plotly::add_segments(p, data=dsd, x = ~ x, y = ~ y - 0.23, xend = ~ x + sd, yend = ~ y - 0.23, text = ~ txt, hoverinfo = 'text', color = I('light blue'), name = 'SD', legendgroup='SD', visible='legendonly', showlegend=showlegend) p <- plotly::layout(p, margin = list(l=plotlyParm$lrmargin(levs, wmax=wmax, mult=mult)), xaxis = list(title=xlab, zeroline=FALSE), yaxis = list(title='', tickvals= - (1 : ng), ticktext = levs)) p } dhistboxp <- function(x, group=NULL, strata=NULL, xlab=NULL, gmd=FALSE, sd=FALSE, bins=100, nmin=5, ff1=1, ff2=1) { if(! length(group)) group <- rep(1, length(x)) if(length(x) != length(group)) stop('x and group must be same length') if(! length(strata)) strata <- rep(1, length(x)) if(length(x) != length(strata)) stop('x and strata must be same length') distinct <- unique(x) distinct <- distinct[! is.na(distinct)] xmin <- min(distinct) xr <- x ustrata <- sort(unique(strata)) ## Still do slight rounding if < bins distinct values because ## values extremely close to each other won't show otherwise if(length(distinct) > bins || min(diff(sort(distinct))) < range(distinct) / (5 * bins)) { pret <- pretty(x, if(length(distinct) > bins) bins else 5 * bins) dist <- pret[2] - pret[1] r <- range(pret) xr <- r[1] + dist * round((x - r[1]) / dist) } mu <- markupSpecs$html fmt <- function(x) htmlSN(x, digits=5) quant <- function(x, probs=0.5) { x <- x[! is.na(x)] n <- length(x) if(! n) return(rep(NA, length(probs))) if(n < 3) return(quantile(x, probs)) hdquantile(x, probs, se=FALSE) } group <- as.factor(group) mis <- is.na(x) levs <- levels(group) ng <- length(levs) stdel <- diff(range(ustrata)) * 0.025 * ff1 stmin <- min(strata, na.rm=TRUE) R <- NULL ## Compute maximum proportion in any bin in any group/stratum prop <- numeric(0) nn <- integer(0) j <- 0 for(st in ustrata) { for(g in levels(group)) { i <- strata == st & group == g if(any(i)) { tab <- table(xr[i]) den <- sum(tab) if(den > 0) { j <- j + 1 prop <- c(prop, max(tab) / den) nn <- c(nn, den) } } } } maxp <- if(any(nn >= nmin)) max(prop[nn >= nmin]) else max(prop) maxp <- min(maxp, wtd.quantile(prop, probs=0.98, weights=nn)) propfac <- stdel / maxp j <- 0 for(st in ustrata) { stdir <- 1 ig <- 0 for(g in levels(group)) { ig <- ig + 1 stdir <- - stdir stoffset <- stdir * (stdel * floor((ig + 1) / 2)) i <- strata == st & group == g miss <- sum(mis[i]) if(miss > 0) i <- i & ! mis if(! 
any(i)) next u <- x[i] ur <- xr[i] nn <- length(ur) if(nn >= nmin) { tab <- as.data.frame(table(ur)) z <- as.numeric(as.character(tab$ur)) prop <- tab$Freq / nn R <- rbind(R, data.frame(x=z, y =st + 0.5 * stdir * stdel, xhi=NA, yhi=st + 0.5 * stdir * stdel + stoffset * prop * propfac * ff2, group=g, strata=st, txt=paste0(fmt(z), '<br>', round(prop, 3), '<br>n=', tab$Freq), type='histogram', connect=NA)) } med <- quant(u) R <- rbind(R, data.frame(x=med, y=st, xhi=NA, yhi=NA, group=g, strata=st, txt=paste0('Median:', fmt(med), '<br>n=', length(u), '<br>', miss, ' missing'), type='median', connect=TRUE)) if(gmd) { gnam <- paste0('Gini ', mu$overbar(paste0('|', htmlGreek('Delta'), '|'))) Gmd <- GiniMd(u) R <- rbind(R, data.frame(x = xmin, xhi = xmin + Gmd, y = st - stdel * 2, yhi = NA, group=g, strata=st, txt=paste0(gnam, ': ', fmt(Gmd)), type=gnam, connect=NA)) } if(sd) { Sd <- sd(u) R <- rbind(R, data.frame(x = xmin, xhi = xmin + Sd, y = st - stdel * (if(gmd) 3 else 2), yhi = NA, group=g, strata=st, txt = paste0('SD:', fmt(Sd)), type = 'SD', connect=NA)) } if(nn >= nmin) { probs <- c(0.5, 0.25, 0.75, 0.05, 0.95) qu <- quant(u, probs) nam <- paste0('Q', mu$sub(probs)) txt <- paste0(nam, ':', fmt(qu)) mult <- c(1.15, 1, 1, 0.64, 0.64) yinc <- 0.3 * stdir * stdel * mult ycenter <- st + stdir * stdel + stoffset * maxp * propfac R <- rbind(R, data.frame(x = qu, xhi = NA, y = ycenter - yinc, yhi = ycenter + yinc, group=g, strata=st, txt = txt, type='quantiles', connect=FALSE)) R <- rbind(R, data.frame(x = min(qu), xhi = max(qu), y = ycenter, yhi = ycenter, group=g, strata=st, txt = '', type='quantiles', connect=FALSE)) } } ## end groups } ## end strata R } histboxpM <- function(p=plotly::plot_ly(height=height, width=width), x, group=NULL, gmd=TRUE, sd=FALSE, width=NULL, nrows=NULL, ncols=NULL, ...) { if (!requireNamespace("plotly")) stop("This function requires the 'plotly' package.") ## See stackoverflow.com/questions/26939121 ## stackoverflow.com/questions/39948151 nx <- if(is.data.frame(x)) ncol(x) else 1 ng <- if(length(group)) length(unique(group)) else 1 height <- nx * (plotlyParm$heightDotchart(1.2 * ng) + 50 * (gmd & sd)) height <- min(height, 1700) auto <- .Options$plotlyauto if(length(auto) && auto) height <- width <- NULL nam <- deparse(substitute(x)) if(is.data.frame(x) && ncol(x) == 1) x <- x[[1]] if(! is.data.frame(x)) return(histboxp(p=p, x=x, group=group, xlab=labelPlotmath(label(x, default=nam), units(x), html=TRUE), gmd=gmd, sd=sd, ...)) P <- list() for(i in 1 : nx) { y <- x[[i]] xlab <- labelPlotmath(label(y, default=names(x)[i]), units(y), html=TRUE) P[[i]] <- histboxp(p, x=y, group=group, xlab=xlab, showlegend=i==nx, gmd=gmd, sd=sd, ...) } if(length(ncols)) nrows <- ceil(ncol(x) / ncols) else if(! length(nrows)) nrows <- ncol(x) plotly::subplot(P, nrows=nrows, shareX=FALSE, shareY=FALSE, titleX=TRUE, margin=c(.02, .02, .05, .04)) } ecdfpM <- function(x, group=NULL, what=c('F','1-F','f','1-f'), q=NULL, extra=c(0.025, 0.025), xlab=NULL, ylab=NULL, height=NULL, width=NULL, colors=NULL, nrows=NULL, ncols=NULL, ...) { if (!requireNamespace("plotly")) stop("This function requires the 'plotly' package.") auto <- .Options$plotlyauto if(length(auto) && auto) height <- width <- NULL what <- match.arg(what) nam <- deparse(substitute(x)) if(! is.data.frame(x)) x <- data.frame(x) nx <- ncol(x) if(! 
length(group)) group <- rep('', nrow(x)) group <- as.factor(group) fmt <- function(x) htmlSN(x, digits=5) trans <- switch(what, 'F' = function(y) y, '1-F' = function(y) 1 - y, 'f' = function(y) n * y, '1-f' = function(y) n * (1 - y)) P <- list() kq <- length(q) for(i in 1 : nx) { y <- x[[i]] rng <- range(y, na.rm=TRUE) xl <- if(! length(xlab)) labelPlotmath(label(y, default=names(x)[i]), units(y), html=TRUE) else xlab[min(length(xlab), i)] D <- Dq <- NULL p <- plotly::plot_ly(height=height, width=width) for(gv in levels(group)) { j <- group == gv & ! is.na(y) yg <- sort(y[j]) n <- length(yg) vals <- unique(yg) # see stats::ecdf a <- approx(vals, cumsum(tabulate(match(yg, vals))) / n, method='constant', yleft=0, yright=1, f=0, ties='ordered', xout=vals) delta <- diff(rng) a$x <- c(min(a$x) - extra[1] * delta, a$x, max(a$x) + extra[2] * delta) a$y <- c(0, a$y, 1) yg <- a$y d <- data.frame(x = a$x, y = trans(yg), g=gv) D <- rbind(D, d) if(kq) for(k in 1 : kq) { quant <- min(a$x[yg >= q[k]]) tx <- paste0(q[k], ' quantile of ', names(x)[i], ':', fmt(quant)) if(gv != '') tx <- paste0(tx, '<br>', gv) Dq <- rbind(Dq, data.frame(x=rng[1], xe=quant, y=trans(q[k]), ye=trans(q[k]), g=gv, txt=''), data.frame(x=quant, xe=quant, y=trans(q[k]), ye=trans(0), g=gv, txt=tx)) } } linet <- if(what %in% c('F', 'f')) 'hv' else 'vh' p <- plotly::add_lines(p, data=D, x= ~ x, y= ~ y, showlegend = i == nx, color= ~ g, colors=colors, line=list(shape=linet), ...) if(kq) p <- plotly::add_segments(p, data=Dq, x = ~ x, xend = ~ xe, y = ~ y, yend = ~ ye, color = ~ g, colors = colors, alpha = 0.4, text = ~ txt, hoverinfo='text', name = 'Quantiles', legendgroup = "quantiles", showlegend = i == nx) yt <- if(length(ylab)) ylab else if(what %in% c('F', '1-F')) 'Cumulative Proportion' else 'Number of Observations' p <- plotly::layout(p, xaxis = list(title=xl, zeroline=FALSE), yaxis = list(title=yt)) P[[i]] <- p } if(nx == 1) return(p) if(length(ncols)) nrows <- ceil(nx / ncols) else if(! 
length(nrows)) nrows <- nx
  plotly::subplot(P, nrows=nrows, shareX=FALSE, titleX=TRUE,
                  margin=c(.02, .02, .05, .04))
}

## ----- Hmisc/R/event.chart.s -----

## event.chart.q: eventchart program 1.0 (creates function event.chart)
##
## last edited: 9-27-97
## last edited: 10-20-98, add pty='m' for the default plotting;
##  one may change to pty='s' to get the 'square plot' for the
##  Goldman's Event Chart
## FEH changes 9may02 for R

event.chart <-
  function(data, subset.r = 1:dim(data)[1], subset.c = 1:dim(data)[2],

           sort.by = NA, sort.ascending = TRUE,
           sort.na.last = TRUE, sort.after.subset = TRUE,
           y.var = NA, y.var.type = "n",
           y.jitter = FALSE, y.jitter.factor = 1,
           y.renum = FALSE, NA.rm = FALSE, x.reference = NA,
           now = max(data[, subset.c], na.rm = TRUE),
           now.line = FALSE, now.line.lty = 2,
           now.line.lwd = 1, now.line.col = 1, pty = "m",
           date.orig = c(1, 1, 1960), titl = "Event Chart",

           y.idlabels = NA, y.axis = "auto",
           y.axis.custom.at = NA, y.axis.custom.labels = NA,
           y.julian = FALSE, y.lim.extend = c(0, 0),
           y.lab = ifelse(is.na(y.idlabels), "", as.character(y.idlabels)),

           x.axis.all = TRUE, x.axis = "auto",
           x.axis.custom.at = NA, x.axis.custom.labels = NA,
           x.julian = FALSE, x.lim.extend = c(0, 0), x.scale = 1,
           x.lab = ifelse(x.julian, "Follow-up Time", "Study Date"),

           line.by = NA, line.lty = 1, line.lwd = 1, line.col = 1,
           line.add = NA, line.add.lty = NA,
           line.add.lwd = NA, line.add.col = NA,
           point.pch = 1:length(subset.c),
           point.cex = rep(0.6, length(subset.c)),
           point.col = rep(1, length(subset.c)),

           point.cex.mult = 1., point.cex.mult.var = NA,
           extra.points.no.mult = rep(NA, length(subset.c)),

           legend.plot = FALSE, legend.location = "o", legend.titl = titl,
           legend.titl.cex = 3, legend.titl.line = 1,
           legend.point.at = list(x = c(5, 95), y = c(95, 30)),
           legend.point.pch = point.pch,
           legend.point.text =
             ifelse(rep(is.data.frame(data), length(subset.c)),
                    names(data[, subset.c]), subset.c),
           legend.cex = 2.5, legend.bty = "n",
           legend.line.at = list(x = c(5, 95), y = c(20, 5)),
           legend.line.text =
             names(table(as.character(data[, line.by]),
                         exclude = c("", "NA"))),
           legend.line.lwd = line.lwd, legend.loc.num = 1, ...)
{
  legnd <- function(..., pch) {
    if(missing(pch)) legend(...)
    else legend(..., pch = pch)
  }

  month.day.year <- function(jul, origin.) {
    if (missing(origin.) || is.null(origin.)) {
      origin. <- .Options$chron.origin
      if (is.null(origin.))
        origin. <- c(month = 1, day = 1, year = 1960)
    }

    shift <- if (all(origin. == 0)) 0 else julian(origin = origin.)
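    ## (Added explanatory comment, hedged) 'shift' re-expresses jul as an
    ## absolute Julian day count; the constant 1721119 subtracted below is
    ## the classic offset (Julian day of 1 March, year 0, proleptic
    ## Gregorian) used by this standard day-count-to-calendar algorithm.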
## relative origin ## "absolute" origin j <- jul + shift j <- j - 1721119 y <- (4 * j - 1) %/% 146097 j <- 4 * j - 1 - 146097 * y d <- j %/% 4 j <- (4 * d + 3) %/% 1461 d <- 4 * d + 3 - 1461 * j d <- (d + 4) %/% 4 m <- (5 * d - 3) %/% 153 d <- 5 * d - 3 - 153 * m d <- (d + 5) %/% 5 y <- 100 * y + j y <- y + ifelse(m < 10, 0, 1) m <- m + ifelse(m < 10, 3, -9) list(month = m, day = d, year = y) } ## julian.r ## Convert between Julian and Calendar Dates julian <- function(m, d, y, origin.) { only.origin <- all(missing(m), missing(d), missing(y)) if (only.origin) m <- d <- y <- NULL ## return days since origin if (missing(origin.) || is.null(origin.)) { origin. <- .Options$chron.origin if (is.null(origin.)) origin. <- c(month = 1, day = 1, year = 1960) } nms <- names(d) max.len <- max(length(m), length(d), length(y)) ## prepend new origin value and rep out to common max. length: m <- c(origin.[1], rep(m, length = max.len)) d <- c(origin.[2], rep(d, length = max.len)) y <- c(origin.[3], rep(y, length = max.len)) ## code from julian date in the S book (p.269) y <- y + ifelse(m > 2, 0, -1) m <- m + ifelse(m > 2, -3, 9) c <- y %/% 100 ya <- y - 100 * c out <- (146097 * c) %/% 4 + (1461 * ya) %/% 4 + (153 * m + 2) %/% 5 + d + 1721119 ## now subtract the new origin from all dates if (!only.origin) { if (all(origin. == 0)) out <- out[-1] else out <- out[-1] - out[1] } names(out) <- nms out } ## stop function if unacceptable violations occur ## (other stops may occur later) if (!is.matrix(data) && !is.data.frame(data)) stop("argument data must be a matrix or a data frame\n") ## section 1: do necessary subsetting and sorting of data targodata <- apply(data[, subset.c, drop = FALSE], 2, as.numeric) ## targodata for target 'overall' data if (!is.na(x.reference)) targodata <- apply(targodata - data[, x.reference], 2, as.numeric) ## start of sort routine if (!is.na(sort.by[1])) { if (sort.after.subset == TRUE) data <- data[subset.r, ] m <- dim(data)[1] keys <- 1:m rotate <- m:1 length.sort.by <- length(sort.by) asc <- rep(sort.ascending, length.sort.by) for (i in length.sort.by:1) { if (asc[i]) keys[] <- keys[sort.list(data[, sort.by[[i]]][keys], na.last = sort.na.last)] else keys[] <- keys[order(data[, sort.by[[i]]][keys], rotate, na.last = sort.na.last)[rotate]] } data <- data[keys, ] if (sort.after.subset == FALSE) { subset.r <- (1:dim(data)[1])[subset.r] targdata <- apply(data[subset.r, subset.c, drop = FALSE], 2, as.numeric) } else if (sort.after.subset == TRUE) { targdata <- apply(data[, subset.c, drop = FALSE], 2, as.numeric) subset.ro <- (1:dim(data)[1])[subset.r] subset.r <- seq(length(subset.ro)) } } else if (is.na(sort.by[1])) { subset.r <- (1:dim(data)[1])[subset.r] targdata <- apply(data[subset.r, subset.c, drop = FALSE], 2, as.numeric) } ## end of sort routine ## start to deal with missing values and renumbering y-axis if (NA.rm == TRUE) { whotoplot <- subset.r[!(apply(is.na(targdata), 1, all))] ## whotoplot is for use for data matrix(dataframe); ## essentially who will be plotted from data t.whotoplot <- seq(dim(targdata)[1])[!(apply(is.na(targdata), 1, all))] ## t.whotoplot is for use for targdata matrix(dataframe); ## essentially, who will be plotted from targdata if (y.renum == TRUE) { whattoplot <- seq(subset.r[!(apply(is.na(targdata), 1, all))]) ## whattoplot is what will be plotted on y-axis of event chart } else if (y.renum == FALSE) { if ((!is.na(sort.by[1]) & sort.after.subset == FALSE) | (is.na(sort.by[1]))) whattoplot <- subset.r[!(apply(is.na(targdata), 1, all))] else if 
(!is.na(sort.by[1]) & sort.after.subset == TRUE) whattoplot <- subset.ro[!(apply(is.na(targdata), 1, all))] } } else if (NA.rm == FALSE) { whotoplot <- subset.r t.whotoplot <- seq(dim(targdata)[1]) if (y.renum == TRUE) whattoplot <- seq(subset.r) else if (y.renum == FALSE) { if ((!is.na(sort.by[1]) & sort.after.subset == FALSE) | (is.na(sort.by[1]))) whattoplot <- subset.r else if (!is.na(sort.by[1]) & sort.after.subset == TRUE) whattoplot <- subset.ro } } ## end of dealing with missing values and renumbering of y-axis ## section 2: perform necessary manipulations using x.reference and y.var ## deal with re-referencing x-axis with x.reference if (!is.na(x.reference)) { targdata <- apply(targdata - data[subset.r, x.reference], 2, as.numeric) if (NA.rm == TRUE) { x.referencew <- data[whotoplot, x.reference] whotoplot <- whotoplot[!is.na(x.referencew)] t.whotoplot <- t.whotoplot[!is.na(x.referencew)] whattoplot.ref <- whattoplot[!is.na(x.referencew)] if (y.renum == FALSE) { if ((!is.na(sort.by[1]) & sort.after.subset == FALSE) | (is.na(sort.by[1]))) whattoplot <- seq(subset.r[1], subset.r[1] + length(whattoplot.ref) - 1) else if (!is.na(sort.by[1]) & sort.after.subset == TRUE) whattoplot <- seq(subset.ro[1], subset.ro[1] + length(whattoplot.ref) - 1) } else if (y.renum == TRUE) whattoplot <- seq(length(whattoplot.ref)) } } ## deal with using a data frame variable to place lines on y-axis if (!is.na(y.var)) { if (!is.na(sort.by[1])) stop("cannot use sort.by and y.var simultaneously\n") y.varw <- as.numeric(data[whotoplot, y.var]) whotoplot <- whotoplot[!is.na(y.varw)] t.whotoplot <- t.whotoplot[!is.na(y.varw)] whattoplot <- y.varw[!is.na(y.varw)] if (y.jitter == TRUE) { range.data <- diff(range(whattoplot)) range.unif <- y.jitter.factor * (range.data / (2 * (length(whattoplot) - 1))) whattoplot <- whattoplot + runif(length(whattoplot), -(range.unif), range.unif) } } sort.what <- sort(whattoplot) length.what <- length(whattoplot) ## section 3: perform all plotting ## first, make sure length of point.pch, cex, col is same as subset.c len.c <- length(subset.c) if (length(point.pch) < len.c) { warning("length(point.pch) < length(subset.c)") point.pch <- rep(point.pch, len.c)[1:len.c] } if (length(point.cex) < len.c) { warning("length(point.cex) < length(subset.c)") point.cex <- rep(point.cex, len.c)[1:len.c] } if (length(point.col) < len.c) { warning("length(point.col) < length(subset.c)") point.col <- rep(point.col, len.c)[1:len.c] } ## set default of par(new = FALSE) par(new = FALSE) ## plot external legend (if requested) if (legend.plot == TRUE && legend.location == "o") { plot(1, 1, type = "n", xlim = c(0, 100), ylim = c(0, 100), axes = FALSE, xlab = "", ylab = "") mtext(legend.titl, line = legend.titl.line, outer = FALSE, cex = legend.titl.cex) legnd(legend.point.at[[1]], legend.point.at[[2]], leg = legend.point.text, pch = legend.point.pch, cex = legend.cex, col = point.col, bty = legend.bty) if (!is.na(line.by)) { par(new = TRUE) legnd(legend.line.at[[1]], legend.line.at[[2]], leg = legend.line.text, cex = legend.cex, lty = line.lty, lwd = legend.line.lwd, col = line.col, bty = legend.bty) } invisible(par(ask = TRUE)) } ## start creating objects to be used in determining plot region targdata <- targdata / x.scale targodata <- targodata / x.scale minvec <- apply(targdata[t.whotoplot,, drop = FALSE], 1, min, na.rm = TRUE) minotime <- ifelse(x.axis.all, min(apply(targodata, 1, min, na.rm = TRUE), na.rm = TRUE), min(minvec, na.rm = TRUE)) maxvec <- apply(targdata[t.whotoplot,, drop = FALSE], 1, 
max, na.rm = TRUE) maxotime <- ifelse(x.axis.all, max(apply(targodata, 1, max, na.rm = TRUE), na.rm = TRUE), max(maxvec, na.rm = TRUE)) ## determine par parameters and plot graphical region based ## on request of y.var and, subsequently, y.var.type and now.line y.axis.top <- sort.what[length.what] + y.lim.extend[2] y.axis.bottom <- sort.what[1] - y.lim.extend[1] x.axis.right <- maxotime + x.lim.extend[2] x.axis.left <- minotime - x.lim.extend[1] if (!is.na(y.var) & y.var.type == "d") { oldpar <- par(omi = rep(0, 4), lwd = 0.6, mgp = c(3.05, 1.1, 0), tck = -0.006, ...) ## set pty par(pty = pty) plot(whattoplot, type = "n", xlim = c(x.axis.left, ifelse(now.line, (now - (min(data[, subset.c], na.rm = TRUE))) / x.scale, x.axis.right)), ylim = c(y.axis.bottom, ifelse(pty == "s", now, y.axis.top)), xlab = x.lab, ylab = y.lab, axes = FALSE) if (now.line == TRUE) abline(now, ((sort.what[1] - now) / (((now - min(data[, subset.c], na.rm = TRUE)) / x.scale) - minotime)), lty = now.line.lty, lwd = now.line.lwd, col = now.line.col) } else if (is.na(y.var) | (!is.na(y.var) & y.var.type == "n")) { if (now.line == TRUE) stop("with now.line == TRUE, y.var & y.var.type == 'd' must be specified\n") oldpar <- par(omi = rep(0, 4), lwd = 0.6, mgp = c(2.8, 1.1, 0), tck = -0.006, ...) plot(whattoplot, type = "n", xlim = c(x.axis.left, x.axis.right), ylim = c(y.axis.bottom - 1, y.axis.top + 1), xlab = x.lab, ylab = y.lab, axes = FALSE) } ## plot y-axis labels if (!is.na(y.idlabels)) { if (!is.na(y.var)) { warning("y.idlabels not used when y.var has been specified\n") axis(side = 2) } else if (is.na(y.var)) axis(side = 2, at = whattoplot, labels = as.vector(data[whotoplot, y.idlabels])) } else if (is.na(y.idlabels)) { if (y.axis == "auto") { if (is.na(y.var) | (!is.na(y.var) & y.var.type == "n")) axis(side = 2) else if (!is.na(y.var) & y.var.type == "d") { if (y.julian == FALSE) { y.axis.auto.now.bottom <- ifelse(now.line, sort.what[1], y.axis.bottom) ## marked by JJL, disable square plot ##y.axis.auto.now.top <- ifelse(now.line, now, y.axis.top) y.axis.auto.now.top <- ifelse(now.line, y.axis.top, y.axis.top) y.axis.auto.at <- round(seq(y.axis.auto.now.bottom, y.axis.auto.now.top, length = 5)) y.axis.auto.labels <- paste(month.day.year(y.axis.auto.at, origin = date.orig)$month, "/", month.day.year(y.axis.auto.at, origin = date.orig)$day, "/", substring(month.day.year(y.axis.auto.at, origin = date.orig)$year, 3, 4), sep = "") axis(side = 2, at = y.axis.auto.at, labels = y.axis.auto.labels) } else if (y.julian == TRUE) axis(side = 2) } } else if (y.axis == "custom") { if (is.na(y.axis.custom.at[1]) || is.na(y.axis.custom.labels[1])) stop("with y.axis == 'custom', must specify y.axis.custom.at and y.axis.custom.labels\n") axis(side = 2, at = y.axis.custom.at, labels = y.axis.custom.labels) } } ## plot x-axis labels if (x.axis == "auto") { if (x.julian == FALSE) { x.axis.auto.at <- round(seq(x.axis.left, x.axis.right, length = 5)) x.axis.auto.labels <- paste(month.day.year(x.axis.auto.at, origin = date.orig)$month, "/", month.day.year(x.axis.auto.at, origin = date.orig)$day, "/", substring(month.day.year(x.axis.auto.at, origin = date.orig)$year, 3, 4), sep = "") axis(side = 1, at = x.axis.auto.at, labels = x.axis.auto.labels) } else if (x.julian == TRUE) axis(side = 1) } else if (x.axis == "custom") { if (is.na(x.axis.custom.at[1]) || is.na(x.axis.custom.labels[1])) stop("with x.axis = 'custom', user must specify x.axis.custom.at and x.axis.custom.labels\n") axis(side = 1, at = x.axis.custom.at, labels = 
x.axis.custom.labels) } if (!is.na(titl)) { title(titl) } ## plot lines and points if (!is.na(line.by)) { line.byw <- data[whotoplot, line.by] table.by <- table(as.character(line.byw), exclude = c("", "NA")) names.by <- names(table.by) len.by <- length(table.by) if (length(line.lty) < len.by) warning("user provided length(line.lty) < num. of line.by categories") if (length(line.lwd) < len.by) warning("user provided length(line.lwd) < num. of line.by categories") if (length(line.col) < len.by) warning("user provided length(line.col) < num. of line.by categories") line.lty <- rep(line.lty, len = len.by) line.lwd <- rep(line.lwd, len = len.by) line.col <- rep(line.col, len = len.by) lbt.whotoplot <- (1:(length(t.whotoplot)))[as.character(line.byw) != "" & as.character(line.byw) != "NA"] for (i in lbt.whotoplot) { lines(c(minvec[i], maxvec[i]), rep(whattoplot[i], 2), lty = as.vector(line.lty[names.by == line.byw[i]]), lwd = as.vector(line.lwd[names.by == line.byw[i]]), col = as.vector(line.col[names.by == line.byw[i]])) } } else if (is.na(line.by)) { for (i in 1:length(t.whotoplot)) lines(c(minvec[i], maxvec[i]), rep(whattoplot[i], 2), lty = line.lty[1], lwd = line.lwd[1], col = line.col[1]) } if(is.na(point.cex.mult.var[1])) for(j in 1:dim(targdata)[2]) points(as.vector(unlist(targdata[t.whotoplot, j])), whattoplot, pch = point.pch[j], cex = point.cex[j], col = point.col[j]) else { ## loop only for extra points for(j in which(!is.na(extra.points.no.mult))) points(as.vector(unlist(targdata[t.whotoplot, j])), whattoplot, pch = point.pch[j], cex = point.cex[j], col = point.col[j]) ## loop for points to magnify based on level of covariates for(i in 1:length(t.whotoplot)) { k <- 0 for(j in which(is.na(extra.points.no.mult))) { k <- k + 1 points(as.vector(unlist(targdata[t.whotoplot[i], j])), whattoplot[i], pch = point.pch[j], cex = ifelse(is.na(data[whotoplot[i], point.cex.mult.var[k]]), point.cex[j], point.cex.mult * data[whotoplot[i], point.cex.mult.var[k]] * point.cex[j]), col = point.col[j]) } } } ## add line.add segments (if requested) if (!is.na(as.vector(line.add)[1])) { if (any(is.na(line.add.lty))) stop("line.add.lty can not have missing value(s) with non-missing line.add\n") if (any(is.na(line.add.lwd))) stop("line.add.lwd can not have missing value(s) with non-missing line.add\n") if (any(is.na(line.add.col))) stop("line.add.col can not have missing value(s) with non-missing line.add\n") line.add.m <- as.matrix(line.add) dim.m <- dim(line.add.m) if (dim.m[1] != 2) stop("line.add must be a matrix with two rows\n") if (length(line.add.lty) != dim.m[2]) stop("length of line.add.lty must be the same as number of columns in line.add\n") if (length(line.add.lwd) != dim.m[2]) stop("length of line.add.lwd must be the same as number of columns in line.add\n") if (length(line.add.col) != dim.m[2]) stop("length of line.add.col must be the same as number of columns in line.add\n") for (j in (1:dim.m[2])) { for (i in (1:length(t.whotoplot))) { add.var1 <- subset.c == line.add.m[1, j] if (!any(add.var1)) stop("variables chosen in line.add must also be in subset.c\n") add.var2 <- subset.c == line.add.m[2, j] if (!any(add.var2)) stop("variables chosen in line.add must also be in subset.c\n") segments(targdata[i, (1:len.c)[add.var1]], whattoplot[i], targdata[i, (1:len.c)[add.var2]], whattoplot[i], lty = line.add.lty[j], lwd = line.add.lwd[j], col = line.add.col[j]) } } } ## plot internal legend (if requested) if (legend.plot == TRUE & legend.location != "o") { if (legend.location == "i") { 
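      ## legend.location: "o" = separate legend page (handled earlier),
      ## "i" = draw inside the plot at legend.point.at / legend.line.at,
      ## "l" = let the user place each legend interactively via locator()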
      legnd(legend.point.at[[1]], legend.point.at[[2]],
            leg = legend.point.text, pch = legend.point.pch,
            cex = legend.cex, col = point.col, bty = legend.bty)
      if (!is.na(line.by))
        legnd(legend.line.at[[1]], legend.line.at[[2]],
              leg = legend.line.text, cex = legend.cex,
              lty = line.lty, lwd = legend.line.lwd,
              col = line.col, bty = legend.bty)
    } else if (legend.location == "l") {
      cat("Please click at desired location to place legend for points.\n")
      legnd(locator(legend.loc.num), leg = legend.point.text,
            pch = legend.point.pch, cex = legend.cex,
            col = point.col, bty = legend.bty)
      if (!is.na(line.by)) {
        cat("Please click at desired location to place legend for lines.\n")
        legnd(locator(legend.loc.num), leg = legend.line.text,
              cex = legend.cex, lty = line.lty,
              lwd = legend.line.lwd, col = line.col, bty = legend.bty)
      }
    }
  }

  ## add box to main plot and clean up
  invisible(box())
  invisible(par(ask = FALSE))
  par(oldpar)
}

## event.convert.s
## Convert 2-column coded events to multiple event times for event.chart()
## input:  a matrix or data frame with at least 2 columns;
##         by default, the first column contains the event time and
##         the second column contains the k event codes
##         (e.g. 1=dead, 0=censored)
## output: a matrix of k columns; each column contains the times of the
##         kth coded event
event.convert <- function(data2, event.time = 1, event.code = 2)
{
  dim.d <- dim(data2)
  len.t <- length(event.time)
  if(len.t != length(event.code))
    stop("length of event.time and event.code must be the same")
  if(any(event.time > dim.d[2]))
    stop(paste("Column(s) in event.time cannot be greater than ", dim.d[2]))
  if(any(event.code > dim.d[2]))
    stop(paste("Column(s) in event.code cannot be greater than ", dim.d[2]))
  name.data <- names(data2)[event.time]
  if(is.null(name.data))
    name.data <- paste("V", event.time, sep = "")

  n.level <- rep(NA, len.t)
  for (i in (1:len.t)) {
    n.level[i] <- length(table(data2[, event.code[i]]))
  }
  tot.col <- sum(n.level)
  data.out <- matrix(NA, dim.d[1], tot.col)
  name.col <- rep(NA, tot.col)
  n.col <- 1
  for (i in (1:len.t)) {
    tab.d <- table(data2[, event.code[i]])
    if(is.null(class(data2[, event.code[i]])))
      level.value <- as.numeric(names(tab.d))
    else level.value <- names(tab.d)
    for (j in (1:length(tab.d))) {
      data.out[, n.col] <- rep(NA, dim.d[1])
      check <- data2[, event.code[i]] == level.value[j]
      check[is.na(check)] <- FALSE
      data.out[, n.col][data2[, event.code[i]] == level.value[j]] <-
        data2[, event.time[i]][check]
      name.col[n.col] <- paste(name.data[i], ".", names(tab.d)[j], sep = "")
      n.col <- n.col + 1
    }
  }
  dimnames(data.out) <- list(1:dim.d[1], name.col)
  return(as.matrix(data.out))
}
Hmisc/R/pc1.s
pc1 <- function(x, hi)
{
  p <- ncol(x)
  x <- x[!is.na(x %*% rep(1,p)),]
  xo <- x
  for(i in 1:p) {
    y <- x[,i]
    x[,i] <- (y-mean(y))/sqrt(var(y))
  }
  g <- prcomp(x)
  cat("Fraction
variance explained by PC1:",format(g$sdev[1]^2/sum(g$sdev^2)), "\n\n") pc1 <- g$x[,1] f <- lsfit(xo, pc1) if(!missing(hi)) { if(sum(f$coef[-1]<0) >= p/2) pc1 <- -pc1 r <- range(pc1) pc1 <- hi*(pc1-r[1])/diff(r) f <- lsfit(xo, pc1) } cat("Coefficients to obtain PC1:\n\n") print(f$coef) attr(pc1,"coef") <- f$coef invisible(pc1) } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/bootkm.s������������������������������������������������������������������������������������0000644�0001762�0000144�00000001767�12257362020�013207� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bootkm <- function(S, q=.5, B=500, times, pr=TRUE) { tthere <- !missing(times) if(tthere && length(times)>1) stop('presently bootkm only works for a single time') S <- S[!is.na(S),] n <- nrow(S) stratvar <- factor(rep(1,nrow(S))) f <- survfitKM(stratvar, S) tt <- c(0, f$time) ss <- c(1, f$surv) if(!tthere) { if(ss[length(ss)] > q) stop(paste('overall Kaplan-Meier estimate does not fall below',q)) } else { if(tt[length(tt)] < times) stop(paste('overall Kaplan-Meier estimate not defined to time',times)) } ests <- double(B) for(i in 1:B) { if(pr && (i %% 10)==0) cat(i,'\r') f <- survfitKM(stratvar, S[sample(n,n,replace=TRUE),], se.fit=FALSE, conf.type='none') tt <- c(0, f$time) ss <- c(1, f$surv) ests[i] <- if(tthere) approx(tt, ss, xout=times, method='constant', f=0)$y else min(tt[ss <= q]) #is NA if none } if(pr) cat('\n') ests } ���������Hmisc/R/reShape.s�����������������������������������������������������������������������������������0000644�0001762�0000144�00000005412�12243661443�013300� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������reShape <- function(x, ..., id, colvar, base, reps, times=1:reps, timevar='seqno', constant=NULL) { if(!missing(base)) { if(!is.list(x)) stop('x must be a list or data frame when base is given') repvars <- as.vector(outer(base,1:reps,paste,sep='')) nam <- names(x) nonrep <- nam[nam %nin% repvars] res <- vector('list', 1+length(nonrep)+length(base)) names(res) <- c(timevar, nonrep, base) x1 <- x[[1]] n <- if(is.matrix(x1)) nrow(x1) else length(x1) res[[1]] <- rep(times[1:reps], n) for(i in nonrep) res[[i]] <- rep(x[[i]], rep(reps,n)) ## Get indexes that will put unlist() in right order k <- as.vector(matrix(1:(reps*n), nrow=reps, byrow=TRUE)) for(i in base) { bn <- paste(i, 1:reps, sep='') x1 <- x[[bn[1]]] at <- attributes(x1) at$names <- NULL x1 <- unlist(x[bn])[k] if(length(at)) attributes(x1) <- at res[[i]] <- x1 } if(is.data.frame(x)) { rn <- attr(x,'row.names') ln <- length(rn) if(ln) { ## R 
calls data.frame even if specify structure, and R does ## not have dup.row.names argument to data.frame as does S+ return(data.frame(res, row.names=paste(rep(rn,rep(reps,ln)), rep(1:reps,n)))) } } return(res) } if(is.matrix(x)) { y <- as.vector(x) v1 <- all.is.numeric(dimnames(x)[[1]][row(x)],'vector') v2 <- all.is.numeric(dimnames(x)[[2]][col(x)],'vector') w <- list(v1, v2, y) names(w) <- c('rowvar','colvar',as.character(substitute(x))) if(length(nd <- names(dimnames(x)))) names(w)[1:2] <- nd w } else { listid <- is.list(id) i <- as.factor(if(listid) do.call('paste', c(id, sep='~')) else id) colvar <- as.factor(colvar) m <- matrix(NA, nrow=length(levels(i)), ncol=length(levels(colvar)), dimnames=list(levels(i), levels(colvar))) dotlist <- list(...) if(!length(dotlist)) { m[cbind(i, colvar)] <- x if(listid) { j <- match(as.character(dimnames(m)[[1]]), as.character(i)) if(length(constant)) data.frame(id[j,,drop=FALSE], constant[j,,drop=FALSE], m) else data.frame(id[j,,drop=FALSE], m) } else m } else { res <- vector('list',nx <- 1+length(dotlist)) names(res) <- (as.character(sys.call())[-1])[1:nx] nam2 <- names(sys.call()[-1])[1:nx] if(length(nam2)) names(res) <- ifelse(nam2=='',names(res),nam2) w <- m; w[cbind(i, colvar)] <- x; res[[1]] <- w for(j in 2:nx) { w <- m; w[cbind(i, colvar)] <- dotlist[[j-1]] res[[j]] <- w } res } } } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/groupn.s������������������������������������������������������������������������������������0000644�0001762�0000144�00000001012�12243661443�013213� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������groupn <- function(x, y, m=150) { s <- !is.na(x + y) x<-x[s] y<-y[s] i<-order(x) x<-x[i] y<-y[i] n<-length(x) if(n<m) stop("m<number of observations in groupn") start <- 1 end <- m meanx <- NULL meany <- NULL while(end <= n) { meanx <- c(meanx,mean(x[start:end])) meany <- c(meany,mean(y[start:end])) start <- start+m end <- end+m } if(end > n) { meanx <- c(meanx,mean(x[n-m+1:n])) meany <- c(meany,mean(y[n-m+1:n])) } return(list(x=meanx,y=meany)) } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/nstr.s��������������������������������������������������������������������������������������0000644�0001762�0000144�00000000531�12243661443�012674� 0����������������������������������������������������������������������������������������������������ustar 
nstr <- function(string, times)
{
  if(!is.atomic(string))
    stop("argument string must be an atomic vector")
  if(!is.numeric(times))
    stop("argument times must be a numeric vector")

  if(length(string) == 0) return(NULL)
  if(length(times)  == 0) return(character(0))

  return(.Call("do_nstr", as.character(string), as.integer(times)))
}
Hmisc/R/cpower.s
## tref       time at which mortalities are estimated
## n          total sample size
## mc         tref-year mortality, control
## r          % reduction in mc by intervention
## accrual    duration of accrual period
## tmin       minimum follow-up time
## noncomp.c  % non-compliant in control group (drop-ins)
## noncomp.i  % non-compliant in intervention group (non-adherers)
## alpha      type I error
## nc         sample size for control (if not n/2)
## ni         sample size for intervention (if not n/2)
## pr         set to TRUE to print intermediate results
##
## Non-compliance is handled by an approximation of Eq. 5.4 of
## Lachin JM, Foulkes MA (1986): Evaluation of sample size and power for
## analyses of survival with allowance for nonuniform patient entry,
## losses to follow-up, noncompliance, and stratification.
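##
## Illustrative call (a sketch; the argument values below are hypothetical):
##   cpower(tref=5, n=1000, mc=.2, r=25, accrual=1.5, tmin=5,
##          noncomp.c=10, noncomp.i=15)
## estimates power to detect a 25% reduction in 5-year mortality from a
## control rate of 0.20, with 1.5 years of accrual, 5 years minimum
## follow-up, 10% drop-in, and 15% non-adherence.
##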
## Here we're using log hazard ratio instead of their hazard difference cpower <- function(tref, n, mc, r, accrual, tmin, noncomp.c=0, noncomp.i=0, alpha=.05, nc, ni, pr=TRUE) { if(mc>1) stop("mc should be a fraction") ## Find mortality in intervention group mi <- (1-r/100)*mc if(missing(nc) | missing(ni)) { nc <- n/2; ni <- n/2 } else n <- nc+ni if(pr) { cat("\nAccrual duration:",accrual,"y Minimum follow-up:",tmin,"y\n") cat("\nTotal sample size:",n,"\n") cat("\nAlpha=",alpha,"\n") d <- c("Control","Intervention") m <- c(mc,mi) names(m) <- d cat("\n",tref,"-year Mortalities\n",sep=""); print(m) } ## Find exponential hazards for all groups lamc <- -logb(1-mc)/tref lami <- -logb(1-mi)/tref if(pr) { lam <- c(lamc,lami) names(lam) <- d cat("\nHazard Rates\n"); print(lam) } ## Find probability that a subject will have her event observed during ## the study, for all groups tmax <- tmin+accrual pc <- if(accrual==0) 1-exp(-lamc*tmin) else 1-1/accrual/lamc*(exp(-tmin*lamc)-exp(-tmax*lamc)) pi <- if(accrual==0) 1-exp(-lami*tmin) else 1-1/accrual/lami*(exp(-tmin*lami)-exp(-tmax*lami)) if(pr) { p <- c(pc,pi) names(p) <- d cat("\nProbabilities of an Event During Study\n") print(p) } ## Find expected number of events, all groups mc <- pc*nc mi <- pi*ni if(pr) { m <- c(mc,mi) names(m) <- d cat("\nExpected Number of Events\n") print(round(m,1)) } ## Find expected value of observed log hazard ratio delta <- logb(lami/lamc) if(pr) cat("\nHazard ratio:",format(exp(delta)),"\n") if(noncomp.c+noncomp.i>0) { if(pr) cat("\nDrop-in rate (controls):",noncomp.c, "%\nNon-adherence rate (intervention):",noncomp.i,"%\n",sep="") delta <- delta * (1 - (noncomp.c+noncomp.i)/100) if(pr) cat("Effective hazard ratio with non-compliance:", format(exp(delta)),"\n") } ## Find its variance v <- 1/mc + 1/mi ## Get same as /sasmacro/samsizc.sas if use 4/(mc+mi) sd <- sqrt(v) if(pr) cat("Standard deviation of log hazard ratio:",format(sd),"\n") z <- -qnorm(alpha/2) c(Power = 1 - (pnorm(z - abs(delta)/sd) - pnorm(-z - abs(delta)/sd))) } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/histSpikeg.s��������������������������������������������������������������������������������0000644�0001762�0000144�00000013016�14112727067�014024� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������histSpikeg <- function(formula=NULL, predictions=NULL, data, plotly=NULL, lowess=FALSE, xlim=NULL, ylim=NULL, side=1, nint=100, frac=function(f) 0.01 + 0.02*sqrt(f-1) / sqrt(max(f,2)-1), span=3/4, histcol='black', showlegend=TRUE) { ## Raw data in data, predicted curves are in predictions ## If predictions is not given, side (1 or 3) is used v <- all.vars(formula) yv <- v[ 1] xv <- v[-1] X <- xv[1] if(lowess) { if(yv %nin% names(data)) stop(paste(yv, 'must be in data if lowess=TRUE')) yval <- data[[yv]] iter <- if(length(unique(yval[! is.na(yval)])) > 2) 3 else 0 lows <- function(x, y, iter, span) { i <- ! is.na(x + y) lowess(x[i], y[i], iter=iter, f=span) } } if(length(xv) > 1 && ! 
(lowess || length(predictions))) stop('predictions must be given or lowess=TRUE if formula has > 1 variable on the right') x1 <- data[[X]] if(length(xlim)) { data <- data[x1 >= xlim[1] & x1 <= xlim[2], ] x1 <- data[[X]] } if(length(unique(x1)) > nint) { eps <- diff(range(x1, na.rm=TRUE)) / nint x1 <- round(x1 / eps) * eps data[[X]] <- x1 } p <- predictions # xr <- NULL # if(length(p)) xr <- range(p[[X]], na.rm=TRUE) # else if(lowess) xr <- range(data[[X]], na.rm=TRUE) xr <- xlim if(length(xv) == 1) { tab <- as.data.frame(do.call(table, data[X])) tab[[X]] <- as.numeric(as.character(tab[[X]])) if(length(xr)) tab <- tab[tab[[X]] >= xr[1] & tab[[X]] <= xr[2], ] if(lowess) { p <- as.data.frame(lows(data[[X]], yval, iter, span)) names(p) <- c(X, yv) } if(length(p)) tab$.yy. <- approxExtrap(p[[X]], p[[yv]], xout=tab[[X]])$y } else { ## grouping variable(s) present tab <- as.data.frame(do.call(table, data[xv])) tab <- subset(tab, Freq > 0) tab[[X]] <- as.numeric(as.character(tab[[X]])) if(length(xr)) tab <- tab[tab[[X]] >= xr[1] & tab[[X]] <= xr[2], ] tab$.yy. <- rep(NA, nrow(tab)) gv <- xv[-1] # grouping variables U <- unique(tab[gv]) # unique combinations if(! lowess) { for(k in 1 : nrow(U)) { i <- rep(TRUE, nrow(tab)) j <- rep(TRUE, nrow(p)) for(l in 1 : ncol(U)) { currgroup <- as.character(U[k, gv[l], drop=TRUE]) i <- i & (tab[[gv[l]]] == currgroup) j <- j & (p[[gv[l]]] == currgroup) } ## now all grouping variables intersected tab$.yy.[i] <- approxExtrap(p[[X]][j], p[[yv]][j], xout=tab[[X]][i])$y } } ## end if(! lowess) else { # lowess; need to compute p p <- NULL for(k in 1 : nrow(U)) { i <- rep(TRUE, nrow(tab)) j <- rep(TRUE, nrow(data)) for(l in 1 : ncol(U)) { currgroup <- as.character(U[k, gv[l], drop=TRUE]) i <- i & (tab[[gv[l]]] == currgroup) j <- j & (data[[gv[l]]] == currgroup) } ## now all grouping variables intersected sm <- lows(data[[X]][j], data[[yv]][j], iter, span) Sm <- sm; names(Sm) <- c(X, yv) Uk <- U[k, , drop=FALSE]; row.names(Uk) <- NULL p <- rbind(p, data.frame(Uk, Sm)) tab$.yy.[i] <- approxExtrap(sm, xout=tab[[X]][i])$y } } ## end lowess } ## end grouping variables present if(! length(ylim)) { if(length(data) && yv %in% names(data)) ylim <- range(data[[yv]], na.rm=TRUE) else if(length(p)) ylim <- range(p[[yv]], na.rm=TRUE) } if(! length(ylim)) stop('no way to infer ylim from information provided') tab$.rf. <- frac(tab$Freq) * diff(ylim) n <- nrow(tab) tab$.ylo. <- if(length(p)) tab$.yy. - tab$.rf. else if(side == 1) rep(ylim[1], n) else rep(ylim[2], n) tab$.yhi. <- if(length(p)) tab$.yy. + tab$.rf. else if(side == 1) ylim[1] + tab$.rf. else ylim[2] - tab$.rf. hcol <- if(histcol == 'default') '' else sprintf(', col="%s"', histcol) P <- plotly if(! is.null(P) && requireNamespace("plotly")) { tab$.xx. <- tab[[X]] ## Got NA/NaN argument, numerical expression has 8 elements ## when using add_segments # P <- plotly:add_segments(P, data=tab, # x=~ .xx., xend=~ .xx., # y=~ .ylo., yend=~ .yhi., # hoverinfo='none', # name='Histogram', legendgroup='Histogram') n <- 3 * nrow(tab) .xx. <- .yy. <- rep(NA, n) .xx.[seq(1, n, by=3)] <- tab[[X]] .xx.[seq(2, n, by=3)] <- tab[[X]] .yy.[seq(1, n, by=3)] <- tab$.ylo. .yy.[seq(2, n, by=3)] <- tab$.yhi. 
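      ## Build the trace as one long vector pair in which indices 1 and 2
      ## of every triple hold (.ylo., .yhi.) and index 3 stays NA; plotly
      ## breaks the line at each NA, so add_lines renders the pairs as
      ## disconnected vertical histogram spikes in a single trace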
P <- plotly::add_lines(P, data=data.frame(.xx., .yy.), x=~.xx., y=~.yy., line=list(color=histcol, width=1.4), hoverinfo='none', showlegend=showlegend, name='Histogram', legendgroup='Histogram') if(lowess) { af <- function(x) as.formula(paste('~', x)) P <- plotly::add_lines(P, data=p, x=af(X), y=af(yv), hoverinfo='none', showlegend=showlegend, name='loess', legendgroup='loess') } return(P) } res <- eval(parse(text=sprintf('ggplot2::geom_segment(data=tab, aes(x=%s, xend=%s, y=.ylo., yend=.yhi.), size=.25 %s)', X, X, hcol))) if(lowess) res <- list(hist=res, lowess=eval(parse(text= sprintf('ggplot2::geom_line(data=p, aes(x=%s, y=%s))', X, yv)))) res } utils::globalVariables(c('aes', 'Freq', '.ylo.', '.yhi.', 'x', 'y')) ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/matxv.s�������������������������������������������������������������������������������������0000644�0001762�0000144�00000005051�12243661443�013047� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������## Multiply matrix by a vector ## vector can be same length as # columns in a, or can be longer, ## in which case b[kint] is added to a * b[s:length(b)], s=length(b)-ncol(a)+1 ## F. Harrell 17 Oct90 ## Mod 5 Jul91 - is.vector -> !is.matrix ## 16 Oct91 - as.matrix -> matrix(,nrow=1) ## 29 Oct91 - allow b to be arbitrarily longer than ncol(a), use b(1) ## 13 Nov91 - matrix(,nrow=1) -> matrix(,ncol=1) ## 14 Nov91 - changed to nrow=1 if length(b)>1, ncol=1 otherwise ## 25 Mar93 - changed to use %*% ## 13 Sep93 - added kint parameter ## 22 Jun13 - allowed null kint, matrix b (e.g. bootstrap coefs) ## 3 Jul13 - sense intercepts attribute in b which signals ## which subset of intercepts were retained in fit matxv <- function(a, b, kint=1, bmat=FALSE) { bi <- attr(b, 'intercepts') lbi <- length(bi) lkint <- length(kint) if(lkint > 1L) stop('kint must have length 0 or 1') if(bmat) { if(!is.matrix(a)) stop('a must be a matrix when b is a matrix') ca <- ncol(a); cb <- ncol(b) if(cb < ca) stop('number of columns in b must be >= number in a') if(cb == ca) return(a %*% t(b)) excess <- cb - ca xx <- matrix(0, nrow=nrow(a), ncol=excess) if(lbi && lkint) { if(lbi != excess) stop('b intercepts attribute has different length from number of excess elements in b') bi <- round(bi) kint <- round(kint) if(!isTRUE(all.equal(sort(bi), sort(kint)))) stop('b intercepts attribute do not match kint') xx[] <- 1. } else if(lkint) { if(kint > excess) stop('kint > number of excess elements in b') xx[,kint] <- 1. 
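      ## a 1 in column kint makes the kint-th intercept in b contribute
      ## once per row when the excess columns are prepended below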
} return(cbind(xx, a) %*% t(b)) } if(!is.matrix(a)) a <- if(length(b) == 1L) matrix(a, ncol=1L) else matrix(a, nrow=1L) nc <- dim(a)[2] lb <- length(b) if(lb < nc) stop(paste("columns in a (", nc, ") must be <= length of b (", length(b), ")", sep="")) if(nc == lb) return(drop(a %*% b)) excess <- lb - nc if(lbi && lkint) { if(lbi != excess) stop('b intercepts attribute has different length from number of excess elements in b') bi <- round(bi) kint <- round(kint) if(!isTRUE(all.equal(sort(bi), sort(kint)))) stop('b intercepts attribute do not match kint') bkint <- b[1] } else if(lkint) { if(kint > excess) stop('kint > number excess elements in b') bkint <- b[kint] } else bkint <- 0. drop(bkint + (a %*% b[(lb - nc + 1L) : lb])) } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/dates.s�������������������������������������������������������������������������������������0000644�0001762�0000144�00000007220�13215523664�013012� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yearDays <- function(time) { time <- as.POSIXlt(time) time$mon[] <- time$mday[] <- time$sec[] <- time$min <- time$hour <- 0 time$year <- time$year + 1 return(as.POSIXlt(as.POSIXct(time))$yday + 1) } monthDays <- function(time) { time <- as.POSIXlt(time) time$mday[] <- time$sec[] <- time$min <- time$hour <- 0 time$mon <- time$mon + 1 return(as.POSIXlt(as.POSIXct(time))$mday) } roundPOSIXt <- function(x, digits=c("secs", "mins", "hours", "days", "months", "years")) { ## this gets the default from the generic, as that has two args. if(is.numeric(digits) && digits == 0.0) digits <-"secs" units <- match.arg(digits) month.length <- monthDays(x) x <- as.POSIXlt(x) if(length(x$sec) > 0) switch(units, "secs" = {x$sec <- x$sec + 0.5}, "mins" = {x$sec <- x$sec + 30}, "hours" = {x$sec <- 0; x$min <- x$min + 30}, "days" = {x$sec <- 0; x$min <- 0; x$hour <- x$hour + 12 isdst <- x$isdst <- -1}, "months" = {x$sec <- 0; x$min <- 0; x$hour <- 0; x$mday <- x$mday + trunc(monthDays(x)/2); isdst <- x$isdst <- -1}, "years" = {x$sec <- 0; x$min <- 0; x$hour <- 0; x$mday <- 0; x$mon <- x$mon + 6; isdst <- x$isdst <- -1} ) return(truncPOSIXt(as.POSIXct(x), units=units)) } truncPOSIXt <- function(x, units=c("secs", "mins", "hours", "days", "months", "years"), ...) { units <- match.arg(units) x <- as.POSIXlt(x) isdst <- x$isdst if(length(x$sec) > 0) switch(units, "secs" = {x$sec <- trunc(x$sec)}, "mins" = {x$sec <- 0}, "hours"= {x$sec <- 0; x$min <- 0}, "days" = {x$sec <- 0; x$min <- 0; x$hour <- 0; isdst <- x$isdst <- -1}, "months" = { x$sec <- 0 x$min <- 0 x$hour <- 0 x$mday <- 1 isdst <- x$isdst <- -1 }, "years" = { x$sec <- 0 x$min <- 0 x$hour <- 0 x$mday <- 1 x$mon <- 0 isdst <- x$isdst <- -1 } ) x <- as.POSIXlt(as.POSIXct(x)) if(isdst == -1) { x$isdst <- -1 } return(x) } ceil <- function(x, units, ...) 
{ UseMethod('ceil', x) } ceil.default <- function(x, units, ...) { ceiling(x) } ceil.POSIXt <- function(x, units=c("secs", "mins", "hours", "days", "months", "years"), ...) { units <- match.arg(units) x <- as.POSIXlt(x) isdst <- x$isdst if(length(x$sec) > 0 && x != truncPOSIXt(x, units=units)) { switch(units, "secs" = { x$sec <- ceiling(x$sec) }, "mins" = { x$sec <- 0 x$min <- x$min + 1 }, "hours"= {x$sec <- 0; x$min <- 0; x$hour <- x$hour + 1}, "days" = { x$sec <- 0 x$min <- 0 x$hour <- 0 x$mday <- x$mday + 1 isdst <- x$isdst <- -1 }, "months" = { x$sec <- 0 x$min <- 0 x$hour <- 0 x$mday <- 1 x$mon <- x$mon + 1 isdst <- x$isdst <- -1 }, "years" = { x$sec <- 0 x$min <- 0 x$hour <- 0 x$mday <- 1 x$mon <- 0 x$year <- x$year + 1 isdst <- x$isdst <- -1 } ) x <- as.POSIXlt(as.POSIXct(x)) if(isdst == -1) { x$isdst <- -1 } } return(x) } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/data.frame.labelled.s�����������������������������������������������������������������������0000644�0001762�0000144�00000000444�12243661443�015456� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������## For every object in a data frame that has a 'label' attribute, make it ## class 'labelled' data.frame.labelled <- function(object) { for(n in names(object)) if(length(attr(object[[n]],'label'))) attr(object[[n]],'class') <- c('labelled',attr(object[[n]],'class')) object } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/src.s���������������������������������������������������������������������������������������0000644�0001762�0000144�00000000742�12243661443�012501� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������##Function to source(x) if x is given, or source(last x given) otherwise ##Last x is stored in options() last.source. x is unquoted with .s omitted. 
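##Example (a sketch; 'myfile' is a hypothetical script myfile.s in the
##working directory): src(myfile) sources myfile.s and records it in
##options(last.source); a later bare src() re-sources that same file.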
##Author: Frank Harrell 19May91 src <- function(x) { if(!missing(x)) { y <- paste(as.character(substitute(x)),".s",sep="") options(last.source=y, TEMPORARY=FALSE) } else y <- options()$last.source if(is.null(y)) stop("src not called with file name earlier") source(y) cat(y, "loaded\n") invisible() } ������������������������������Hmisc/R/reformM.r�����������������������������������������������������������������������������������0000644�0001762�0000144�00000002673�12700234337�013321� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Function to automatically reorder variables for use with aregImpute # Modification of function written by Yong Hao Pua # Specify nperm=integer to obtain that many formulae with random # permutation of order of variables; omit nperm to obtain formula # with variables sorted by decreasing number of NAs reformM <- function(formula, data, nperm) { cs <- all.vars(formula) data <- data[, cs] ismiss <- function(x) if(is.character(x)) is.na(x) | x=='' else is.na(x) m <- sapply(data, ismiss) miss.per.obs <- apply(m, 1, sum) miss.per.var <- apply(m, 2, sum) # Percent of observations with any missings: pm <- ceiling(100 * sum(miss.per.obs > 0) / length(miss.per.obs)) nimpute = max(5, pm) cat("Recommended number of imputations:", nimpute, "\n") if(missing(nperm)) { j <- order(miss.per.var, decreasing=TRUE) # var with highest NA freq first formula <- as.formula(paste('~', paste(cs[j], collapse=' + '))) } else { formula <- list() prev <- character(0) # Could add logic to sample() until permutation is different from # all previous permutations for(i in 1 : nperm) { ## Try up to 10 times to get a unique permutation for(j in 1 : 10) { f <- paste('~', paste(sample(cs), collapse=' + ')) if(f %nin% prev) { prev <- c(prev, f) formula[[i]] <- as.formula(f) break } } } if(nperm == 1) formula <- formula[[1]] } formula } ���������������������������������������������������������������������Hmisc/R/Merge.r�������������������������������������������������������������������������������������0000644�0001762�0000144�00000012242�14333753640�012751� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#' Merge Multiple Data Frames or Data Tables #' #' Merges an arbitrarily large series of data frames or data tables containing common \code{id} variables. Information about number of observations and number of unique \code{id}s in individual and final merged datasets is printed. The first data frame/table has special meaning in that all of its observations are kept whether they match \code{id}s in other data frames or not. For all other data frames, by default non-matching observations are dropped. The first data frame is also the one against which counts of unique \code{id}s are compared. Sometimes \code{merge} drops variable attributes such as \code{labels} and \code{units}. These are restored by \code{Merge}. 
#' #' @param \dots two or more dataframes or data tables #' @param id a formula containing all the identification variables such that the combination of these variables uniquely identifies subjects or records of interest. May be omitted for data tables; in that case the \code{key} function retrieves the id variables. #' @param all set to \code{FALSE} to drop observations not found in second and later data frames (only applies if not using \code{data.table}) #' @param verbose set to \code{FALSE} to not print information about observations #' @export #' @examples #' \dontrun{ #' a <- data.frame(sid=1:3, age=c(20,30,40)) #' b <- data.frame(sid=c(1,2,2), bp=c(120,130,140)) #' d <- data.frame(sid=c(1,3,4), wt=c(170,180,190)) #' all <- Merge(a, b, d, id = ~ sid) #' # First file should be the master file and must #' # contain all ids that ever occur. ids not in the master will #' # not be merged from other datasets. #' a <- data.table(a); setkey(a, sid) #' # data.table also does not allow duplicates without allow.cartesian=TRUE #' b <- data.table(sid=1:2, bp=c(120,130)); setkey(b, sid) #' d <- data.table(d); setkey(d, sid) #' all <- Merge(a, b, d) #' } Merge <- function(..., id=NULL, all=TRUE, verbose=TRUE) { w <- list(...) nams <- (as.character(sys.call())[-1])[1 : length(w)] m <- length(nams) ## If argument is a function call, e.g., subset(mydata, age > 20) ## find name of first argument and omit any dollar sign prefix and [] for(i in 1 : m) { x <- nams[i] x <- gsub('subset\\(', '', x) x <- gsub(',.*', '', x) x <- gsub('\\[.*' , '', x) nams[i] <- gsub('(.*)\\$(.*)', '\\2', x) } d1 <- w[[1]] idt <- 'data.table' %in% class(d1) if(idt && ! requireNamespace("data.table", quietly = TRUE)) stop("The 'data.table' package is required to operate on data tables.") if(length(id)) id <- all.vars(id) else { if(! idt) stop('must specify id if not using data.tables') id <- key(d1) if(! length(id)) stop('id not given and first data table has no keys') } m <- length(w) va <- n <- nu <- integer(m) nin1 <- nnin1 <- rep(NA, m) did <- if(idt) d1[, id, with=FALSE] else d1[id] idc1 <- unique(as.character(interaction(did))) id.union <- id.intersection <- idc1 ## Unique variables, and their labels and units uvar <- lab <- un <- character(0) for(i in 1 : m) { d <- w[[i]] nd <- names(d) if(any(id %nin% nd)) stop(paste('data frame', nams[i], 'does not contain id variables', paste(id, collapse=', '))) j <- nd %nin% uvar uvar <- c(uvar, nd[j]) lab <- c(lab, sapply(d, label)[j]) un <- c(un, sapply(d, units)[j]) idt <- is.data.table(d) M <- if(i == 1) d else merge(M, d, by=id, all.x=TRUE, all.y=all) did <- if(idt) d[, id, with=FALSE] else d[id] idc <- unique(as.character(interaction(did))) di <- dim(d) va[i] <- di[2] n [i] <- di[1] nu[i] <- length(unique(idc)) if(i > 1) { nin1 [i] <- sum(idc %in% idc1) nnin1[i] <- sum(idc %nin% idc1) id.union <- union(id.union, idc) id.intersection <- intersect(id.intersection, idc) } } ## Restore labels and units if needed nm <- names(M) names(lab) <- uvar names(un ) <- uvar anych <- FALSE if(any(c(lab, un) != '')) for(i in 1 : ncol(M)) { x <- M[[i]] ni <- nm[i] changed <- FALSE if(ni %nin% names(lab)) stop(paste('Unexpected variable:', ni)) if(lab[ni] != '' && ! length(attr(x, 'label'))) { label(x) <- lab[ni] changed <- TRUE } if(un[ni] != '' && ! 
length(attr(x, 'units'))) { units(x) <- un[ni] changed <- TRUE } if(changed) M[[i]] <- x anych <- anych | changed } nams <- c(nams, 'Merged') va <- c(va, ncol(M)) n <- c(n, nrow(M)) did <- if(is.data.table(M)) M[, id, with=FALSE] else M[id] idc <- unique(as.character(interaction(did))) nu <- c(nu, length(unique(idc))) nin1 <- c(nin1, sum(idc %in% idc1)) nnin1 <- c(nnin1, sum(idc %nin% idc1)) info <- cbind(Vars=va, Obs=n, 'Unique IDs'=nu, 'IDs in #1'=nin1, 'IDs not in #1'=nnin1) rownames(info) <- nams if(verbose) { print(info) cat('\nNumber of unique IDs in any data frame :', length(id.union), '\n') cat( 'Number of unique IDs in all data frames:', length(id.intersection), '\n') if(anych) cat('\nLabels or units restored\n') } attr(M, 'info') <- info M } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/Key.s���������������������������������������������������������������������������������������0000644�0001762�0000144�00000001005�12622407253�012431� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������.tmpfunction <- function(...) stop("This function callback has not been defined yet") Key <- function(...) { .tmpfunction(...) } environment(Key) <- new.env() .setKey <- function(x) { environment(Key)$.tmpfunction <- x } Key2 <- function(...) .tmpfunction(...) environment(Key2) <- new.env() .setKey2 <- function(x) environment(Key2)$.tmpfunction <- x sKey <- function(...) .tmpfunction(...) 
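## Key, Key2, and sKey are placeholder callbacks: each starts out as a
## .tmpfunction that stops with "not defined yet", and plotting code
## later installs the real legend-drawing function by rebinding
## .tmpfunction in each function's private environment via
## .setKey, .setKey2, or .setsKey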
environment(sKey) <- new.env() .setsKey <- function(x) environment(sKey)$.tmpfunction <- x ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/regexpEscape.s������������������������������������������������������������������������������0000644�0001762�0000144�00000000242�12243661443�014320� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������escapeBS <- function(string) { gsub('\\\\', '\\\\\\\\\\', string) } escapeRegex <- function(string) { gsub('([.|()\\^{}+$*?]|\\[|\\])', '\\\\\\1', string) } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/errbar.s������������������������������������������������������������������������������������0000644�0001762�0000144�00000005627�13166554661�013206� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������## From: geyer@galton.uchicago.edu ## Modified 11May91 FEH - added na.rm to range() ## Modified 12Jul91 FEH - added add=T and lty=1 parameters ## Modified 12Aug91 FEH - added explicit ylim parameter ## Modified 26Aug94 FEH - added explicit lwd parameter for segments() ## FEH 2Jul02 added horizontal charts with differences on 2nd axis errbar <- function(x, y, yplus, yminus, cap=.015, main=NULL, sub=NULL, xlab=as.character(substitute(x)), ylab=if(is.factor(x) || is.character(x)) '' else as.character(substitute(y)), add=FALSE, lty=1, type='p', ylim=NULL, lwd=1, pch=16, errbar.col=par("fg"), Type=rep(1,length(y)), ...) { if(is.null(ylim)) ylim <- range(y[Type==1], yplus[Type==1], yminus[Type==1], na.rm=TRUE) if(is.factor(x) || is.character(x)) { x <- as.character(x) n <- length(x) t1 <- Type==1 t2 <- Type==2 n1 <- sum(t1) n2 <- sum(t2) omai <- par('mai') mai <- omai mai[2] <- max(strwidth(x, 'inches')) + .25 par(mai=mai) on.exit(par(mai=omai)) plot(NA, NA, xlab=ylab, ylab='', xlim=ylim, ylim=c(1, n+1), axes=FALSE, main=main, sub=sub, ...) axis(1) w <- if(any(t2)) n1+(1:n2)+1 else numeric(0) axis(2, at=c(seq.int(length.out=n1), w), labels=c(x[t1], x[t2]), las=1, adj=1) points(y[t1], seq.int(length.out=n1), pch=pch, type=type, ...) segments(yplus[t1], seq.int(length.out=n1), yminus[t1], seq.int(length.out=n1), lwd=lwd, lty=lty, col=errbar.col) if(any(Type==2)) { abline(h=n1+1, lty=2, ...) 
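    ## Type==2 (difference) categories are drawn above a dashed divider
    ## and shifted by the difference in group means; the side-3 axis
    ## added below restores their original scale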
offset <- mean(y[t1]) - mean(y[t2]) if(min(yminus[t2]) < 0 & max(yplus[t2]) > 0) lines(c(0,0)+offset, c(n1+1,par('usr')[4]), lty=2, ...) points(y[t2] + offset, w, pch=pch, type=type, ...) segments(yminus[t2] + offset, w, yplus[t2] + offset, w, lwd=lwd, lty=lty, col=errbar.col) at <- pretty(range(y[t2], yplus[t2], yminus[t2])) axis(side=3, at=at + offset, labels=format(round(at, 6))) } return(invisible()) } if(add) points(x, y, pch=pch, type=type, ...) else plot(x, y, ylim=ylim, xlab=xlab, ylab=ylab, pch=pch, type=type, ...) xcoord <- par()$usr[1:2] smidge <- cap * ( xcoord[2] - xcoord[1] ) / 2 segments(x, yminus, x, yplus , lty=lty, lwd=lwd, col=errbar.col) if(par()$xlog) { xstart <- x * 10 ^ (-smidge) xend <- x * 10 ^ (smidge) } else { xstart <- x - smidge xend <- x + smidge } segments( xstart, yminus, xend, yminus, lwd=lwd, lty=lty, col=errbar.col) segments( xstart, yplus, xend, yplus, lwd=lwd, lty=lty, col=errbar.col) return(invisible()) } ���������������������������������������������������������������������������������������������������������Hmisc/R/t.test.cluster.s����������������������������������������������������������������������������0000644�0001762�0000144�00000007122�13067147751�014620� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������t.test.cluster <- function(y, cluster, group, conf.int=.95) { ## See: ## Donner A, Birkett N, Buck C, Am J Epi 114:906-914, 1981. ## Donner A, Klar N, J Clin Epi 49:435-439, 1996. ## Hsieh FY, Stat in Med 8:1195-1201, 1988. 
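  ## Example (a sketch; the variable names are hypothetical):
  ##   with(mydata, t.test.cluster(y=sbp, cluster=clinic, group=treat))
  ## compares mean sbp between the two treat groups while inflating the
  ## variance estimate for intracluster correlation within clinic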
## Hmisc/R/t.test.cluster.s

t.test.cluster <- function(y, cluster, group, conf.int=.95)
{
  ## See:
  ## Donner A, Birkett N, Buck C, Am J Epi 114:906-914, 1981.
  ## Donner A, Klar N, J Clin Epi 49:435-439, 1996.
  ## Hsieh FY, Stat in Med 8:1195-1201, 1988.

  group   <- as.factor(group)
  cluster <- as.factor(cluster)
  s <- !(is.na(y) | is.na(cluster) | is.na(group))
  y <- y[s];  cluster <- cluster[s];  group <- group[s]
  n <- length(y)
  if(n < 2) stop("n<2")

  gr <- levels(group)
  if(length(gr) != 2) stop("must have exactly two treatment groups")

  n   <- table(group)
  nc  <- tapply(cluster, group, function(x) length(unique(x)))
  bar <- tapply(y, group, mean)

  u  <- unclass(group)
  y1 <- y[u==1];  y2 <- y[u==2]
  c1 <- factor(cluster[u==1]);  c2 <- factor(cluster[u==2])  # factor rids unused lev
  b1 <- tapply(y1, c1, mean);   b2 <- tapply(y2, c2, mean)
  m1 <- table(c1);              m2 <- table(c2)
  if(any(names(m1) != names(b1))) stop("logic error 1")
  if(any(names(m2) != names(b2))) stop("logic error 2")
  ## Check cluster sizes in both groups (the original checked only group 2,
  ## letting single-observation clusters in group 1 produce NA variances)
  if(any(m1 < 2) || any(m2 < 2))
    stop(paste('The following clusters contain only one observation:',
               paste(c(names(m1[m1 < 2]), names(m2[m2 < 2])), collapse=' ')))

  M1 <- mean(y1);  M2 <- mean(y2)
  ssc1 <- sum(m1 * ((b1 - M1)^2));  ssc2 <- sum(m2 * ((b2 - M2)^2))
  if(nc[1] != length(m1)) stop("logic error 3")
  if(nc[2] != length(m2)) stop("logic error 4")
  df.msc <- sum(nc) - 2
  msc <- (ssc1 + ssc2) / df.msc
  v1 <- tapply(y1, c1, var);   v2 <- tapply(y2, c2, var)
  ssw1 <- sum((m1 - 1) * v1);  ssw2 <- sum((m2 - 1) * v2)
  df.mse <- sum(n) - sum(nc)
  mse <- (ssw1 + ssw2) / df.mse
  na <- (sum(n) - (sum(m1^2)/n[1] + sum(m2^2)/n[2])) / (sum(nc) - 1)
  rho <- (msc - mse) / (msc + (na - 1) * mse)
  r <- max(rho, 0)
  C1 <- sum(m1 * (1 + (m1 - 1) * r)) / n[1]
  C2 <- sum(m2 * (1 + (m2 - 1) * r)) / n[2]
  v       <- mse * (C1/n[1] + C2/n[2])
  v.unadj <- mse * (1/n[1] + 1/n[2])
  de <- v / v.unadj
  dif <- diff(bar)
  se  <- sqrt(v)
  zcrit <- qnorm((1 + conf.int)/2)
  cl <- c(dif - zcrit*se, dif + zcrit*se)
  z <- dif/se
  P <- 2 * pnorm(-abs(z))

  stats <-
    matrix(NA, nrow=20, ncol=2,
           dimnames=list(c("N", "Clusters", "Mean",
                           "SS among clusters within groups",
                           "SS within clusters within groups",
                           "MS among clusters within groups", "d.f.",
                           "MS within clusters within groups", "d.f.",
                           "Na", "Intracluster correlation",
                           "Variance Correction Factor", "Variance of effect",
                           "Variance without cluster adjustment", "Design Effect",
                           "Effect (Difference in Means)",
                           "S.E. of Effect",
                           paste(format(conf.int), "Confidence limits"),
                           "Z Statistic", "2-sided P Value"), gr))

  stats[1,]   <- n
  stats[2,]   <- nc
  stats[3,]   <- bar
  stats[4,]   <- c(ssc1, ssc2)
  stats[5,]   <- c(ssw1, ssw2)
  stats[6,1]  <- msc
  stats[7,1]  <- df.msc
  stats[8,1]  <- mse
  stats[9,1]  <- df.mse
  stats[10,1] <- na
  stats[11,1] <- rho
  stats[12,]  <- c(C1, C2)
  stats[13,1] <- v
  stats[14,1] <- v.unadj
  stats[15,1] <- de
  stats[16,1] <- dif
  stats[17,1] <- se
  stats[18,]  <- cl
  stats[19,1] <- z
  stats[20,1] <- P

  attr(stats, 'class') <- "t.test.cluster"
  stats
}

print.t.test.cluster <- function(x, digits, ...)
{
  ## if(!missing(digits)).Options$digits <- digits  6Aug00
  if(!missing(digits)) {
    oldopt <- options('digits')
    options(digits=digits)
    on.exit(options(oldopt))
  }

  cstats <- t(apply(x, 1, format))
  ## cstats <- format(x)
  attr(cstats, 'class') <- NULL
  cstats[is.na(x)] <- ""
  invisible(print(cstats, quote=FALSE))
}
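## Editor's note: a minimal sketch of t.test.cluster() (illustration only;
## all names and data are hypothetical).
if (FALSE) {
  set.seed(2)
  n     <- 200
  clus  <- sample(1:20, n, replace=TRUE)              # cluster ids
  group <- ifelse(clus <= 10, 'Control', 'Active')    # treatment assigned by cluster
  y     <- rnorm(n) + 0.3 * (group == 'Active')
  t.test.cluster(y, cluster=clus, group=group)   # prints the 20-row summary matrix
}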
## Hmisc/R/deff.s

deff <- function(y, cluster)
{
  ss <- function(x) {
    n <- length(x)
    xbar <- sum(x) / n
    sum((x - xbar)^2)
  }

  if(!is.factor(cluster)) cluster <- as.factor(cluster)
  cluster <- unclass(cluster)
  s <- !is.na(cluster + y)
  y <- y[s]
  cluster <- as.integer(cluster[s])
  n <- length(y)

  sst <- ss(y)
  sses <- tapply(y, cluster, ss)
  k <- length(sses)
  R2 <- 1 - sum(sses) / sst
  Fstat <- R2 * (n - k) / (1 - R2) / k
  g <- (Fstat - 1.) * k / n
  rho <- if(R2 == 1.) 1. else g / (1. + g)
  ng <- table(cluster)
  B <- sum(ng^2) / n
  deff <- 1 + (B - 1) * rho
  c(n=n, clusters=k, rho=rho, deff=deff)
}
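## Editor's note: a minimal sketch of deff() (illustration only; data made up).
if (FALSE) {
  set.seed(3)
  cl <- rep(1:25, each=8)                  # 25 clusters of size 8
  y  <- rnorm(200) + 0.5 * rnorm(25)[cl]   # add a shared cluster effect
  deff(y, cl)   # returns n, clusters, rho (intracluster corr.), deff
}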
## Hmisc/R/gbayesSeqSim.r

##' Simulate Bayesian Sequential Treatment Comparisons Using a Gaussian Model
##'
##' Simulate a sequential trial under a Gaussian model for parameter estimates,
##' and Gaussian priors using simulated estimates and variances returned by
##' `estSeqSim`.  For each row of the data frame `est` and for each
##' prior/assertion combination, computes the posterior probability of the
##' assertion.
##' @title gbayesSeqSim
##' @param est data frame created by `estSeqSim()`
##' @param asserts list of lists.  The first element of each list is the
##'   user-specified name for each assertion/prior combination, e.g.,
##'   `"efficacy"`.  The other elements are, in order, a character string equal
##'   to "<", ">", or "in", a parameter value `cutoff` (for "<" and ">") or a
##'   2-vector specifying an interval for "in", and either a prior distribution
##'   mean and standard deviation named `mu` and `sigma` respectively, or a
##'   parameter value (`"cutprior"`) and tail area `"tailprob"`.  If the latter
##'   is used, `mu` is assumed to be zero and `sigma` is solved for such that
##'   P(parameter > 'cutprior') = P(parameter < - 'cutprior') = `tailprob`.
##' @return a data frame with number of rows equal to that of `est` with a
##'   number of new columns equal to the number of assertions added.  The new
##'   columns are named `p1`, `p2`, `p3`, ... (posterior probabilities),
##'   `mean1`, `mean2`, ... (posterior means), and `sd1`, `sd2`, ... (posterior
##'   standard deviations).  The returned data frame also has an attribute
##'   `asserts` added which is the original `asserts` augmented with any
##'   derived `mu` and `sigma` and converted to a data frame, and another
##'   attribute `alabels` which is a named vector used to map `p1`, `p2`, ...
##'   to the user-provided labels in `asserts`.
##' @author Frank Harrell
##' @seealso `gbayes()`, `estSeqSim()`, `simMarkovOrd()`, `estSeqMarkovOrd()`
##' @examples
##' \dontrun{
##' # Simulate Bayesian operating characteristics for an unadjusted
##' # proportional odds comparison (Wilcoxon test)
##' # For 100 simulations, 5 looks, 2 true parameter values, and
##' # 2 assertion/prior combinations, compute the posterior probability
##' # Use a low-level logistic regression call to speed up simulations
##' # Use data.table to compute various summary measures
##' # Total simulation time: 2s
##' lfit <- function(x, y) {
##'   f <- rms::lrm.fit(x, y)
##'   k <- length(coef(f))
##'   c(coef(f)[k], vcov(f)[k, k])
##' }
##' gdat <- function(beta, n1, n2) {
##'   # Cell probabilities for a 7-category ordinal outcome for the control group
##'   p <- c(2, 1, 2, 7, 8, 38, 42) / 100
##'
##'   # Compute cell probabilities for the treated group
##'   p2 <- pomodm(p=p, odds.ratio=exp(beta))
##'   y1 <- sample(1 : 7, n1, p,  replace=TRUE)
##'   y2 <- sample(1 : 7, n2, p2, replace=TRUE)
##'   list(y1=y1, y2=y2)
##' }
##'
##' # Assertion 1: log(OR) < 0 under prior with prior mean 0.1 and sigma 1 on log OR scale
##' # Assertion 2: OR between 0.9 and 1/0.9 with prior mean 0 and sigma computed so that
##' # P(OR > 2) = 0.05
##' asserts <- list(list('Efficacy', '<', 0, mu=0.1, sigma=1),
##'                 list('Similarity', 'in', log(c(0.9, 1/0.9)),
##'                      cutprior=log(2), tailprob=0.05))
##'
##' set.seed(1)
##' est <- estSeqSim(c(0, log(0.7)), looks=c(50, 75, 95, 100, 200),
##'                  gendat=gdat,
##'                  fitter=lfit, nsim=100)
##' z <- gbayesSeqSim(est, asserts)
##' head(z)
##' attr(z, 'asserts')
##'
##' # Compute the proportion of simulations that hit targets (different target posterior
##' # probabilities for efficacy vs. similarity)
##'
##' # For the efficacy assessment compute the first look at which the target
##' # was hit (set to infinity if never hit)
##' require(data.table)
##' z <- data.table(z)
##' u <- z[, .(first=min(look[p1 > 0.95])), by=.(parameter, sim)]
##' # Compute the proportion of simulations that ever hit the target and
##' # that hit it by the 100th subject
##' u[, .(ever=mean(first < Inf)), by=.(parameter)]
##' u[, .(by100=mean(first <= 100)), by=.(parameter)]
##' }
##' @md
gbayesSeqSim <- function(est, asserts) {
  nas <- length(asserts)
  alabels <- character(nas)
  for(i in 1 : nas) {
    a <- asserts[[i]]
    nam <- names(a)
    if(nam[1] == '') nam[1] <- 'label'
    if(nam[2] == '') nam[2] <- 'dir'
    if(nam[3] == '') nam[3] <- 'cutoff'
    names(a) <- nam
    if((length(a$cutoff) == 2) != (a$dir == 'in'))
      stop('mismatch of direction and length of cutoff in asserts')
    if(any(c('mu', 'sigma') %nin% names(a))) {
      a$mu    <- 0
      a$sigma <- - a$cutprior / qnorm(a$tailprob)
      if(a$sigma <= 0) stop('error in specification of cutoff or tailprob')
    }
    w <- format(round(a$cutoff, 3))
    a$assertion <- paste(if(a$dir %in% c('<','>')) a$dir,
                         if(length(a$cutoff) == 2)
                           paste0('[', w[1], ',', w[2], ']')
                         else
                           w[1])
    asserts[[i]] <- a
    alabels[i]   <- a$label
  }

  N     <- nrow(est)
  ests  <- est$est
  vests <- est$vest
  ## For each simulated parameter estimate and variance compute nas
  ## posterior probabilities
  for(i in 1 : nas) {
    a      <- asserts[[i]]
    dir    <- a$dir
    cutoff <- a$cutoff
    mu     <- a$mu
    sigma2 <- (a$sigma) ^ 2
    var.post  <- 1. / (1. / sigma2 + 1. / vests)
    mean.post <- (mu / sigma2 + ests / vests) * var.post
    sd.post   <- sqrt(var.post)
    pp <- switch(dir,
                 '<'  = pnorm(cutoff, mean.post, sd.post),
                 '>'  = pnorm(cutoff, mean.post, sd.post, lower.tail=FALSE),
                 'in' = pnorm(cutoff[2], mean.post, sd.post) -
                        pnorm(cutoff[1], mean.post, sd.post))
    label(pp) <- a$label
    est[[paste0('p',    i)]] <- pp
    est[[paste0('mean', i)]] <- mean.post
    est[[paste0('sd',   i)]] <- sd.post
  }
  a <- asserts
  w <- NULL
  for(j in 1 : nas) {
    a <- asserts[[j]]
    a$dir <- a$cutoff <- NULL
    if(any(c('cutprior', 'tailprob') %nin% names(a)))
      a$cutprior <- a$tailprob <- NA
    w <- rbind(w, as.data.frame(a))
  }
  attr(est, 'asserts') <- w
  names(alabels) <- paste0('p', 1 : nas)
  attr(est, 'alabels') <- alabels
  est
}
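## Editor's note: the posterior quantities computed above are the standard
## Gaussian conjugate update; a standalone numeric sketch (illustration only,
## values made up):
if (FALSE) {
  mu <- 0.1; sigma <- 1          # prior mean and SD for the parameter
  est <- -0.5; v <- 0.2 ^ 2      # data estimate and its variance
  var.post  <- 1 / (1 / sigma ^ 2 + 1 / v)             # posterior variance
  mean.post <- (mu / sigma ^ 2 + est / v) * var.post   # posterior mean
  pnorm(0, mean.post, sqrt(var.post))   # P(parameter < 0 | data), cf. dir='<'
}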
##' Simulate Comparisons For Use in Sequential Clinical Trial Simulations
##'
##' Simulates sequential clinical trials.  Looks are done sequentially at
##' observation numbers given in the vector `looks` with the earliest possible
##' look being at observation 2.  For each true effect parameter value,
##' simulation, and at each look, runs a function to compute the estimate of
##' the parameter of interest along with its variance.  For each simulation,
##' data are first simulated for the last look, and these data are sequentially
##' revealed for earlier looks.  The user provides a function `gendat` that
##' given a true effect of `parameter` and the two sample sizes (for treatment
##' groups 1 and 2) returns a list with vectors `y1` and `y2` containing
##' simulated data.  The user also provides a function `fitter` with arguments
##' `x` (group indicator 0/1) and `y` (response variable) that returns a
##' 2-vector containing the effect estimate and its variance.  `parameter` is
##' usually on the scale of a regression coefficient, e.g., a log odds ratio.
##' @title estSeqSim
##' @param parameter vector of true parameter (effects; group differences) values
##' @param looks integer vector of observation numbers at which posterior probabilities are computed
##' @param gendat a function of three arguments: true parameter value (scalar), sample size for first group, sample size for second group
##' @param fitter a function of two arguments: 0/1 group indicator vector and the dependent variable vector
##' @param nsim number of simulations (default is 1)
##' @param progress set to `TRUE` to send current iteration number to the console
##' @return a data frame with number of rows equal to the product of `nsim`, the length of `looks`, and the length of `parameter`.
##' @author Frank Harrell
##' @seealso `gbayesSeqSim()`, `simMarkovOrd()`, `estSeqMarkovOrd()`
##' @examples
##' if (requireNamespace("rms", quietly = TRUE)) {
##'   # Run 100 simulations, 5 looks, 2 true parameter values
##'   # Total simulation time: 2s
##'   lfit <- function(x, y) {
##'     f <- rms::lrm.fit(x, y)
##'     k <- length(coef(f))
##'     c(coef(f)[k], vcov(f)[k, k])
##'   }
##'   gdat <- function(beta, n1, n2) {
##'     # Cell probabilities for a 7-category ordinal outcome for the control group
##'     p <- c(2, 1, 2, 7, 8, 38, 42) / 100
##'
##'     # Compute cell probabilities for the treated group
##'     p2 <- pomodm(p=p, odds.ratio=exp(beta))
##'     y1 <- sample(1 : 7, n1, p,  replace=TRUE)
##'     y2 <- sample(1 : 7, n2, p2, replace=TRUE)
##'     list(y1=y1, y2=y2)
##'   }
##'
##'   set.seed(1)
##'   est <- estSeqSim(c(0, log(0.7)), looks=c(50, 75, 95, 100, 200),
##'                    gendat=gdat,
##'                    fitter=lfit, nsim=100)
##'   head(est)
##' }
##' @md
estSeqSim <- function(parameter, looks, gendat, fitter, nsim=1,
                      progress=FALSE) {
  looks <- sort(looks)
  nlook <- length(looks)
  N     <- max(looks)
  np    <- length(parameter)
  nc    <- nsim * nlook * np
  parm  <- est <- vest <- numeric(nc)
  look  <- sim <- integer(nc)

  ## For each simulation and each parameter value, simulate data for the
  ## whole study
  is <- 0
  for(isim in 1 : nsim) {
    if(progress) cat('Simulation', isim, '\r')
    for(param in parameter) {
      X <- sample(0 : 1, N, replace=TRUE)
      dat <- gendat(param, sum(X == 0), sum(X == 1))
      Y <- rep(NA, N)
      Y[X == 0] <- dat$y1
      Y[X == 1] <- dat$y2
      ## For each look compute the parameter estimate and its variance
      for(l in looks) {
        f <- fitter(X[1 : l], Y[1 : l])
        is <- is + 1
        sim[is]  <- isim
        parm[is] <- param
        look[is] <- l
        est[is]  <- f[1]
        vest[is] <- f[2]
      }   # end looks
    }     # end param
  }       # end sim
  if(progress) cat('\n')
  data.frame(sim=sim, parameter=parm, look=look, est=est, vest=vest)
}
## Hmisc/R/varclus.s

varclus <- function(x,
                    similarity=c("spearman","pearson","hoeffding",
                                 "bothpos","ccbothpos"),
                    type=c("data.matrix","similarity.matrix"),
                    method="complete",
                    data=NULL, subset=NULL, na.action=na.retain,
                    trans=c("square", "abs", "none"), ...)
{
  call <- match.call()
  type <- match.arg(type)
  if(type != "similarity.matrix") similarity <- match.arg(similarity)
  trans <- match.arg(trans)

  nact <- NULL

  if(inherits(x,"formula")) {
    form <- x
    oldops <- options(contrasts=c("contr.treatment","contr.poly"))
    if(length(list(...))) data <- dataframeReduce(data, ...)
    x <- list(formula=form, data=data, na.action=na.action, subset=subset)
    x <- do.call('model.frame', x)
    nam <- names(x)
    nv <- length(x)
    Terms <- attr(x,'terms')
    nact <- attr(x,"na.action")
    x <- model.matrix(Terms, x)
    if(dimnames(x)[[2]][1]=='(Intercept)') x <- x[,-1]
    form <- TRUE
    options(oldops)
    type <- "data.matrix"
  } else form <- FALSE

  n <- NULL
  if(mode(x) != "numeric") stop("x matrix must be numeric")

  if(type == "data.matrix") {  ## assume not a correlation matrix
    if(similarity %in% c("bothpos","ccbothpos")) {
      isthere <- 1*(! is.na(x))
      x[is.na(x)] <- 0
      x[x > 0] <- 1
      n <- crossprod(isthere)
      x <- crossprod(x)/n
      if(similarity=='ccbothpos') {
        cc <- diag(x) %*% t(diag(x))
        cc[row(cc)==col(cc)] <- 0
        x <- x - cc
      }
    }
    else if(similarity=="hoeffding") {
      D <- hoeffd(x);  x <- D$D;  n <- D$n
    }
    else {
      D <- rcorr(x, type=similarity)
      x <- D$r
      x <- switch(trans,
                  square = x^2,
                  abs    = abs(x),
                  none   = x)
      n <- D$n
    }
  }
  else if(diff(dim(x)) != 0)
    stop("x must be square to be a similarity matrix")

  if(any(is.na(x))) {
    cat("Part of the similarity matrix could not be computed:\n")
    x[x < .01] <- 0
    print(x, digits=2)
    stop()
  }

  w <- if(similarity=='ccbothpos') NULL
       else hclust(as.dist(1-x), method=method)

  structure(list(call=call, sim=x, n=n, hclust=w, similarity=similarity,
                 trans=trans, method=method, na.action=nact),
            class="varclus")
}

print.varclus <- function(x, abbrev=FALSE, ...)
{
  dput(x$call); cat("\n")
  if(length(x$na.action)) naprint(x$na.action)
  trans <- x$trans
  s <- c(hoeffding="30 * Hoeffding D",
         spearman=switch(trans,
                         square = "Spearman rho^2",
                         abs    = "|Spearman rho|",
                         none   = "Spearman rho"),
         pearson=switch(trans,
                        square = "Pearson r^2",
                        abs    = "|Pearson r|",
                        none   = "Pearson r"),
         bothpos="Proportion",
         ccbothpos="Chance-Corrected Proportion")[x$similarity]
  cat("\nSimilarity matrix (",s,")\n\n",sep="")
  k <- x$sim
  lab <- dimnames(k)[[2]]
  if(abbrev) lab <- abbreviate(lab)
  dimnames(k) <- list(lab,lab)
  print.default(round(k, 2))
  n <- x$n
  if(length(n)) {
    if(length(n) == 1)
      cat("\nNo. of observations used=", n,"\n\n")
    else {
      cat("\nNo. of observations used for each pair:\n\n")
      dimnames(n) <- list(lab,lab)
      print(n)
    }
  }
  cat("\nhclust results (method=",x$method,")\n\n",sep="")
  print(x$hclust)
  invisible()
}

plot.varclus <- function(x, ylab, abbrev=FALSE, legend.=FALSE, loc, maxlen=20,
                         labels=NULL, ...)
{
  trans <- x$trans
  if(missing(ylab)) {
    s <- c(hoeffding="30 * Hoeffding D",
           spearman=switch(trans,
                           square = expression(paste(Spearman,~rho^2)),
                           abs    = expression(paste(Spearman,~abs(rho))),
                           none   = expression(paste(Spearman,~rho))),
           pearson=switch(trans,
                          square = expression(paste(Pearson,~r^2)),
                          abs    = expression(paste(Pearson,~abs(r))),
                          none   = expression(paste(Pearson,~r))),
           bothpos="Proportion",
           ccbothpos="Chance-Corrected Proportion")[x$similarity]
    if((is.expression(s) && as.character(s)=='NULL') ||
       (! is.expression(s) && (is.na(s) || s=='')))
      s <- x$similarity
    ylab <- s
  }
  if(legend.) abbrev <- TRUE
  if(! length(labels)) labels <- dimnames(x$sim)[[2]]
  olabels <- labels
  if(abbrev) labels <- abbreviate(labels)
  if(! length(x$hclust))
    stop('clustering was not done on similarity="ccbothpos"')

  plot(x$hclust, labels=labels, ann=FALSE, axes=FALSE, ...)
  ya <- pretty(range(1 - x$hclust$height))
  axis(2, at=1-ya, labels=format(ya))
  title(ylab=ylab)
  s <- labels != olabels
  if(legend. && any(s)) {
    if(missing(loc)) {
      cat("Click mouse at upper left corner of legend\n")
      loc <- locator(1)
    }
    olabels <- ifelse(nchar(olabels)>maxlen, substring(olabels,1,maxlen),
                      olabels)
    text(loc, paste(paste(labels[s],":",olabels[s],"\n"),
                    collapse=""), adj=0)
  }
  invisible()
}
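## Editor's note: a minimal sketch of varclus() (illustration only; data made up).
if (FALSE) {
  set.seed(4)
  x1 <- rnorm(100); x2 <- x1 + rnorm(100, sd=.3); x3 <- rnorm(100)
  v <- varclus(~ x1 + x2 + x3)   # default similarity: Spearman rho^2
  print(v)
  plot(v)   # dendrogram; x1 and x2 should cluster together
}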
na.retain <- function(mf) mf

naclus <- function(df, method="complete")
{
  ismiss <- function(x)
    if(is.character(x)) is.na(x) | x=='' else is.na(x)
  na <- sapply(df, ismiss) * 1
  n <- nrow(na)
  sim <- crossprod(na) / n
  res <- varclus(sim, type="similarity.matrix",
                 similarity="Fraction Missing", method=method)
  na.per.obs <- apply(na, 1, sum)
  nc <- ncol(na)
  mean.na <- rep(NA, nc)
  names(mean.na) <- dimnames(na)[[2]]
  for(i in 1:nc) {
    y <- na[,i] == 1
    if(any(y)) mean.na[i] <- mean(na.per.obs[y]) - 1
    NULL
  }
  res$na.per.obs <- na.per.obs
  res$mean.na <- mean.na
  res
}

naplot <- function(obj, which=c('all','na per var','na per obs','mean na',
                                'na per var vs mean na'), ...)
{
  which <- match.arg(which)
  tab <- table(obj$na.per.obs)
  na.per.var <- diag(obj$sim)
  names(na.per.var) <- dimnames(obj$sim)[[2]]
  mean.na <- obj$mean.na

  if(which %in% c('all','na per var'))
    dotchart2(sort(na.per.var), xlab='Fraction of NAs',
              main='Fraction of NAs in each Variable', ...)

  if(which %in% c('all','na per obs'))
    dotchart2(tab, auxdata=tab, xlab='Frequency',
              main='Number of Missing Variables Per Observation', ...)

  if(which %in% c('all','mean na'))
    dotchart2(sort(mean.na), xlab='Mean Number of NAs',
              main='Mean Number of Other Variables Missing for\nObservations where Indicated Variable is NA', ...)

  if(which %in% c('all','na per var vs mean na')) {
    xpd <- par('xpd')
    par(xpd=NA)
    on.exit(par(xpd=xpd))
    plot(na.per.var, mean.na, xlab='Fraction of NAs for Single Variable',
         ylab='Mean # Other Variables Missing', type='p')
    usr <- par('usr')
    eps <- .015*diff(usr[1:2]);  epsy <- .015*diff(usr[3:4])
    s <- (1:length(na.per.var))[! is.na(mean.na)]
    taken.care.of <- NULL
    for(i in s) {
      if(i %in% taken.care.of) next
      w <- s[s > i & abs(na.per.var[s]-na.per.var[i]) < eps &
             abs(mean.na[s]-mean.na[i]) < epsy]
      if(any(w)) {
        taken.care.of <- c(taken.care.of, w)
        text(na.per.var[i]+eps, mean.na[i],
             paste(names(na.per.var[c(i,w)]),collapse='\n'), adj=0)
      }
      else text(na.per.var[i]+eps, mean.na[i], names(na.per.var)[i], adj=0)
    }
  }
  invisible(tab)
}

combine.levels <- function(x, minlev=.05)
{
  x <- as.factor(x)
  notna <- sum(! is.na(x))
  if(notna == 0) return(rep(NA, length(x)))
  lev <- levels(x)
  f <- table(x) / notna
  i <- f < minlev
  si <- sum(i)
  if(si == 0) return(x)
  comb <- if(si == 1) names(sort(f))[1 : 2]
          else names(f)[i]
  keepsep <- setdiff(names(f), comb)
  names(keepsep) <- keepsep
  w <- c(list(OTHER=comb), keepsep)
  levels(x) <- w
  x
}
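## Editor's note: a minimal sketch of naclus()/naplot() and combine.levels()
## (illustration only; data made up).
if (FALSE) {
  d <- data.frame(age=c(21, NA, 33, NA),
                  bp =c(NA, NA, 120, 130),
                  sex=c('m', 'f', '', 'f'))  # '' counts as missing for characters
  nc <- naclus(d)
  plot(nc)    # clusters variables by how often they are missing together
  naplot(nc)  # dot charts summarizing patterns of missingness
  ## Pool infrequent factor levels (< 5% here) into an OTHER category:
  combine.levels(factor(c(rep('a', 40), rep('b', 40), 'c', 'd')), minlev=.05)
}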
plotMultSim <- function(s, x=1:dim(s)[3],
                        slim=range(pretty(c(0,max(s,na.rm=TRUE)))),
                        slimds=FALSE,
                        add=FALSE, lty=par('lty'), col=par('col'),
                        lwd=par('lwd'), vname=NULL, h=.5, w=.75, u=.05,
                        labelx=TRUE, xspace=.35)
{
  if(! length(vname)) vname <- dimnames(s)[[1]]
  p <- dim(s)[1]
  if(length(vname) != p) stop('wrong length for vname')
  if(p != dim(s)[2]) stop('similarity matrix not square')
  if(length(x) != dim(s)[3])
    stop('length of x differs from extent of 3rd dimension of s')

  if(! add) {
    plot(c(-xspace,p+.5), c(.5,p+.25), type='n', axes=FALSE, xlab='', ylab='')
    if(labelx) text(1:p, rep(.6,p), vname, adj=.5)
    text(rep(.5,p), 1:p, vname, adj=1)
  }

  scaleit <- function(x, xlim, lim) lim[1] + (x - xlim[1]) / diff(xlim) * diff(lim)

  if(slimds) {
    slim.diag <- -1e10
    for(k in 1:length(x)) {
      sk <- s[,,k]
      r <- max(diag(sk))
      slim.diag <- max(slim.diag, r)
    }
    slim.diag <- range(pretty(c(0,slim.diag)))
    slim.offdiag <- slim.diag - diff(slim.diag)/2
  }

  rx  <- range(x)
  rxe <- c(rx[1] - u * diff(rx), rx[2] + u * diff(rx))

  for(i in 1 : p) {
    for(j in 1 : p) {
      if((i == j) && all(s[i,j,] == 1)) next
      sl  <- if(slimds) if(i==j) slim.diag else slim.offdiag
             else slim
      sle <- c(sl[1]-u*diff(sl), sl[2]+u*diff(sl))
      if(! add) {
        lines(c(i-w/2,i+w/2,i+w/2,
                i-w/2,i-w/2),
              c(j-h/2,j-h/2,j+h/2,
                j+h/2,j-h/2), col=gray(.5), lwd=.65)
        xc <- rep(i-w/2-u/3,2)
        yc <- scaleit(sl, sle, c(j-h/2,j+h/2))
        if(i==1 && j<=2) {
          text(xc, yc, format(sl,digits=2), adj=1, cex=.7)
          segments(rep(xc+u/8,2), yc,
                   rep(xc+u/3,2), yc)
        }
      }
      lines(scaleit(x, rxe, c(i-w/2,i+w/2)),
            scaleit(s[i,j,], sle, c(j-h/2,j+h/2)),
            lty=lty, lwd=lwd, col=col)
      if(! add && slimds && (i != j))
        lines(c(i-w/2,i+w/2),
              rep(scaleit(0, sle, c(j-h/2,j+h/2)),2),
              col=gray(.5))
    }
  }
  invisible(slim)
}

## Hmisc/R/zoom.s

## Function to use the mouse to zoom in on plots.
## Author: Bill Dunlap <bill@STAT.WASHINGTON.EDU>
zoom <- function(fun, ...)
{
  on.exit(par(oldpar))
  oldpar <- par('err')
  par(err = -1)
  fun(...)
  while(TRUE) {
    cat("Click mouse over corners of zoom area: ")
    p <- locator(n = 2)
    if(length(p$x) != 2) break
    xlim <- range(p$x)
    ylim <- range(p$y)
    cat("xlim=", xlim, "ylim=", ylim, "\n")
    fun(..., xlim=xlim, ylim=ylim)
  }
  cat("Bye! \n")
}
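## Editor's note: a minimal sketch of the interactive zoom() helper
## (illustration only; requires an interactive graphics device).
if (FALSE) {
  zoom(plot, x=1:100, y=cumsum(rnorm(100)), type='l')
  ## Click two corners to zoom in; stop the locator (e.g. right-click or Esc,
  ## depending on the device) to exit.
}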
## Hmisc/R/latexCheckOptions.r

latexCheckOptions <- function(...) {
  if (any(sapply(options()[c("latexcmd","dviExtension","xdvicmd")],
                 is.null)))
    stop("This example uses the pdflatex system command and R's pdf() graphics\n",
         "device and therefore requires that the three options\n",
         "   options()[c(\"latexcmd\",\"dviExtension\",\"xdvicmd\")]\n",
         "all be set to non-NULL values.  Please see the comments in the \"Details\"\n",
         "section of ?microplot::microplot for some recommendations, and the\n",
         "\"System options\" paragraph in the \"Details\" section of ?Hmisc::latex\n",
         "for full discussion of the options available and suggested values for\n",
         "several operating systems.  If you wish to use the latex system command\n",
         "and a compatible graphics device, see the discussion in ?Hmisc::latex",
         call.=FALSE)
}

## Hmisc/R/combplotp.r

#' Combination Plot
#'
#' Generates a plotly attribute plot given a series of possibly overlapping binary variables
#'
#' Similar to the \code{UpSetR} package, draws a special dot chart sometimes called an attribute plot that depicts all possible combinations of the binary variables.  By default a positive value, indicating that a certain condition pertains for a subject, is any of logical \code{TRUE}, numeric 1, \code{"yes"}, \code{"y"}, \code{"positive"}, \code{"+"} or \code{"present"} value, and all others are considered negative.  The user can override this determination by specifying her own \code{pos} function.  Case is ignored in the variable values.
#'
#' The plot uses solid dots arranged in a vertical line to indicate which combination of conditions is being considered.  Frequencies of all possible combinations are shown above the dot chart.  Marginal frequencies of positive values for the input variables are shown to the left of the dot chart.  More information for all three of these component symbols is provided in hover text.
#'
#' Variables are sorted in descending order of marginal frequencies and likewise for combinations of variables.
#'
#' @param formula a formula containing all the variables to be cross-tabulated, on the formula's right hand side.  There is no left hand side variable.  If \code{formula} is omitted, then all variables from \code{data} are analyzed.
#' @param data input data frame.  If none is specified the data are assumed to come from the parent frame.
#' @param subset an optional subsetting expression applied to \code{data}
#' @param na.action see \code{lm} etc.
#' @param vnames set to \code{"names"} to use variable names to label axes instead of variable labels.  When using the default \code{labels}, any variable not having a label will have its name used instead.
#' @param includenone set to \code{TRUE} to include the combination where all conditions are absent
#' @param showno set to \code{TRUE} to show a light dot for conditions that are not part of the currently tabulated combination
#' @param maxcomb maximum number of combinations to display
#' @param minfreq if specified, any combination having a frequency less than this will be omitted from the display
#' @param N set to an integer to override the global denominator, instead of using the number of rows in the data
#' @param pos a function of a vector returning a numeric 0/1 vector, with 1 indicating a positive value (as the default function does); a sketch of a custom \code{pos} appears after the function definition below
#' @param obsname character string noun describing observations, default is \code{"subjects"}
#' @param ptsize point size, defaults to 35
#' @param width width of \code{plotly} plot
#' @param height height of \code{plotly} plot
#' @param \dots optional arguments to pass to \code{table}
#'
#' @return \code{plotly} object
#' @author Frank Harrell
#' @examples
#' if (requireNamespace("plotly")) {
#'   g <- function() sample(0:1, n, prob=c(1 - p, p), replace=TRUE)
#'   set.seed(2); n <- 100; p <- 0.5
#'   x1 <- g(); label(x1) <- 'A long label for x1 that describes it'
#'   x2 <- g()
#'   x3 <- g(); label(x3) <- 'This is<br>a label for x3'
#'   x4 <- g()
#'   combplotp(~ x1 + x2 + x3 + x4, showno=TRUE, includenone=TRUE)
#'
#'   n <- 1500; p <- 0.05
#'   pain <- g()
#'   anxiety <- g()
#'   depression <- g()
#'   soreness <- g()
#'   numbness <- g()
#'   tiredness <- g()
#'   sleepiness <- g()
#'   combplotp(~ pain + anxiety + depression + soreness + numbness +
#'             tiredness + sleepiness, showno=TRUE)
#' }
#' @export
combplotp <- function(formula, data=NULL, subset, na.action=na.retain,
                      vnames=c('labels', 'names'),
                      includenone=FALSE,
                      showno=FALSE,
                      maxcomb=NULL,
                      minfreq=NULL,
                      N=NULL,
                      pos=function(x) 1 * (tolower(x) %in%
                            c('true', 'yes', 'y', 'positive', '+',
                              'present', '1')),
                      obsname='subjects',
                      ptsize=35, width=NULL, height=NULL, ...) {

  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")
  vnames <- match.arg(vnames)
  frac   <- markupSpecs$html$frac
  fr2    <- function(a, b) paste0(frac(a, b), ' = ', round(a / b, 3))

  Y <- if(missing(formula)) {
         if(! missing(subset)) stop('subset not allowed if formula missing')
         if(! length(data)) stop('data must be specified if formula missing')
         data
       }
       else {
         if(!missing(subset) && length(subset))
           model.frame(formula, data=data, subset=subset, na.action=na.action)
         else
           model.frame(formula, data=data, na.action=na.action)
       }

  # Get variable labels, defaulting to variable names
  labs <- if(vnames == 'names') structure(names(Y), names=names(Y))
          else {
            lbs <- sapply(Y, label)
            ifelse(lbs == '', names(Y), lbs)
          }

  # Convert Y to 0/1 (pos returns numeric 0/1)
  Y <- lapply(Y, pos)

  # Compute marginal frequencies
  m <- sapply(Y, sum, na.rm=TRUE)

  # Sort variables in order of descending marginal frequency
  Y <- Y[order(m)]

  if(! length(N)) N <- length(Y[[1]])   # no. obs
  f <- as.data.frame(table(Y, ...))
  f <- f[f$Freq > 0, ]   # subset didn't work
  p <- ncol(f) - 1       # no. variables
  numcondpresent <- apply(f[, 1 : p], 1, function(u) sum(u == 1))
  Nc <- sum(f$Freq[numcondpresent > 0])   # no. obs with any condition
  if(! includenone && any(numcondpresent == 0))
    f <- f[numcondpresent > 0, ]

  # Sort combinations in descending order of frequency
  # Tie-breaker is row order when a combination has only one condition
  mdesc <- sort(m)
  mdesc <- 1 : length(mdesc)
  names(mdesc) <- names(sort(m))
  g <- function(x) {
    i <- x > 0
    ifelse(sum(i) == 1, mdesc[names(x)[i]], 0)
  }
  tiebr <- apply(f[, 1 : p], 1, g)
  i <- order(-f$Freq, -tiebr)
  f <- f[i, ]
  if(length(maxcomb) && maxcomb < nrow(f)) f <- f[1 : maxcomb, ]
  if(length(minfreq) && any(f$Freq < minfreq)) f <- f[f$Freq >= minfreq, ]
  n    <- nrow(f)   # no. combinations
  X    <- as.matrix(1 * (f[, 1 : p] == '1'))
  Freq <- f$Freq

  # String out information
  x <- y <- present <- txt <- xn <- frq <- NULL
  namx <- colnames(X)
  for(i in 1 : n) {
    x <- c(x, rep(i, p))
    y <- c(y, 1 : p)
    xi <- X[i, ]
    present <- c(present, xi)
    namespres <- if(! any(xi == 1)) 'none'
                 else paste(labs[namx][xi == 1], collapse='<br>')
    k <- Freq[i]
    tx <- paste0('<b>', namespres, '</b><br>',
                 '<br>Count: ', k,
                 '<br>Fraction of ', obsname, ': ', fr2(k, N),
                 '<br>Fraction of ', obsname, ' w/any cond: ', fr2(k, Nc))
    txt <- c(txt, rep(tx, p))
    xn  <- c(xn, namx)
    frq <- c(frq, rep(k, p))
  }

  txt <- paste0(txt, '<br>Fraction of ', obsname, ' w/', namx[y], ': ',
                fr2(frq, m[namx[y]]))

  # Find longest html line in labs; needed for the default width and again
  # for the x-axis range in layout() below, so compute it unconditionally
  w <- unlist(strsplit(labs, '<br>'))
  longest  <- w[which.max(nchar(w))]
  nlongest <- nchar(longest)

  hdc <- plotlyParm$heightDotchartb
  if(! length(height)) height <- hdc(c(labs, '', ''), low=250, per=30)
  if(! length(width))
    width <- hdc(rep('X', n), per=23, low=450) + 8 * nlongest

  auto <- .Options$plotlyauto
  if(length(auto) && auto) {height <- width <- NULL}

  P <- plotly::plot_ly(height=height, width=width)

  # Add grid lines to take control of their span
  yy <- 1 : p
  P <- plotly::add_segments(P,
                            x    = ~ rep(-2, p),
                            xend = ~ rep(n, p),
                            y    = ~ 1 : p,
                            yend = ~ 1 : p,
                            color = I('gray80'),
                            line=list(width=0.75),
                            hoverinfo='none', showlegend=FALSE)
  P <- plotly::add_segments(P,
                            x    = ~ 1 : n,
                            xend = ~ 1 : n,
                            y    = ~ rep(1, n),
                            yend = ~ rep(p + 1.5, n),
                            color = I('gray80'),
                            line=list(width=0.75),
                            hoverinfo='none', showlegend=FALSE)

  # Show main result as dot chart
  P <- plotly::add_markers(P,
                           x = ~ x[present == 1],
                           y = ~ y[present == 1],
                           text = ~ txt[present == 1],
                           hoverinfo='text',
                           color=I('black'), size=I(ptsize),
                           showlegend=FALSE)
  if(showno)
    P <- plotly::add_markers(P,
                             x = ~ x[present == 0],
                             y = ~ y[present == 0],
                             hoverinfo='none',
                             color=I('gray90'), size=I(ptsize),
                             showlegend=FALSE)

  # Add a trace showing marginal frequencies on the left as segments
  relfreq <- m[namx] / max(m)
  tmf <- paste0('<b>', labs[namx], '</b><br><br>Marginal count: ', m[namx],
                '<br>Fraction of ', obsname, ': ', fr2(m[namx], N),
                '<br>Fraction of ', obsname, ' w/any cond: ', fr2(m[namx], Nc))
  P <- plotly::add_segments(P,
                            x    = ~ rep(0, p),
                            xend = ~ -2 * relfreq,
                            # y = ~ labs[namx], yend = ~ labs[namx],
                            y    = ~ 1 : p,
                            yend = ~ 1 : p,
                            text = ~ tmf,
                            hoverinfo='text',
                            color=I('blue'), name='Marginal Counts',
                            showlegend=TRUE,
                            line=list(width=3) )

  # Add a trace showing the individual combination frequencies on top
  relfreqc <- Freq / max(Freq)
  nn <- 1 : n
  txtc <- character(n)
  for(i in 1 : n) {
    xi <- X[i, ]
    txtc[i] <- if(! any(xi == 1)) 'none'
               else paste(labs[namx][xi == 1], collapse='<br>')
  }
  txtc <- paste0('<b>', txtc, '</b>',
                 '<br><br>Count: ', Freq,
                 '<br>Fraction of ', obsname, ': ', fr2(Freq, N),
                 '<br>Fraction of ', obsname, ' w/any cond: ', fr2(Freq, Nc))
  P <- plotly::add_segments(P,
                            x    = ~ nn,
                            xend = ~ nn,
                            y    = ~ rep(p + 0.5, n),
                            yend = ~ p + 0.5 + relfreqc,
                            text = ~ txtc,
                            hoverinfo='text',
                            color=I('black'), name='Combination Counts',
                            showlegend=TRUE,
                            line=list(width=3))

  # Add variable labels as annotations
  P <- plotly::add_text(P,
                        x = ~ rep(n + 0.7, p),
                        y = 1 : p,
                        text = ~ labs[namx],
                        textposition="middle right",
                        hoverinfo='none', showlegend=FALSE)

  # leave extra space for long label
  P <- plotly::layout(P,
                      xaxis = list(title='',
                                   tickvals=1 : n,
                                   range=c(-2, n + 0.4 * nlongest),
                                   showgrid=FALSE,
                                   showticklabels=FALSE,
                                   zeroline=FALSE),
                      yaxis = list(title='',
                                   tickvals=1 : p,
                                   showgrid=FALSE,
                                   showticklabels=FALSE),
                      legend= list(x=0.5, y=0,
                                   xanchor='center', yanchor='top',
                                   orientation='h'))
  P
}
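## Editor's note: sketch of supplying a custom pos= to combplotp()
## (illustration only; data made up).  As with the default, pos should
## return numeric 0/1, since the tabulation above compares cells against '1'.
if (FALSE) {
  d <- data.frame(pain=c('Y', 'N', 'Y'), fever=c('N', 'N', 'Y'))
  combplotp(~ pain + fever, data=d,
            pos=function(x) 1 * (toupper(x) == 'Y'))
}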
## Hmisc/R/html.s

html <- function(object, ...) UseMethod('html')

html.latex <- function(object, file, where=c('cwd', 'tmp'),
                       method=c('hevea', 'htlatex'),
                       rmarkdown=FALSE, cleanup=TRUE, ...)
{
  where  <- match.arg(where)
  method <- match.arg(method)
  if(where == 'tmp') cleanup <- FALSE
  if(rmarkdown && ! missing(file))
    warning('do not specify file when rmarkdown=TRUE')
  fi     <- object$file
  fibase <- gsub('\\.tex', '', fi)
  if(missing(file)) file <- paste(fibase, 'html', sep='.')
  if(rmarkdown) file <- character(0)
  toConsole <- ! length(file) || file == ''

  ehtml = function(content) {   # Thanks to Yihui
    if(! requireNamespace('htmltools', quietly=TRUE))
      stop('htmltools package not installed')
    content = htmltools::HTML(gsub('^.*?<body\\s*>|</body>.*$', '', content))
    ss  <- paste(fibase, '-enclosed.css', sep='')
    src <- switch(where, cwd=getwd(), tmp=tempdir())
    d = htmltools::htmlDependency(
      'TeX4ht', '1.0.0', src = src, stylesheet = ss)
    htmltools::attachDependencies(content, d)
  }

  sty <- object$style
  if(length(sty))
    sty <- paste('\\usepackage{', unique(sty), '}', sep='')

  tmp <- switch(where,
                cwd = paste(fibase, 'enclosed', sep='-'),
                tmp = tempfile())
  tmptex <- paste(tmp, 'tex', sep='.')
  infi <- readLines(fi)
  cat('\\documentclass{report}', sty,
      if(method == 'hevea') '\\def\\tabularnewline{\\\\}',
      '\\begin{document}', infi, '\\end{document}\n', file=tmptex, sep='\n')
  sc <- if(.Platform$OS.type == 'unix') ';' else '&'

  ## Create system call to convert enclosed latex file to html.
  cmd <- if(missing(file) || ! length(file) || file == '')
           paste(optionsCmds(method), shQuote(tmptex))
         else
           paste(optionsCmds(method), '-o', file, shQuote(tmptex))

  ## perform system call
  sys(cmd)

  if(method == 'hevea' && ! toConsole) {
    ## Remove 2 bottom lines added by HeVeA
    infi <- readLines(file)
    i <- grep('<hr style="height:2"><blockquote class="quote"><em>This document was translated from L<sup>A</sup>T<sub>E</sub>X by', infi)
    i <- c(i, grep('</em><a href="http://hevea.inria.fr/index.html"><em>H</em><em><span style="font-size:small"><sup>E</sup></span></em><em>V</em><em><span style="font-size:small"><sup>E</sup></span></em><em>A</em></a><em>.</em></blockquote></body>', infi))
    if(length(i)) {
      infi <- infi[- i]
      writeLines(infi, file)
    }
    if(cleanup) {
      bf <- gsub('\\.html', '', file)
      unlink(c(paste(bf, 'haux', sep='.'), paste(bf, 'enclosed.tex', sep='-')))
    }
    return(structure(list(file=file), class='html'))
  }

  if(cleanup && method == 'htlatex')
    unlink(paste(tmp,
                 c('tex', 'tmp','idv','lg','4tc','aux','dvi','log',
                   'xref','4ct'), sep='.'))

  if(rmarkdown || toConsole) {
    w <- readLines(paste(tmp, 'html', sep='.'))
    if(rmarkdown) return(ehtml(w))
    if(! length(file)) return(paste(w, collapse='\n'))
    cat(w, sep='\n')
    return(invisible())
  }
  structure(list(file=file), class='html')
}
html.data.frame <-
  function(object,
           file=paste(first.word(deparse(substitute(object))),
                      'html', sep='.'),
           header, caption=NULL, rownames=FALSE, align='r',
           align.header='c', bold.header=TRUE, col.header='Black',
           border=2, width=NULL, size=100, translate=FALSE,
           append=FALSE, link=NULL, linkCol=1,
           linkType=c('href','name'), ...)
{
  linkType <- match.arg(linkType)
  mu <- markupSpecs$html
  tr <- c(c='center', l='left', r='right')
  align        <- tr[align]
  align.header <- tr[align.header]
  trans <- if(translate) htmlTranslate else function(x) x

  x <- as.matrix(object)
  # for(i in 1:ncol(x)) {
  #   xi <- x[,i]
  #   if(is.numeric(object[,i]))
  #     x[,i] <- paste0('<div align=right>', xi, '</div>')
  # }
  if(rownames && length(r <- rownames(x))) x <- cbind(Name=as.character(r), x)

  b <- c('border: 1px solid gray;', 'border-collapse: collapse;')

  ## Give style a name hmisctablexxx where xxx is a random 6-digit integer
  ## because if you reuse the same style in the same document, style
  ## elements will affect tables that preceded this one
  sn  <- paste0('hmisctable', floor(runif(1, 100000, 999999)))
  psn <- paste0('.', sn)

  ## Duplicate specifications because can't get any single one to work
  lua <- length(unique(align))
  sty <- c('<style>',
           paste0(psn, ' {'),
           if(border == 0) 'border: none;' else b,
           paste0('font-size: ', size, '%;'),
           '}',
           paste0(psn, ' td {'),
           if(lua == 1) paste0('text-align: ', align, ';'),
           'padding: 0 1ex 0 1ex;',   ## top left bottom right
           '}',
           paste0(psn, ' th {'),
           paste0('color: ', col.header, ';'),
           paste0('text-align: ', align.header, ';'),
           'padding: 0 1ex 0 1ex;',
           if(bold.header) 'font-weight: bold;' else 'font-weight: normal;',
           '}',
           '</style>')

  R <- c(sty,
         paste0('<table class="', sn, '"',
                if(length(width) == 1) paste0(' width="', width, '"'),
                if(border == 1) ' border="0"',
                if(border == 2) ' border="1"', '>'))

  if(length(caption))
    R <- c(R, paste0('<caption>', mu$lcap(caption), '</caption>'))

  if(missing(header)) header <- colnames(x)
  if(length(header)) {
    head <- trans(header)
    head <- paste0('<th>', head, '</th>')
    head <- paste0('<tr>', paste(head, collapse=''), '</tr>')
    R <- c(R, head)
  }

  if(length(link)) {
    if(is.matrix(link))
      for(j in 1 : ncol(x))
        x[, j] <- ifelse(link[, j] == '', x[, j],
                         paste0('<a ', linkType, '="', link[, j], '">',
                                trans(x[, j]), '</a>'))
    #  x[link != ''] <- paste('<a ',linkType,'="', link[link!=''],'">',
    #                         trans(x[link != '']), '</a>', sep='')
    else x[,linkCol] <- ifelse(link == '', trans(x[, linkCol]),
                               paste0('<a ', linkType, '="', link, '">',
                                      trans(x[, linkCol]), '</a>'))
  }

  for(i in 1 : nrow(x)) {
    rowt <- if(lua == 1) paste0('<td>', x[i, ], '</td>')
            else paste0('<td style="text-align:', align, ';">',
                        x[i, ], '</td>')
    R <- c(R, paste0('<tr>', paste(rowt, collapse=''), '</tr>'))
  }
  R <- c(R, '</table>')

  if(is.logical(file) && ! file)
    return(htmltools::HTML(paste0(R, '\n')))
  cat(R, file=file, append=append && file != '', sep='\n')
  structure(list(file=file), class='html')
}

html.default <- function(object,
                         file=paste(first.word(deparse(substitute(object))),
                                    'html', sep='.'),
                         append=FALSE,
                         link=NULL, linkCol=1, linkType=c('href','name'),
                         ...)
  html.data.frame(object, file=file, append=append, link=link,
                  linkCol=linkCol, linkType=linkType, ...)

if(FALSE) {
  show.html <- function(object)
  {
    browser <- .Options$help.browser
    if(!length(browser))
      browser <- .Options$browser
    if(!length(browser))
      browser <- 'netscape'
    sys(paste(browser, object, if(.Platform$OS.type == 'unix') '&'))
    invisible()
  }

  print.html <- function(x, ...) show.html(x)
}
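## Editor's note: a minimal sketch of the data frame method (illustration only).
if (FALSE) {
  d <- data.frame(x=c(1.5, 2.5), g=c('a', 'b'))
  w <- html(d, file=FALSE, caption='A small table')  # returns htmltools HTML
  cat(as.character(w))
}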
htmlVerbatim <- function(..., size=75, width=85, scroll=FALSE, rows=10,
                         cols=100, propts=NULL, omit1b=FALSE) {
  if(scroll) {
    nam <- as.character(sys.call()[2])
    w <- paste0('<textarea class="scrollabletextbox" rows=', rows,
                ' cols=', cols, ' style="font-size:', size,
                '%; font-family:Courier New;" name="', nam, '">')
  }
  else w <- paste0('<pre style="font-size:', size, '%;">')
  op <- options(width=width)
  propts <- c(propts, list(quote=FALSE))
  for(x in list(...)) {
    z <- capture.output(do.call('print', c(list(x), propts)))
    if(omit1b && gsub(' ', '', z[1]) == '') z <- z[-1]
    w <- c(w, z)
  }
  options(op)
  w <- c(w, if(scroll) '</textarea>' else '</pre>')
  w <- paste0(w, '\n')
  htmltools::HTML(w)
}

htmlGreek <- function(x, mult=FALSE, code=htmlSpecialType()) {
  orig <- c('alpha','beta','gamma','delta','epsilon','varepsilon', 'zeta',
            'eta', 'theta','vartheta','iota','kappa','lambda','mu','nu',
            'xi','pi','varpi','rho','varrho','sigma','varsigma','tau',
            'upsilon','phi','chi','psi','omega','Gamma','Delta',
            'Theta','Lambda','Xi','Pi','Sigma','Upsilon','Phi','Psi','Omega')
  l <- length(orig)
  new <- if(code == 'unicode')
    substring('\u3B1\u3B2\u3B3\u3B4\u3B5\u3F5\u3B6\u3B7\u3B8\u3D1\u3B9\u3BA\u3BB\u3BC\u3BD\u3BE\u3C0\u3D6\u3C1\u3F1\u3C3\u3C2\u3C4\u3C5\u3C6\u3C7\u3C8\u3C9\u393\u394\u398\u39B\u39E\u3A0\u3A3\u3A5\u3A6\u3A8\u3A9',
              1 : l, 1 : l)
  else
    paste0('&#',
           c(945,946,947,948,949,1013,950,951,952,977,953,954,955,956,957,958,
             960,982,961,1009,963,962,964,965,966,967,968,969,915,916,920,923,
             926,928,931,933,934,936,937), ';')
  names(new) <- orig
  if(mult) {
    for(j in 1 : l) x <- gsub(paste0('\\<', orig[j], '\\>'), new[j], x)
    return(x)
  }
  if(! all(x %in% orig)) stop(paste0('illegal Greek letter name:', x))
  new[x]
}

htmlSpecial <- function(x, code=htmlSpecialType()) {
  z <- c(nbsp   = '160\u00A0',
         thinsp = '8201\u2009',
         emsp   = '8195\u2003',
         ensp   = '8194\u2002',
         plusmn = '177\u00B1',
         times  = '215\u00D7',
         caret  = '94\u005E',
         frasl  = '8260\u2044',
         half   = '189\u00BD',
         angrt  = '8735\u221F',
         squarecrosshatch          = '9638\u2586',
         whitesquareverticalline   = '9707\u25EB',
         blackdowntriangle         = '9660\u25BC',
         mediumsmallwhitecircle    = '9900\u26AC',
         combiningcircumflexaccent = '770\u005E',
         part = '8706\u2202')
  u <- substring(z, nchar(z), nchar(z))
  n <- substring(z, 1, nchar(z) - 1)
  new <- if(code == 'unicode') u else paste0('&#', n, ';')
  names(new) <- names(z)
  if(! all(x %in% names(z)))
    stop(paste0('illegal character name:', x[x %nin% names(z)]))
  new[x]
}
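## Editor's note: quick illustrations of the character helpers above
## (illustration only).
if (FALSE) {
  htmlGreek('alpha')         # '&#945;' or the Unicode character,
                             # depending on htmlSpecialType()
  htmlSpecial('plusmn')      # the plus-minus sign
  htmlGreek('beta and gamma', mult=TRUE)   # translate names inside a string
}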
markupSpecs <- list(html=list(
  ## <span> needed for text in plotly graphics
  bold     = function(..., span=TRUE)
    if(span) paste0('<span style="font-weight:bold">', ..., '</span>')
    else paste0('<strong>', ..., '</strong>'),
  italics  = function(...) paste0('<i>', ..., '</i>'),
  math     = function(...) paste0('<i>', ..., '</i>'),
  ord      = function(n) paste0(n, '<sup>', markupSpecs$all$ordsuffix(n), '</sup>'),
  code     = function(...) paste0('<code style="font-size:0.8em">', ..., '</code>'),
  sup      = function(x, ...) paste0('<sup>', x, '</sup>'),
  sub      = function(x, ...) paste0('<sub>', x, '</sub>'),
  size     = function(..., pct) paste0('<span style="font-size: ', pct, '%;">',
                                       paste(..., collapse=' '), '</span>'),
  smaller  = function(...) paste0('<span style="font-size: 80%;">',  ..., '</span>'),
  larger   = function(...) paste0('<span style="font-size: 125%;">', ..., '</span>'),
  smaller2 = function(...) paste0('<span style="font-size: 64%;">',  ..., '</span>'),
  larger2  = function(...) paste0('<span style="font-size: 156%;">', ..., '</span>'),
  center   = function(...) paste0('<div align=center>', ..., '</div>'),
  color    = function(..., col) paste0('<font color="', col, '">', ..., '</font>'),

  ## Break a long string into two lines with <br> inserted at a space
  ## between words that is close to the middle of the string
  ## Cole Beck 2018-10-18
  addBreak = function(x, minbreak=30) {
    sl <- nchar(x)
    if(sl < minbreak) return(x)
    si <- c(gregexpr(' ', x)[[1]])
    ix <- si[which.min(abs(si - sl/2))]
    p1 <- substr(x, 1, ix - 1)
    p2 <- substr(x, ix + 1, sl)
    paste0(p1, "<br>", p2)
  },
  subtext  = function(..., color='blue')
    paste0('<br><font size=1 color="', color, '">',
           paste(unlist(list(...)), collapse=' '),
           '</font>'),
  cap      = function(..., symbol=htmlSpecial('angrt')) {  # figure caption formatting
    ## alternative: symbol='Figure:'; default is right angle
    ## use symbol=htmlSpecial('squarecrosshatch') for grid graph paper symbol
    lcap <- htmlTranslate(paste(unlist(list(...)), collapse=' '), greek=TRUE)
    paste0('<span style="font-family:Verdana;font-size:10px;">', symbol,
           ' </span><span style="font-family:Verdana;font-size:12px;color:MidnightBlue;">',
           lcap, '</span>')
  },
  lcap     = function(...)  # for continuation of figure caption
    paste0('<span style="font-family:Verdana;font-size:12px;color:MidnightBlue;">',
           htmlTranslate(paste(unlist(list(...)), collapse=' '), greek=TRUE),
           '</span>'),
  tcap     = function(..., symbol=htmlSpecial('whitesquareverticalline')) {
    # table caption formatting
    # alt: symbol='Table:'; default is white square w/vertical bisecting line
    lcap <- htmlTranslate(paste(unlist(list(...)), collapse=' '), greek=TRUE)
    paste0('<span style="font-family:Verdana;font-size:10px;">', symbol,
           ' </span><span style="font-family:Verdana;font-size:12px;color:MidnightBlue;">',
           lcap, '</span>')
  },
  ltcap    = function(...)  # for continuation of table caption
    paste0('<span style="font-family:Verdana;font-size:12px;color:MidnightBlue;">',
           htmlTranslate(paste(unlist(list(...)), collapse=' '), greek=TRUE),
           '</span>'),
  expcoll  = function(vis, invis) {
    id <- floor(runif(1, 100000, 999999))   # unique html id
    paste0('<br><a href="#', id, '" id="', id,
           '_earrows" class="earrows" onclick="expand_collapse(\'',
           id, '\');">',
           htmlSpecial('blackdowntriangle'),
           '</a>', vis,
           '<span id="', id, '" style="display:none;">',
           invis, '</span>')
  },
  expcolld = function(vis, invis)
    paste0('<details><summary>', vis,
           ## htmlSpecial('blackdowntriangle'),
           '</summary>', invis, '</details>'),
  uncover  = function(before, options, envir) {
    ## https://stackoverflow.com/questions/44866287
    ## usage: knitrSet(lang='markdown')   # issues knit_hooks$set(uncover=uncover)
    ## <script>
    ## function uncover(id) {
    ##   var x = document.getElementById(id);
    ##   x.style.display = 'block';
    ## }
    ## </script>
    ##
    ## ```{r, uncover=TRUE, label='text for button', id='script'}
    ## 1 + 1
    ## ```
    if (before) {
      id <- options$id
      label <- options$label
      if(! length(label)) label <- 'Uncover'
      button_string <- paste0("<button onclick=\"uncover('", id, "')\">",
                              label, "</button>")
      div_string <- paste0("<div id = '", id, "', style = 'display:none'>")
      paste0(button_string, "\n", div_string)
    } else {
      "</div>"
    }
  },
  session  = function(cite=TRUE, loadedOnly=FALSE, style=NULL) {
    si <- sessionInfo()
    if(! loadedOnly) si$loadedOnly <- NULL
    # Need to default to html because non-RStudio knitting to .md
    # will not know ultimate output format
    if(! length(style))
      style <- if(knitr::is_html_output() ) 'html'
               else if(knitr::is_latex_output()) 'latex'
               else 'html'
    tt <- function(x)
      switch(style,
             text  = x,
             html  = paste0('<tt>', x, '</tt>'),
             latex = paste0('\\texttt{', x, '}'))
    w <- c(if(style == 'html') '<pre>',
           if(style == 'latex') toLatex(si, locale=FALSE)
           else capture.output(print(si, locale=FALSE)),
           if(style == 'html') '</pre>',
           if(cite) 'To cite R in publications use:',
           if(cite) capture.output(print(citation(), style=style)))
    if(cite) {
      s <- search()
      for(pac in c('Hmisc', 'rms', 'rmsb', 'hreport', 'VGAM',
                   'data.table', 'ggplot2', 'rstan', 'survival')) {
        if(paste0('package:', pac) %in% s) {
          w <- c(w, paste0('\nTo cite the ', tt(pac),
                           ' package in publications use:\n'))
          w <- c(w, capture.output(print(citation(pac)[1], style=style)))
        }
      }
    }
    w <- paste0(w, '\n')
    if(style == 'html') htmltools::HTML(w) else knitr::asis_output(w)
  },
  installcsl = function(cslname, rec=FALSE) {
    if(rec) {
      cat('Shows URLs:',
          'american-medical-association',
          '',
          'Does not show URLs:',
          'council-of-science-editors',
          'american-medical-association-no-url',
          sep='\n')
      return(invisible())
    }
    if(missing(cslname))
      browseURL('https://www.zotero.org/styles')
    else
      download.file(paste0('https://raw.githubusercontent.com/citation-style-language/styles/master/',
                           cslname, '.csl'),
                    paste0(cslname, '.csl'))
  },
  citeulikeShow = function(user, bibkeys=NULL, tags=NULL, file=NULL) {
    if(length(file)) {
      x <- readLines(file)
      ## See http://stackoverflow.com/questions/8613237
      bibkeys <- unlist(regmatches(x, gregexpr("(?<=\\[@).*?(?=\\])", x, perl=TRUE)))
    }
    if(length(bibkeys)) {
      keys <- paste(paste0('bibkey%3A+', bibkeys), collapse='+OR+')
      browseURL(paste0('http://www.citeulike.org/search/username?q=',
                       keys, '&search=Search+library&username=', user))
    }
    else browseURL(paste0('http://www.citeulike.org/user/', user,
                          '/tag/', tags))
    invisible(bibkeys)
  },
  widescreen = function(width='4000px')
    htmltools::HTML(paste0('<style>div.main-container {max-width:', width,
                           ';}</style>')),
  tocsize = function(width='20%', maxwidth='260px', maxheight='85%')
    htmltools::HTML(paste0('<style>div.tocify {width: ', width,
                           '; max-width: ', maxwidth, '; max-height: ',
                           maxheight, ';}</style>')),
  sectionNumberDepth = function() {
    ## Set depth for numbering sections the same as TOC depth
    ## See https://stackoverflow.com/questions/47124299/
    toc_depth <- rmarkdown::metadata$output$html_document$toc_depth
    sel <- paste0("h", (toc_depth + 1) : 10, collapse = " > span, ")
    paste0("<style>", sel,
           " > .header-section-number { display: none; } </style>")
  },
  scroll = function(x, size=75, rows=10, cols=100, font='', name='') {
    w <- paste0('<div style="width: ', cols, 'ex; overflow: auto; height: ',
                rows, 'ex;">')
    c(w, x, '</div>')
  },
  chisq = function(x, ...)
    # paste0('\u03C7&<span class="xscript" style="font-size: 75%;"><sup>2</sup><sub>', x,
    #        '</sub></span>')
    if(missing(x)) paste0(htmlGreek('chi'), '<sup>2</sup>')
    else paste0(htmlGreek('chi'), markupSpecs$html$subsup(x, '2')),
  fstat = function(x, ...)
    paste0('<i>F</i><sub><span style="font-size: 80%;">', x[1],
           htmlSpecial('thinsp'), x[2], '</span></sub>'),
  frac = function(a, b, size=82, ...)
    paste0('<span style="font-size: ', size, '%;"><sup>', a, '</sup>',
           htmlSpecial('frasl'), '<sub>', b, '</sub></span>'),
  half = function(...) htmlSpecial('half'),
  subsup = function(a, b)
    paste0("<sup><span style='font-size: 70%;'>", b,
           "</span></sup><sub style='position: relative; left: -.47em; bottom: -.4em;'><span style='font-size: 70%;'>",
           a, "</span></sub>"),
  varlabel = function(label, units='', size=75, hfill=FALSE) {
    if(units=='') htmlTranslate(label, greek=TRUE)
    else if(hfill)
      paste0("<div style='float: left; text-align: left;'>",
             htmlTranslate(label, greek=TRUE),
             "</div><div style='float: right; text-align: right; font-family: Verdana; font-size:",
             size, "%;'>", htmlTranslate(units, greek=TRUE), "</div>")
    else
      paste0(htmlTranslate(label, greek=TRUE),
             htmlSpecial('emsp'),
             "<span style='font-family:Verdana;font-size:", size, "%;'>",
             htmlTranslate(units, greek=TRUE), "</span>")
  },
  rightAlign = function(x)
    paste0("<div style='float: right; text-align: right;'>", x, "</div>"),
  space    = htmlSpecial('nbsp'),
  lspace   = htmlSpecial('emsp'),
  sspace   = htmlSpecial('thinsp'),
  smallskip= '<br><br>',
  medskip  = '<br><br><br>',
  bigskip  = '<br><br><br><br>',
  lineskip = function(n) paste0('\n<p style="padding-top:', n, 'em;">'),
  br       = '<br>',
  hrule    = '<hr>',
  hrulethin= '<hr class="thinhr">',
  plminus  = htmlSpecial('plusmn'),
  times    = htmlSpecial('times'),
  xbar     = '<span style="text-decoration: overline">X</span>',
  overbar  = function(x) paste0('<span style="text-decoration: overline">',
                                x, '</span>'),
  unicode  = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
  styles   = function(...)
    htmltools::HTML('
<style>
.earrows {color:silver;font-size:11px;}

fcap {
 font-family: Verdana;
 font-size: 12px;
 color: MidnightBlue
 }

smg {
 font-family: Verdana;
 font-size: 10px;
 color: #808080;
 }

hr.thinhr { margin-top: 0.15em; margin-bottom: 0.15em; }

span.xscript {
position: relative;
}
span.xscript sub {
position: absolute;
left: 0.1em;
bottom: -1ex;
}
</style>
'),
  ## The following is to create an html file so that one can copy and paste unicode chars
  ## To do actual conversions it is best to use http://www.endmemo.com/unicode/unicodeconverter.php
  unicodeshow = function(x, surr=TRUE, append=FALSE) {
    if(surr) x <- paste0('&', x, ';')
    cat('<meta charset="utf-8">', paste(x, collapse=''), '<br>',
        file='/tmp/z.html', append=append)
  },
  ## Function to intersperse markdown with knitr chunk output, especially
  ## for chunks producing plotly graphics.  The typical use is to intersperse
  ## figure captions and table of contents entries produced by putHcap()
  ## with plotly graphics.  md is a list of character vectors, one element
  ## per chunk, and robj is a list of R objects to print
  ## Accounts for markdown being in caption text; knitr processes this
  ## See stackoverflow.com/questions/51803162
  mdchunk = function(md=rep('', length(robj)), robj, cnames=FALSE,
                     w=NULL, h=NULL, caption=NULL, results=NULL,
                     method=c('knit_expand', 'knit_child')) {
    method <- match.arg(method)
    bn <- paste0('c', round(runif(1, 0, 1e6)))
    n <- length(md)
    if(length(robj) != n) stop('robj and md must have same length')
    opts <- rep('echo=FALSE', n)
    if(length(results)) opts <- paste0(opts, ',results="', results, '"')
    if(length(w))       opts <- paste0(opts, ',fig.width=' , w)
    if(length(h))       opts <- paste0(opts, ',fig.height=', h)
    if(length(caption)) opts <- paste0(opts, ',fig.cap="', caption, '"')
    if(length(cnames) == 1 && is.logical(cnames))
      cnames <- if(cnames) paste0(bn, 1 : n) else rep('', n)
    if(! all(cnames == '')) cnames <- paste0(cnames, ',')
    for(i in 1 : n) {
      pos <- 1
      env <- as.environment(pos)
      if(method == 'knit_expand') .obj. <- robj[[i]]
      else assign('.obj.', robj[[i]], envir=env)
      k <- c(md[[i]],
             paste0('```{r ', cnames[i], opts[i], '}'),
             '.obj.',
             '```')
      ## Original solution had cat(trimws(...)) but this caused
      ## section headings to be run into R output and markdown not recog.
      switch(method,
             knit_expand = cat(knitr::knit(text=knitr::knit_expand(text=k),
                                           quiet=TRUE)),
             knit_child  = knitr::knit_child(text=k, quiet=TRUE) )
    }
  },
  ## Function to define css for putting a background around a character string
  ## to make it look more like a button
  ## Usage: <p class="rbutton">Text inside button</p>
  cssbutton = function(color='DarkBlue', background='LightBlue', size='115%')
    htmltools::HTML('
<style>
.rbutton {
  font-family: Times;
  font-size:', size, ';
  color:', color, ';
  background-color:', background, ';
}
</style>'),
  ## Function to hide details using <details>...</details>
  ## Usage: `r cssbutton()` ... `r hideDetails('button text', ...)` ... </details>
  hideDetails = function(txt)
    htmltools::HTML('
<details><summary><p class="rbutton">', txt, '</p></summary>')
  ),   # end html

latex = list(
  bold     = function(...) paste0('\\textbf{', ..., '}'),
  italics  = function(...) paste0('\\emph{', ..., '}'),
  math     = function(...) paste0('$', ..., '$'),
  ord      = function(n)
    paste0('$', n, '^\\textrm{', markupSpecs$all$ordsuffix(n), '}$'),
  code     = function(...) paste0('\\texttt{\\smaller ', ..., '}'),
  sup      = function(x, add='$') paste0(add, '^{', x, '}', add),
  sub      = function(x, add='$') paste0(add, '_{', x, '}', add),
  smaller  = function(...) paste0('{\\smaller ', ..., '}' ),
latex = list(
  bold    = function(...) paste0('\\textbf{', ..., '}'),
  italics = function(...) paste0('\\emph{', ..., '}'),
  math    = function(...) paste0('$', ..., '$'),
  ord     = function(n) paste0('$', n, '^\\textrm{',
                               markupSpecs$all$ordsuffix(n), '}$'),
  code    = function(...) paste0('\\texttt{\\smaller ', ..., '}'),
  sup     = function(x, add='$') paste0(add, '^{', x, '}', add),
  sub     = function(x, add='$') paste0(add, '_{', x, '}', add),
  smaller  = function(...) paste0('{\\smaller ',     ..., '}' ),
  larger   = function(...) paste0('{\\smaller[-1] ', ..., '}' ),
  smaller2 = function(...) paste0('{\\smaller[2] ',  ..., '}' ),
  larger2  = function(...) paste0('{\\smaller[-2] ', ..., '}' ),
  center   = function(...) paste0('\\centerline{', ..., '}' ),
  color = function(x, col) {
    colcmd <- if(col == 'MidnightBlue') '\\textcolor[rgb]{0.1,0.1,0.44}'
              else paste0('\\textcolor{', col, '}')
    paste0(colcmd, '{', x, '}')
  },
  chisq = function(x, add='$')
    if(missing(x)) paste0(add, '\\chi^{2}', add)
    else paste0(add, '\\chi^{2}_{', x, '}', add),
  fstat = function(x, add='$') paste0(add, 'F_{', x[1], ',', x[2], '}', add),
  frac  = function(a, b, add='$', ...) paste0(add, '\\frac{', a, '}{', b, '}', add),
  half  = function(add='$') paste0(add, '\\frac{1}{2}', add),
  subsup = function(a, b) paste0('$_{', a, '}^{', b, '}$'),
  varlabel = function(label, units='', hfill=FALSE, ...) {
    if(units=='') return(label)
    else units <- latexTranslate(units)
    fill <- if(hfill) '~\\hfill' else '~~~'
    paste0(label, fill, '\\texttt{\\smaller[2] ', gsub('\\*', ' ', units), '}')
  },
  space     = '~',
  lspace    = '~~',
  sspace    = '\\,',
  smallskip = '\\smallskip',
  medskip   = '\\medskip',
  bigskip   = '\\bigskip',
  lineskip  = function(n) paste0('\n\\vspace{', n, 'ex}\n\n'),
  br        = '\\\\',
  hrule     = '\\hrule',
  plminus   = '$\\pm$',
  times     = '$\\times$',
  xbar      = '$\\bar{X}$',
  overbar   = function(x) paste0('$\\overline{', x, '}$')
),
plain = list(
  space    = ' ',
  lspace   = '  ',
  sspace   = ' ',
  br       = '\n',
  lineskip = function(n) paste(rep('\n', n), collapse=''),
  hrule    = '',
  code     = function(x) x,
  ord      = function(n) paste0(n, markupSpecs$all$ordsuffix(n)),
  chisq    = function(x, ...)
    if(missing(x)) 'chi-square' else paste0('chi-square(', x, ')'),
  frac     = function(a, b, ...) paste0(a, '/', b),
  half     = function(...) '1/2',
  varlabel = function(label, units='', ...)
    if(units == '') label else paste0(label, ' [', units, ']'),
  times    = 'x',
  plminus  = '+/-',
  color    = function(x, ...) x
),
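## Commented examples of the latex markup functions above, with the strings
## they return shown as R literals:
##   markupSpecs$latex$bold('risk')   # "\\textbf{risk}"
##   markupSpecs$latex$frac(1, 2)     # "$\\frac{1}{2}$"
##   markupSpecs$latex$chisq(3)       # "$\\chi^{2}_{3}$"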
markdown = list(
  tof = function(file=.Options$FigCapFile, level=2, number=FALSE) {
    if(! length(file) || file == '')
      stop('figure captions file not defined')
    r <- readLines(file)
    if(! length(r)) return()
    r <- read.csv(file, header=FALSE)
    names(r) <- c('label', 'figref', 'scap')
    n <- nrow(r)
    ## Sort in descending order so last takes precedence if dups
    r <- r[nrow(r) : 1, ]
    isdup <- duplicated(r$label)
    r <- r[! isdup, ]
    r <- r[nrow(r) : 1, ]
    figref <- r[[2]]
    scap   <- r[[3]]
    head <- c('', '<a name="LOF"></a>', '',
              paste(substring('####', 1, level), 'List of Figures',
                    if(! number) '{-}'),
              '', '| **Figure** | **Description** |', '|:---|:---|')
    w <- c(head, paste0('| ', figref, ' | ', scap, ' |'))
    paste0(w, '\n')
  },
  # Function to start a verbatim quote if results='asis' in effect
  # Works for all output formats in R markdown
  squote = function() {   # start quote
    r <- knitr::opts_current$get('results')
    if(length(r) && r %in% c('asis', 'markup')) cat('\n```')
    invisible()
  },
  # Function to close the quote if needed
  equote = function() {   # end quote
    r <- knitr::opts_current$get('results')
    if(length(r) && r %in% c('asis', 'markup')) cat('```\n\n')
    invisible()
  },
  # Function to print an object or inline text or both, verbatim
  # quoting if needed (results='asis' in effect in the chunk)
  # Inline text is printed with cat()
  # Fractional numbers are rounded to dec digits for data frames
  pr = function(x='', obj=NULL, inline=NULL, dec=3) {
    r <- knitr::opts_current$get('results')
    asis <- length(r) && r %in% c('asis', 'markup')
    if(asis) cat('\n```\n')
    if(any(x != '') || length(inline))
      cat('\n', x, if(any(x != '')) ' ', inline, '\n\n', sep='')
    if(length(obj)) {
      if(is.data.frame(obj)) for(i in 1 : length(obj)) {
        x <- obj[[i]]
        if(is.numeric(x)) obj[[i]] <- round(x, dec)
      }
      print(obj, quote=FALSE)
    }
    if(asis) cat('```\n\n')
    invisible()
  }
),   # end markdown
plotmath = list(
  varlabel = function(label, units='', ...) labelPlotmath(label, units)
),
all = list(
  ordsuffix = function(n) {
    l <- n - floor(n / 10) * 10
    ifelse(n %in% 11:13, 'th',
      ifelse(l == 1, 'st',
        ifelse(l == 2, 'nd',
          ifelse(l == 3, 'rd', 'th'))))
  }
)
)

## For expand_collapse see
## http://dickervasti.com/wiki-style-text-expand-collapse-no-jquery.htm#01000

## Function to translate several expressions to html form.
## Arguments inn and out specify additional input and translated
## strings over the usual defaults.
htmlTranslate <- function(object, inn=NULL, out=NULL, greek=FALSE, na='',
                          code=htmlSpecialType(), ...)
{
  if(!
length(object) || all(trimws(object) == '')) return(object) text <- ifelse(is.na(object), na, as.character(object)) ## Must translate & first so won't be converted to & when other ## symbols are translated inn <- c("&", "|", "%", "#", "<=", "<", ">=", ">", "_", "\\243", "\\$", inn, c("[", "(", "]", ")")) w <- if(code == 'unicode') substring('\u26\u7C\u25\u23\u2264\u3C\u2265\u3E\u5F\uA3\u24', 1:11, 1:11) else c("&", "|", "%", "#", "≤", "<", "≥", ">", "_", "£", "$") # 163 was 164 ## htmlarrows.com ## markupSpecs$html$unicodeshow(out, surr=FALSE) out <- c(w, out, c('[', '(', ']', ')')) ##See if string contains an ^ - superscript followed by a number dig <- c('0','1','2','3','4','5','6','7','8','9') for(i in 1 : length(text)) { lt <- nchar(text[i]) x <- substring(text[i], 1 : lt, 1 : lt) j <- x == '^' if(any(j)) { is <- ((1 : lt)[j])[1] #get first ^ remain <- x[-(1 : is)] k <- remain %in% c(' ',',',')',']','\\','$') if(remain[1] %in% dig || (length(remain) > 1 && remain[1] == '-' && remain[2] %in% dig)) k[-1] <- k[-1] | remain[-1] %nin% dig ie <- if(any(k)) is + ((1 : length(remain))[k])[1] else length(x) + 1 substring2(text[i], is, ie - 1) <- paste0('BEGINSUP', substring(text[i], is + 1, ie - 1), 'ENDSUP') } text[i] <- sedit(text[i], c(inn, '^', 'BEGINSUP', 'ENDSUP'), c(out, htmlSpecial('caret', code=code), '<sup>', '</sup>'), wild.literal=TRUE) if(greek) text[i] <- htmlGreek(text[i], code=code, mult=TRUE) } text } ## markupSpecs$html$unicodeshow(c('#9660', 'chi', 'thinsp', 'frasl', 'emsp', 'plusmn', 'times'), append=TRUE) ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/R2Measures.r��������������������������������������������������������������������������������0000644�0001762�0000144�00000014107�14225262155�013700� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������##' Generalized R^2 Measures ##' ##' Computes various generalized R^2 measures related to the Maddala-Cox-Snell (MCS) R^2 for regression models fitted with maximum likelihood. The original MCS R^2 is labeled `R2` in the result. This measure uses the raw sample size `n` and does not penalize for the number of free parameters, so it can be rewarded for overfitting. A measure adjusted for the number of fitted regression coefficients `p` uses the analogy to R^2 in linear models by computing `1 - exp(- lr / n) * (n-1)/(n-p-1)` if `padj=2`, which is approximately `1 - exp(- (lr - p) / n)`, the version used if `padj=1` (the default). The latter measure is appealing because the expected value of the likelihood ratio chi-square statistic `lr` is `p` under the global null hypothesis of no predictors being associated with the response variable. See <https://hbiostat.org/bib/r2.html> for more details. ##' ##' It is well known that in logistic regression the MCS R^2 cannot achieve a value of 1.0 even with a perfect model, which prompted Nagelkerke to divide the R^2 measure by its maximum attainable value. 
##' This is not necessarily the best recalibration of R^2 throughout its range.
##' An alternative is to use the formulas above but to replace the raw sample
##' size `n` with the effective sample size, which for data with many ties can
##' be significantly lower than the number of observations.  As used in the
##' `popower()` and `describe()` functions, in the context of a Wilcoxon test
##' or the proportional odds model, the effective sample size is `n * (1 - f)`
##' where `f` is the sum of cubes of the proportions of observations at each
##' distinct value of the response variable.  Whitehead derived this from an
##' approximation to the variance of a log odds ratio in a proportional odds
##' model.  To obtain R^2 measures using the effective sample size, either
##' provide `ess` as a single number specifying the effective sample size, or
##' specify a vector of frequencies of distinct Y values from which the
##' effective sample size will be computed.  In the context of survival
##' analysis, the single-number effective sample size you may wish to specify
##' is the number of uncensored observations.  This is exactly correct when
##' estimating the hazard rate from a simple exponential distribution or when
##' using the Cox PH/log-rank test.  For failure time distributions with a very
##' high early hazard, censored observations contain enough information that
##' the effective sample size is greater than the number of events.  See
##' Benedetti et al, 1982.
##'
##' If the effective sample size equals the raw sample size, measures involving
##' the effective sample size are set to \code{NA}.
##' @title R2Measures
##' @param lr likelihood ratio chi-square statistic
##' @param p number of non-intercepts in the model that achieved `lr`
##' @param n raw number of observations
##' @param ess if a single number, the effective sample size.  If a vector of
##'   numbers, it is assumed to be the frequency tabulation of all distinct
##'   values of the outcome variable, from which the effective sample size is
##'   computed.
##' @param padj set to 2 to use the classical adjusted R^2 penalty, 1 (the
##'   default) to subtract `p` from `lr`
##' @return named vector of R2 measures.  The notation for results is
##'   `R^2(p, n)` where the `p` component is empty for unadjusted estimates and
##'   `n` is the sample size used (actual sample size for first measures,
##'   effective sample size for remaining ones).  For indexes that are not
##'   adjusted, only `n` appears.
##' @author Frank Harrell
##' @md
##' @export
##' @references
##' Smith TJ and McKenna CM (2013): A comparison of logistic regression pseudo
##' R^2 indices.  Multiple Linear Regression Viewpoints 39:17-26.
##' <https://www.glmj.org/archives/articles/Smith_v39n2.pdf>
##'
##' Benedetti JK, et al (1982): Effective sample size for tests of censored
##' survival data.  Biometrika 69:343--349.
##'
##' Mittlbock M, Schemper M (1996): Explained variation for logistic
##' regression.  Stat in Med 15:1987-1997.
##'
##' Date, S: R-squared, adjusted R-squared and pseudo R-squared.
##' <https://timeseriesreasoning.com/contents/r-squared-adjusted-r-squared-pseudo-r-squared/>
##'
##' UCLA: What are pseudo R-squareds?
##' <https://stats.oarc.ucla.edu/other/mult-pkg/faq/general/faq-what-are-pseudo-r-squareds/>
##'
##' Allison P (2013): What's the best R-squared for logistic regression?
##' <https://statisticalhorizons.com/r2logistic/>
##'
##' Menard S (2000): Coefficients of determination for multiple logistic
##' regression analysis.  The Am Statistician 54:17-24.
##'
##' Whitehead J (1993): Sample size calculations for ordered categorical data.
##' Stat in Med 12:2257-2271.
See errata (1994) 13:871 and letter to the editor by Julious SA, Campbell MJ (1996) 15:1065-1066 showing that for 2-category Y the Whitehead sample size formula agrees closely with the usual formula for comparing two proportions. ##' @examples ##' x <- c(rep(0, 50), rep(1, 50)) ##' y <- x ##' # f <- lrm(y ~ x) ##' # f # Nagelkerke R^2=1.0 ##' # lr <- f$stats['Model L.R.'] ##' # 1 - exp(- lr / 100) # Maddala-Cox-Snell (MCS) 0.75 ##' lr <- 138.6267 # manually so don't need rms package ##' ##' R2Measures(lr, 1, 100, c(50, 50)) # 0.84 Effective n=75 ##' R2Measures(lr, 1, 100, 50) # 0.94 ##' # MCS requires unreasonable effective sample size = minimum outcome ##' # frequency to get close to the 1.0 that Nagelkerke R^2 achieves R2Measures <- function(lr, p, n, ess=NULL, padj=1) { R <- numeric(0) r2 <- 1. - exp(- lr / n) adj <- function() if(padj == 1) 1. - exp(- max(lr - p, 0) / n) else 1. - exp(- lr / n) * (n - 1.) / (n - p - 1.) r2adj <- adj() R <- c(R2=r2, R2adj=r2adj) g <- function(p, n) { n <- as.character(round(n, 1)) sub <- ifelse(p == 0, n, paste0(p, ',', n)) paste0('R2(', sub, ')') } name <- g(c(0, p), c(n, n)) if(length(ess)) { ## Replace n with effective sample size nr <- n n <- if(length(ess) == 1) ess else { P <- ess / sum(ess) n * (1. - sum(P ^ 3)) } r2 <- 1. - exp(- lr / n) r2adj <- adj() if(n == nr) r2 <- r2adj <- NA name <- c(name, g(c(0, p), c(n, n))) R <- c(R, r2, r2adj) } names(R) <- name R } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/translate.s���������������������������������������������������������������������������������0000644�0001762�0000144�00000001722�12250352603�013677� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������translate <- function(text, old, new, multichar=FALSE) { if(length(old)>1 || (nchar(old)!=nchar(new))) multichar <- TRUE if(length(old)>1 && (length(new)>1 & length(new)!=length(old))) stop("old and new must have same lengths or new must have 1 element") if(!multichar) k <- chartr(old, new, text) else { if(multichar) command <- paste("sed",paste('-e "s/',old,"/",new,'/g"', sep="", collapse=" ")) else command <- paste("tr \"", old, "\" \"", new, "\"", sep="") ## k <- sys(command, text) replace with next 2 27aug03 ## Thanks: <Sebastian.Weber@aventis.com> k <- unlist(lapply(text, function(x, command) { sys(paste("echo \"", x, "\" | ", command, sep="")) }, command=command)) # command= 22feb04 ## added command 26jan04; thanks:<Willi.Weber@aventis.com> } if(is.matrix(text)) k <- matrix(k, nrow=nrow(text)) k } ����������������������������������������������Hmisc/R/string.break.line.s�������������������������������������������������������������������������0000644�0001762�0000144�00000000361�12243661443�015226� 0����������������������������������������������������������������������������������������������������ustar 
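## Commented example of the translate() function above (the strings are
## hypothetical).  Single-character translations use chartr(); multi-character
## translations shell out to sed, so they assume a Unix-like system:
##   translate(c('ab', 'ba'), 'ab', 'xy')                    # "xy" "yx"
##   translate('colour', 'colour', 'color', multichar=TRUE)  # via sed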
if(!exists("string.break.line", mode='function')) {
  string.break.line <- function(string) {
    if(! is.character(string))
      string <- as.character(string)

    ifelse(string == '', '', strsplit(string, '\n', fixed=TRUE))
  }
}
Hmisc/R/print.char.list.s
if(!exists("string.bounding.box")) {
  string.bounding.box <- function(string, type=c("chars", "width")) {
    thisfun <- function(x, type) {
      height <- length(x)
      # get rid of ':' on last string
      x[height] <- substr(x[height], start=1,
                          stop=nchar(x[height], type='chars') - 1)
      c(height = height, width = max(nchar(x, type=type)))
    }

    mode(string) <- "character"
    type <- match.arg(type)

    ## Append a ':' so that a trailing '\n' is not dropped and the string
    ## split functions the way I want it to.
    string <- paste(string, ':', sep='')

    ans <- sapply(strsplit(string, '\n', fixed=TRUE), FUN=thisfun,
                  type=type, USE.NAMES=FALSE)
    return(list(columns = ans[2,], rows = ans[1,]))
  }
}

equalBins <- function(widths, subwidths) {
  ## The lengths of widths and subwidths must be the same
  if(length(widths) != length(subwidths)) {
    stop("width and subwidth must be of the same length")
  }

  ## adjust width for column spacers
  widths <- widths - unlist(lapply(subwidths, length)) + 1
  unlist(mapply(function(width, subwidths) {
    if(sum(subwidths) < width) {
      div <- width %/% length(subwidths)
      mod <- width %%  length(subwidths)
      c(rep.int(div + 1, mod), rep.int(div, length(subwidths) - mod))
    } else {
      subwidths
    }
  }, widths, subwidths, SIMPLIFY = FALSE))
}

stringDims <- function(string) {
  if(is.null(string)) {
    return(list(height = 0, width = 0))
  }

  dims <- dim(string)
  bbox <- string.bounding.box(string)
  height <- bbox$rows
  width  <- bbox$columns
  if(any(dims)) {
    dim(height) <- dims
    dim(width)  <- dims
  }
  list(height = height, width = width)
}

simplifyDims <- function(x) {
  if(any(sapply(x, FUN=is.matrix)))
    do.call(rbind, x)
  else
    do.call(c, x)
}

partition.vector <- function(x, sep, ...) {
  if(missing(sep)) {
    stop("sep is a required arg")
  }

  if(sum(sep) != length(x)) {
    stop("sep must sum to the length of x")
  }

  split(x, rep(seq(along.with=sep), times=sep))
}

partition.matrix <- function(x, rowsep, colsep, ...)
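## Commented illustration of the partitioning helpers (hypothetical input):
##   partition.vector(1:10, sep=c(3, 3, 4))   # list(1:3, 4:6, 7:10)
## partition.matrix, whose body follows, splits the rows and/or columns of a
## matrix the same way via its rowsep and colsep arguments.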
{ colmissing <- missing(colsep) rowmissing <- missing(rowsep) if(rowmissing && colmissing) { stop("Atleast one of rowsep or colsep args must be specified") } ## If length of group is equal to length of x assume that this is a ## a vector of group numbers if(!rowmissing) { if(sum(rowsep) != NROW(x)) { stop("rowsep must sum to the number of columns in x") } if(!is.numeric(rowsep)) { stop("the rowsep vector must be numeric") } } if(!colmissing) { if(sum(colsep) != NCOL(x)) { stop("colsep must sum to the number of rows in x") } if(!is.numeric(colsep)) { stop("the colsep vector must be numeric") } } ## Separate x into row chunks if(!rowmissing) { set <- lapply(split(seq(NROW(x)), rep(seq(along.with=rowsep), times=rowsep)), function(index) x[index,,drop=FALSE]) } else { set <- NULL } if(!colmissing) { FUN <- function(x) lapply(split(seq(NCOL(x)), rep(seq(along.with=colsep), times=colsep)), function(index) x[,index,drop=FALSE]) if(is.null(set)) { FUN(x) } else { lapply(set, FUN) } } else { set } } print.char.list <- function(x, ..., hsep = c("|"), vsep = c("-"), csep = c("+"), print.it = TRUE, rowname.halign = c("left", "centre", "right"), rowname.valign = c("top", "centre", "bottom"), colname.halign = c("centre", "left", "right"), colname.valign = c("centre", "top", "bottom"), text.halign = c("right", "centre", "left"), text.valign = c("top", "centre", "bottom"), rowname.width, rowname.height, min.colwidth = .Options$digits, max.rowheight = NULL, abbreviate.dimnames = TRUE, page.width = .Options$width, colname.width, colname.height, prefix.width, superprefix.width = prefix.width) { vjustText <- function(char.matrix, fieldHeight, vjust = c("top", "center", "bottom")) { if(!is.matrix(char.matrix)) stop("char.matrix must be of type matrix") d <- dim(char.matrix) vjust <- match.arg(vjust) if(! is.character(char.matrix)) char.matrix <- as.character(char.matrix) # split the matrix strings up into multi lines. 
char.matrix <- ifelse(is.na(char.matrix), NA, string.break.line(char.matrix)) # determine veritcal differentials vdiff <- fieldHeight - unlist(lapply(char.matrix, length)) ans <- mapply(function(element, vdiff, target) { if(is.na(element) || vdiff == 0) { return(element) } if(vdiff < 0) { # Trim trailing extra lines lines <- rev(seq(along.with=element)[element != '']) if(lines[1] <= target) { return(element[1:target]) } length(element) <- lines[1] element <- element[element != ''] if(length(element) == target) return(element) vdiff <- target - length(element) } switch(vjust, top = c(element, character(vdiff)), bottom = c(character(vdiff), element), center = c(character(half <- vdiff%/%2), element, character(vdiff - half))) }, char.matrix, vdiff, fieldHeight, USE.NAMES=FALSE) matrix(unlist(ans), ncol=d[[2]]) } hjustText <- function(char.matrix, fieldWidth, hjust=c("left","right","center")) { if(!is.matrix(char.matrix)) stop("text must be of type matrix") d <- dim(char.matrix) hjust <- match.arg(hjust) ans <- mapply(function(column, target) { column <- unlist(column) column <- ifelse(is.na(column), NA, format(column, justify=hjust, width=target)) column <- ifelse(is.na(column) || target - nchar(column, type="width"), column, strtrim(column, target)) }, split(char.matrix, col(char.matrix)), fieldWidth, USE.NAMES=FALSE) # dim(ans) <- d ans } justText <- function(char.matrix, fieldWidth, fieldHeight, hjust=c("left","right","centre"), vjust = c("top", "centre", "bottom"), trim=FALSE) { if(!is.matrix(char.matrix)) stop("text must be of type matrix") ## Get the original dims of the matrix d <- dim(char.matrix) ## Determin the wanted justification. hjust <- match.arg(hjust) vjust <- match.arg(vjust) ## If this is a charater matrix then break in on the lines if(is.character(char.matrix)) { ## split the matrix strings up into multi lines. ans <- ifelse(is.na(char.matrix), NA, string.break.line(char.matrix)) } else { ans <- char.matrix } ## format the text horizontaly. ans <- mapply(function(column, target) { fun <- function(x) ifelse(is.na(x), NA, format(x, justify=hjust, width=target)) if(is.list(column)) { lapply(column, fun) } else { fun(column) } }, split(ans, col(char.matrix)), fieldWidth, USE.NAMES=FALSE) spacer <- makeNstr(' ', fieldWidth) ## Add extra rows to justify the text vericaly. 
ans <- mapply(function(row, target) { fun <- function(element, spacer) { vdiff <- target - length(element) if(is.na(element) || vdiff == 0) { return(element) } if(vdiff < 0) { ## Trim trailing extra lines lines <- rev(seq(along.with=element)[element != '']) if(lines[1] <= target) { return(element[1:target]) } length(element) <- lines[1] element <- element[element != ''] if(length(element) == target) return(element) vdiff <- target - length(element) } switch(vjust, top = c(element, rep(spacer, vdiff)), bottom = c(rep(spacer, vdiff), element), centre = c(rep(spacer, half <- vdiff%/%2), element, rep(spacer, vdiff - half))) } mapply(fun, row, spacer, USE.NAMES=FALSE, SIMPLIFY=FALSE) }, split(ans, row(char.matrix)), fieldHeight, USE.NAMES=FALSE, SIMPLIFY=FALSE) matrix(unlist(unsplit(ans, row(char.matrix))), ncol=d[[2]]) } printRow <- function(entries, widths, sep) { if(length(entries) != length(widths)) stop("arguments must be the same length") first <- TRUE last <- TRUE env <- environment() nval <- ' ' sep <- hsep out <- mapply(function(entry, width) { if(is.na(entry)) { if(is.null(last)) { out <- c(nval, makeNstr(nval, width)) } else { out <- c(sep, makeNstr(nval, width)) assign("last", NULL, envir=env) } }else{ if(is.null(last)) { assign("last", TRUE, envir=env) } out <- c(sep, entry) } out }, entries, widths) paste(c(out, sep), collapse='') } printBars <- function(entries, blank, widths, hsep, csep, vsep) { bars <- character(length(entries) + 1) alt <- rep(c(1,2), length.out=length(widths)) # blank <- c(list(rep(TRUE, length(widths))), blank) for(i in seq(along.with=entries)) { len <- length(entries[[i]]) comp <- entries[[i]][-len] comp.last <- entries[[i]][len] bnk <- blank[[i]] bnk.last <- bnk[length(bnk)] dividers <- ifelse(comp & bnk, hsep, ifelse(bnk, ' ', ifelse(comp, csep, vsep))) dividers <- c(dividers, ifelse(bnk.last, hsep, csep)) betweens <- c(makeNstr(ifelse(bnk, ' ', vsep), widths), '') bars[i] <- paste(dividers, betweens, sep='', collapse='') } dividers <- ifelse(entries[[length(entries)]], csep, vsep) betweens <- c(makeNstr(vsep, widths), '') bars[length(bars)] <- paste(dividers, betweens, sep='', collapse='') bars } rjustText <- function(text, fieldWidth, trim=FALSE) justText(text, fieldWidth, 'right', trim) ljustText <- function(text, fieldWidth, trim=FALSE) justText(text, fieldWidth, 'left', trim) centerText <- function(text, fieldWidth, trim=FALSE) justText(text, fieldWidth, 'center', trim) colnames <- NULL rownames <- NULL colDims <- NULL rowDims <- NULL supercols <- NULL superrows <- NULL supercolDims <- NULL superrowDims <- NULL colsets <- NULL rowsets <- NULL if(is.list(x)) { rownames <- lapply(x, function(x) { if(is.null(rownames <- dimnames(x)[[1]])) { rep(NA, NROW(x)) } else { rownames } }) rowsets <- unlist(lapply(rownames, length), use.names=FALSE) superrows <- names(rownames) rownames <- matrix(unlist(rownames, use.names=FALSE), ncol=1) if(all(is.na(rownames))) { rownames <- NULL } colnames <- lapply(x[[1]][1,], names) colsets <- unlist(lapply(colnames, length), use.names=FALSE) supercols <- names(colnames) colnames <- matrix(unlist(colnames, use.names=FALSE), nrow=1) if(all(is.na(colnames))) { colnames <- NULL } ## Convert to a matrix matrix <- do.call(rbind, x) matrix <- do.call(rbind, tapply(matrix, row(matrix), FUN='unlist')) } else { rownames <- dimnames(x)[[1]] colnames <- dimnames(x)[[2]] matrix <- x } ## get widths of each column in table. 
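## Layout logic from here on: each cell is format()ted and stringDims()
## returns per-cell character heights and widths; per-column maxima are then
## reconciled with the supercolumn label widths via equalBins() below.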
listDims <- stringDims(matrix(sapply(matrix, format),ncol=ncol(matrix))) ## find the widths and heights of the row names and col names ## if any elements do not have rownames the set them equal to 0 if(length(colnames)) { colDims <- stringDims(colnames) } else { colDims <- list(height = integer(nrow(matrix)), width = integer(nrow(matrix))) } if(length(rownames)) { rowDims <- stringDims(rownames) } else { rowDims <- list(height = integer(ncol(matrix)), width = integer(ncol(matrix))) } ## calculate the superdim info ## do it for the supercols if(length(supercols)) { supercolDims <- stringDims(supercols) } else { supercolDims <- list(height = 0, width = 0) } ## do it for the superrows if(length(superrows)) { superrowDims <- stringDims(superrows) } else { superrowDims <- list(height = 0, width = 0) } ## Calculate the max column width if(!missing(colname.width)) { colwidth <- pmin(colDims$width, colname.width) supercolwidth <- pmin(supercolDims$width, colname.width*colsets) } else { colwidth <- colDims$width supercolwidth <- supercolDims$width } ## Calculate the max row hight if(!missing(rowname.height)) { rowheight <- pmin(rowDims$height, rowname.height) superrowheight <- pmin(superrowDims$height, rowname.height*rowsets) } else { rowheight <- rowDims$height superrowheight <- superrowDims$height } ## Find the overall height of the matrix height <- pmax(tapply(listDims$height, row(matrix), max), rowheight) height <- equalBins(superrowheight, partition.vector(height, rowsets)) ## Find the overall width of the matrix width <- pmax(tapply(listDims$width, col(matrix), max), colwidth) width <- equalBins(supercolwidth, partition.vector(width, colsets)) ## Calculate actual supercol widths that is the sum of the subcol or total lenght to supercol ## which ever is greater supercolwidth <- tapply(width, rep.int(seq(along.with=colsets), times=colsets), sum) + colsets - 1 supercolheight <- max(superrowDims$height) colheight <- max(colDims$height) superrowheight <- tapply(height, rep.int(seq(along.with=rowsets), times=rowsets), sum) if(missing(prefix.width)) { if(!is.null(rownames)) { prefix.width <- max(max(na.rm = TRUE, as.integer(median(width)), max(rowDims$width))) } else { prefix.width <- 0 } } if(missing(superprefix.width)) { if(!is.null(superrows)) { superprefix.width <- max(na.rm = TRUE, as.integer(median(width)), max(superrowDims$width)) } else { superprefix.width <- 0 } } header <- NULL headerwidth <- NULL rows <- NULL entries <- list() blanks <- list() ## Figure out the centering of the cells. rowNameHalign <- match.arg(rowname.halign) rowNameValign <- match.arg(rowname.valign) colNameHalign <- match.arg(colname.halign) colNameValign <- match.arg(colname.valign) cellHalign <- match.arg(text.halign) cellValign <- match.arg(text.valign) ## create the superrowname column superrow <- if(!is.null(superrows)) { superrows <- matrix(superrows, ncol=1) header <- NA headerwidth <- superprefix.width ## perform verical and horizontal centering. 
justText(superrows, superprefix.width, superrowheight, rowNameHalign, rowNameValign) } row <- if(!is.null(rownames)) { header <- cbind(header, NA) headerwidth <- c(headerwidth, prefix.width) justText(rownames, prefix.width, height, rowNameHalign, rowNameValign) } body <- cbind(superrow, row, justText(matrix, width, height, cellHalign, cellValign)) width <- c(headerwidth, width) body <- split(body, row(body)) ## Create the super column name row and the column name row if(!is.null(supercols)) { supercols <- matrix(supercols, nrow=1) supercolwidth <- c(headerwidth, supercolwidth) entry <- c(header, rep(seq(along.with=colsets), colsets), 0) entries <- c(entries, list(ifelse(is.na(entry), FALSE, !duplicated(entry)))) blank <- ifelse(is.na(c(header, rep(supercols, colsets))), TRUE, FALSE) blanks <- c(blanks, list(blank)) rows <- printRow(justText(cbind(header, supercols), supercolwidth, supercolheight, colNameHalign, colNameValign), width=supercolwidth, sep=hsep) } if(!is.null(colnames)) { entry <- c(header, rep(seq(along.with=colnames), 1), 0) entries <- c(entries, list(ifelse(is.na(entry), FALSE, !duplicated(entry)))) blank <- ifelse(is.na(c(header, colnames)), TRUE, FALSE) blanks <- c(blanks, list(blank)) rows <- c(rows, printRow(justText(cbind(header, colnames), width, colheight, colNameHalign, colNameValign), width=width, sep=hsep)) } env <- environment() rows <- c(rows, unlist(lapply(split(body, rep(seq(along.with=rowsets), rowsets)), function(set) { index <- seq(along.with=set) mapply(FUN = function(line, index) { entry <- c(ifelse(is.na(line), NA, rep(seq(along.with=line), 1)), 0) entry <- ifelse(is.na(entry), FALSE, !duplicated(entry)) assign('entries', c(entries, list(entry)), env) blank <- ifelse(is.na(line), FALSE, FALSE) if(index != 1) { blank[1] <- TRUE } assign('blanks', c(blanks, list(blank)), env) printRow(line, width=width, sep=hsep) }, set, index) }), use.names=FALSE)) blanks[[1]] <- logical(length(width)) entries <- lapply(entries, function(entry) {entry[1] <- TRUE; entry}) bars <- printBars(entries, blanks, width, hsep=hsep, vsep=vsep, csep=csep) total <- paste(bars, c(rows, ""), sep='\n', collapse='\n') if(print.it) { cat(total) invisible(x) } else { total } } ������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/sedit.s�������������������������������������������������������������������������������������0000644�0001762�0000144�00000016034�14112731327�013017� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������sedit <- function(text, from, to, test=NULL, wild.literal=FALSE) { to <- rep(to, length.out=length(from)) for(i in seq_along(text)) { s <- text[i] if(length(s)) for(j in 1:length(from)) { old <- from[j] front <- back <- FALSE if(!wild.literal) { if(substring(old,1,1) == '^') { front <- TRUE; old <- substring(old,2) } if(substring(old,nchar(old)) == '$') { back <- TRUE; old <- substring(old, 1, nchar(old)-1) } } new <- to[j] lold <- nchar(old) if(lold > nchar(s)) next ex.old <- substring(old, 1:lold, 1:lold) if(!wild.literal && any(ex.old == '*')) s <- replace.substring.wild(s, old, new, test=test, front=front, back=back) else { l.s <- nchar(s) is <- 1:(l.s-lold+1) if(front) is <- 1 ie <- 
is + lold - 1 if(back) ie <- l.s ss <- substring(s, is, ie) k <- ss == old if(!any(k)) next k <- is[k] substring2(s, k, k+lold-1) <- new } } text[i] <- s } text } substring.location <- function(text, string, restrict) { if(length(text) > 1) stop('only works with a single character string') l.text <- nchar(text) l.string <- nchar(string) if(l.string > l.text) return(list(first=0,last=0)) if(l.string == l.text) return(if(text == string) list(first=1,last=l.text) else list(first=0,last=0)) is <- 1:(l.text-l.string+1) ss <- substring(text, is, is+l.string-1) k <- ss == string if(!any(k)) return(list(first=0,last=0)) k <- is[k] if(!missing(restrict)) k <- k[k>=restrict[1] & k<=restrict[2]] if(length(k) == 0) return(list(first=0,last=0)) list(first=k, last=k+l.string-1) } ## if(version$major < 5) 14Sep00 substring2 <- function(text, first, last=100000L) base::substring(text, first, last) 'substring2<-' <- function(text, first, last=100000, value) { if(is.character(first)) { if(!missing(last)) stop('wrong # arguments') return(sedit(text, first, value)) ## value was setto 25May01 } lf <- length(first) if(length(text) == 1 && lf > 1) { if(missing(last)) last <- nchar(text) last <- rep(last, length.out=lf) for(i in 1:lf) { text <- paste(if(first[i]>1) substring(text, 1, first[i]-1), value, substring(text, last[i]+1), sep='') if(i < lf) { j <- (i+1):lf w <- nchar(value) - (last[i]-first[i]+1) first[j] <- first[j] + w last[j] <- last[j] + w } } return(text) } text <- paste(ifelse(first>1,substring(text, 1, first-1),''), value, substring(text, last+1), sep='') text } replace.substring.wild <- function(text, old, new, test=NULL, front=FALSE, back=FALSE) { if(length(text)>1) stop('only works with a single character string') if(missing(front) && missing(back)) { if(substring(old,1,1) == '^') { front <- TRUE; old <- substring(old,2) } if(substring(old, nchar(old)) == '$') { back <- TRUE old <- substring(old, 1, nchar(old)-1) } } if((front || back) && old!='*') stop('front and back (^ and $) only work when the rest of old is *') star.old <- substring.location(old,'*') if(length(star.old$first)>1) stop('does not handle > 1 * in old') if(sum(star.old$first) == 0) stop('no * in old') star.new <- substring.location(new,'*') if(length(star.new$first)>1) stop('cannot have > 1 * in new') if(old == '*' && (front | back)) { if(front && back) stop('may not specify both front and back (or ^ and $) with old=*') if(length(test) == 0) stop('must specify test= with old=^* or *$') et <- nchar(text) if(front) { st <- rep(1, et); en <- et:1 } else { st <- 1:et; en <- rep(et, et) } qual <- test(substring(text, st, en)) if(!any(qual)) return(text) st <- (st[qual])[1] en <- (en[qual])[1] text.before <- if(st == 1)'' else substring(text, 1, st-1) text.after <- if(en == et)'' else substring(text, en+1, et) text.star <- substring(text, st, en) new.before.star <- if(star.new$first>1) substring(new, 1, star.new$first-1) else '' new.after.star <- if(star.new$last == length(new))'' else substring(new, star.new$last+1) return(paste(text.before, new.before.star, text.star, new.after.star, text.after, sep='')) } old.before.star <- if(star.old$first == 1)'' else substring(old, 1, star.old$first-1) old.after.star <- if(star.old$last == nchar(old))'' else substring(old, star.old$first+1) if(old.before.star == '') loc.before <- list(first=0, last=0) else { loc.before <- substring.location(text, old.before.star) loc.before <- list(first=loc.before$first[1], last=loc.before$last[1]) } if(sum(loc.before$first+loc.before$last) == 0) return(text) 
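## At this point the part of old preceding '*' has been located in text.
## Commented illustration (hypothetical strings): for text='a123b',
## old='a*b', new='x*y', the '*' captures '123' and the result is 'x123y'.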
loc.after <- if(old.after.star == '') list(first=0, last=0) else { la <- substring.location(text, old.after.star, restrict=c(loc.before$last+1,1e10)) lastpos <- length(la$first) la <- list(first=la$first[lastpos], last=la$last[lastpos]) if(la$first+la$last == 0) return(text) la } loc.star <- list(first=loc.before$last+1, last=if(loc.after$first == 0) nchar(text) else loc.after$first-1) star.text <- substring(text, loc.star$first, loc.star$last) if(length(test) && !test(star.text)) return(text) if(star.new$first == 0) return(paste(if(loc.before$first>1)substring(text,1,loc.before$first-1), new, sep='')) new.before.star <- if(star.new$first == 1)'' else substring(new, 1, star.new$first-1) new.after.star <- if(star.new$last == nchar(new)) '' else substring(new, star.new$first+1) paste(if(loc.before$first>1)substring(text,1,loc.before$first-1), new.before.star, substring(text,loc.star$first,loc.star$last), new.after.star, if(loc.after$last<nchar(text) && loc.after$last>0) substring(text,loc.after$last+1), sep='') } ## Some functions useful as test= arguments to replace.substring.wild, sedit numeric.string <- function(string) suppressWarnings(!is.na(as.numeric(string))) all.digits <- function(string) { k <- length(string) result <- logical(k) for(i in 1:k) { st <- string[i] ls <- nchar(st) ex <- substring(st, 1:ls, 1:ls) result[i] <- all(match(ex,c('0','1','2','3','4','5','6','7','8','9'),nomatch=0)>0) } result } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/mdb.get.s�����������������������������������������������������������������������������������0000644�0001762�0000144�00000002453�14112733210�013220� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mdb.get <- function(file, tables=NULL, lowernames=FALSE, allow=NULL, dateformat='%m/%d/%y', mdbexportArgs='-b strip', ...) { rettab <- length(tables) && is.logical(tables) if(rettab) tables <- NULL if(!length(tables)) tables <- system(paste('mdb-tables -1', file), intern=TRUE) if(rettab) return(tables) f <- tempfile() D <- vector('list', length(tables)) names(D) <- tables for(tab in tables) { s <- system(paste('mdb-schema -T', shQuote(tab), file), intern=TRUE) start <- grep('^ \\($', s) + 1 end <- grep('^\\);$', s) - 1 s <- s[start:end] s <- strsplit(s, '\t') vnames <- sapply(s, function(x) { bracketed = x[2] if(substr(bracketed, 1, 1) == '[') substr(bracketed, 2, nchar(bracketed) - 1) else bracketed }) vnames <- makeNames(vnames, unique=TRUE, allow=allow) if(lowernames) vnames <- casefold(vnames) types <- sapply(s, function(x)x[length(x)]) datetime <- vnames[grep('DateTime', s)] system2(command = 'mdb-export', args = paste(mdbexportArgs, file, shQuote(tab)), stdout = f) d <- csv.get(f, datetimevars=datetime, lowernames=lowernames, allow=allow, dateformat=dateformat, ...) 
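## Commented usage of mdb.get (the .mdb file name is hypothetical; requires
## the mdbtools utilities mdb-tables, mdb-schema, and mdb-export on the PATH):
##   mdb.get('employees.mdb', tables=TRUE)     # list table names only
##   mdb.get('employees.mdb', tables='Staff')  # import a single table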
if(length(tables) == 1) return(d) else D[[tab]] <- d } D } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/plotlyM.r�����������������������������������������������������������������������������������0000644�0001762�0000144�00000037222�14247427021�013352� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#' plotly Multiple #' #' Generates multiple plotly graphics, driven by specs in a data frame #' #' Generates multiple \code{plotly} traces and combines them with \code{plotly::subplot}. The traces are controlled by specifications in data frame \code{data} plus various arguments. \code{data} must contain these variables: \code{x}, \code{y}, and \code{tracename} (if \code{color} is not an "AsIs" color such as \code{~ I('black')}), and can contain these optional variables: \code{xhi}, \code{yhi} (rows containing \code{NA} for both \code{xhi} and \code{yhi} represent points, and those with non-\code{NA} \code{xhi} or \code{yhi} represent segments, \code{connect} (set to \code{TRUE} for rows for points, to connect the symbols), \code{legendgroup} (see \code{plotly} documentation), and \code{htext} (hovertext). If the \code{color} argument is given and it is not an "AsIs" color, the variable named in the \code{color} formula must also be in \code{data}. Likewise for \code{size}. If the \code{multplot} is given, the variable given in the formula must be in \code{data}. If \code{strata} is present, another level of separate plots is generated by levels of \code{strata}, within levels of \code{multplot}. #' #' If \code{fitter} is specified, x,y coordinates for an individual plot are #' run through \code{fitter}, and a line plot is made instead of showing data points. Alternatively you can specify \code{fitter='ecdf'} to compute and plot emirical cumulative distribution functions. #' #' @param data input data frame #' @param x formula specifying the x-axis variable #' @param y formula for y-axis variable #' @param xhi formula for upper x variable limits (\code{x} taken to be lower value) #' @param yhi formula for upper y variable limit (\code{y} taken to be lower value) #' @param htext formula for hovertext variable #' @param multplot formula specifying a variable in \code{data} that when stratified on produces a separate plot #' @param strata formula specifying an optional stratification variable #' @param fitter a fitting such as \code{loess} that comes with a \code{predict} method. Alternatively specify \code{fitter='ecdf'} to use an internal function for computing and displaying ECDFs, which moves the analysis variable from the y-axis to the x-axis #' @param color \code{plotly} formula specifying a color variable or e.g. \code{~ I('black')}. To keep colors constant over multiple plots you will need to specify an AsIs color when you don't have a variable representing color groups. 
#' @param size \code{plotly} formula specifying a symbol size variable or AsIs #' @param showpts if \code{fitter} is given, set to \code{TRUE} to show raw data points in addition to smooth fits #' @param rotate set to \code{TRUE} to reverse the roles of \code{x} and \code{y}, for example to get horizontal dot charts with error bars #' @param xlab x-axis label. May contain html. #' @param ylab a named vector of y-axis labels, possibly containing html (see example below). The names of the vector must correspond to levels of the \code{multplot} variable. \code{ylab} can be unnamed if \code{multplot} is not used. #' @param ylabpos position of y-axis labels. Default is on top left of plot. Specify \code{ylabpos='y'} for usual y-axis placement. #' @param xlim 2-vector of x-axis limits, optional #' @param ylim 2-vector of y-axis limits, optional #' @param shareX specifies whether x-axes should be shared when they align vertically over multiple plots #' @param shareY specifies whether y-axes should be shared when they align horizontally over multiple plots #' @param nrows the number of rows to produce using \code{subplot} #' @param ncols the number of columns to produce using \code{subplot} (specify at most one of \code{nrows,ncols}) #' @param height height of the combined image in pixels #' @param width width of the combined image in pixels #' @param colors the color palette. Leave unspecified to use the default \code{plotly} palette #' @param alphaSegments alpha transparency for line segments (when \code{xhi} or \code{yhi} is not \code{NA}) #' @param alphaCline alpha transparency for lines used to connect points #' @param digits number of significant digits to use in constructing hovertext #' @param zeroline set to \code{FALSE} to suppress vertical line at x=0 #' #' @return \code{plotly} object produced by \code{subplot} #' @author Frank Harrell #' @examples #' \dontrun{ #' set.seed(1) #' pts <- expand.grid(v=c('y1', 'y2', 'y3'), x=1:4, g=c('a', 'b'), yhi=NA, #' tracename='mean', legendgroup='mean', #' connect=TRUE, size=4) #' #' pts$y <- round(runif(nrow(pts)), 2) #' #' segs <- expand.grid(v=c('y1', 'y2', 'y3'), x=1:4, g=c('a', 'b'), #' tracename='limits', legendgroup='limits', #' connect=NA, size=6) #' segs$y <- runif(nrow(pts)) #' segs$yhi <- segs$y + runif(nrow(pts), .05, .15) #' #' z <- rbind(pts, segs) #' #' xlab <- labelPlotmath('X<sub>12</sub>', 'm/sec<sup>2</sup>', html=TRUE) #' ylab <- c(y1=labelPlotmath('Y1', 'cm', html=TRUE), #' y2='Y2', #' y3=labelPlotmath('Y3', 'mm', html=TRUE)) #' #' W=plotlyM(z, multplot=~v, color=~g, xlab=xlab, ylab=ylab, ncols=2, #' colors=c('black', 'blue')) #' #' W2=plotlyM(z, multplot=~v, color=~I('black'), xlab=xlab, ylab=ylab, #' colors=c('black', 'blue')) #' #' } #' @export plotlyM <- function(data, x=~x, y=~y, xhi=~xhi, yhi=~yhi, htext=NULL, multplot=NULL, strata=NULL, fitter=NULL, color=NULL, size=NULL, showpts=! 
length(fitter), rotate=FALSE, xlab=NULL, ylab=NULL, ylabpos=c('top', 'y'), xlim=NULL, ylim=NULL, shareX=TRUE, shareY=FALSE, height=NULL, width=NULL, nrows=NULL, ncols=NULL, colors=NULL, alphaSegments=1, alphaCline=0.3, digits=4, zeroline=TRUE) { if (!requireNamespace("plotly")) stop("This function requires the 'plotly' package.") auto <- .Options$plotlyauto if(length(auto) && auto) height <- width <- NULL ylabpos <- match.arg(ylabpos) if(rotate) { xf <- y #~ y yf <- x #~ x xfe <- yhi #~ yhi yfe <- xhi #~ xhi } else { xf <- x #~ x yf <- y #~ y xfe <- xhi #~ xhi yfe <- yhi #~ yhi } xn <- all.vars(xf) #x) yn <- all.vars(yf) #y) xhin <- all.vars(xfe) #xhi) yhin <- all.vars(yfe) #yhi) n <- nrow(data) if(! length(multplot)) { multplot <- ~ .v. data$.v. <- rep(' ', n) } else data$.v. <- data[[all.vars(multplot)]] vlevs <- levels(as.factor(data$.v.)) lastv <- vlevs[length(vlevs)] strpres <- length(strata) > 0 strata <- if(strpres) as.factor(data[[all.vars(strata)]]) else as.factor(rep('', nrow(data))) stlevs <- levels(strata) lasts <- stlevs[length(stlevs)] if(! length(nrows) && ! length(ncols) && strpres) ncols <- length(stlevs) if(length(ylab) && ! length(names(ylab))) names(ylab) <- vlevs if(! length(ylab)) ylab <- structure(vlevs, names=vlevs) fmt <- function(x) htmlSN(x, digits=digits) nam <- names(data) if(xhin %nin% nam) data[[xhin]] <- rep(NA, n) if(yhin %nin% nam) data[[yhin]] <- rep(NA, n) if('connect' %nin% nam) data$connect <- rep(FALSE, n) if('tracename' %in% nam && 'legendgroup' %nin% nam) data$legendgroup <- data$tracename if(length(color)) { ## ~ I('black') will not show inherits('AsIs') but all.vars is char(0) colasis <- ! length(all.vars(color)) traceform <- if(colasis) ~ tracename legendgroupform <- if(colasis) ~ legendgroup colvar <- if(! colasis) all.vars(color) } else if(length(size)) { sizeasis <- ! length(all.vars(color)) traceform <- if(sizeasis) ~ tracename legendgroupform <- if(sizeasis) ~ legendgroup sizevar <- if(! sizeasis) all.vars(size) } else { traceform <- if('tracename' %in% nam) ~ tracename legendgroupform <- if('legendgroup' %in% nam) ~ legendgroup colasis <- FALSE colvar <- NULL sizeasis <- FALSE sizevar <- NULL } if(length(color)) legendgroupform <- color usualfitter <- length(fitter) && is.function(fitter) is.ecdf <- length(fitter) && is.character(fitter) && fitter == 'ecdf' xpresent <- ! is.ecdf runfit <- if(usualfitter) function() { xv <- all.vars(xf) yv <- all.vars(yf) x <- pt[[xv]] y <- pt[[yv]] g <- if(length(colvar)) pt[[colvar]] else rep('', nrow(pt)) g <- as.factor(g) d <- data.frame(x, y, g) Dp <- NULL xgrid <- seq(min(x, na.rm=TRUE), max(x, na.rm=TRUE), length=150) dx <- data.frame(x = xgrid) for(gv in levels(g)) { f <- fitter(y ~ x, data=subset(d, g == gv)) y <- predict(f, newdata=dx) dp <- cbind(dx, y, g=gv) Dp <- rbind(Dp, dp) } names(Dp) <- c(xv, yv, if(length(colvar)) colvar else 'g') Dp } else if(is.ecdf) function() { yv <- all.vars(xf) y <- pt[[yv]] g <- if(length(colvar)) pt[[colvar]] else rep('', nrow(pt)) g <- as.factor(g) Dp <- NULL rng <- range(y, na.rm=TRUE) for(gv in levels(g)) { j <- g == gv & ! 
is.na(y) yg <- sort(y[j]) n <- length(yg) vals <- unique(yg) # see stats::ecdf a <- approx(vals, cumsum(tabulate(match(yg, vals))) / n, method='constant', yleft=0, yright=1, f=0, ties='ordered', xout=vals) delta <- diff(rng) * 0.025 a$x <- c(min(a$x) - delta, a$x, max(a$x) + delta) a$y <- c(0, a$y, 1) dp <- data.frame(x = a$x, y = a$y, g=gv) Dp <- rbind(Dp, dp) } names(Dp) <- c(yv, 'ecdf', if(length(colvar)) colvar else 'g') Dp } xlabc <- if(length(xlab)) paste0(xlab, ': ') llab <- ifelse('tracename' %in% nam, as.character(data$tracename), 'Limits') wl <- function(n, hin) paste0(xlabc, fmt(data[[hin]]), '<br>', llab, ':[', fmt(data[[n]]), ', ', fmt(data[[hin]]), ']') if(! length(htext)) { nhi <- is.na(data[[xhin]]) + is.na(data[[yhin]]) whi <- ifelse(nhi == 2, 'xy', ## which vars missing hi? ifelse(nhi == 0, '', ifelse(is.na(data[[xhin]]), 'x', 'y'))) data$htxt <- ifelse(whi == 'xy', paste0(xlabc, fmt(data[[xn]]), '<br>', ylab[data$.v.], ':', fmt(data[[yn]])), ifelse(whi == 'x', wl(yn, yhin), ifelse(whi == 'y', wl(xn, xhin), paste0(xlabc, fmt(data[[xn]]), '<br>', xn, ' ', llab, ': [', fmt(data[[xn]]), ', ', fmt(data[[xhin]]), ']', '<br>', yn, ' ', llab, ': [', fmt(data[[yn]]), ', ', fmt(data[[yhin]]), ']')))) htext <- ~ htxt } p <- plotly::plot_ly(height=height, width=width, colors=colors) ## For some reason colors doesn't always take in add_* P <- list() iv <- 0 # axislab <- character(0) # axn1 <- if(rotate) 'yaxis' else 'xaxis' # axn2 <- if(rotate) 'xaxis' else 'yaxis' for(vn in vlevs) { for(sn in stlevs) { iv <- iv + 1 whichaxis <- if(iv == 1) '' else iv if(is.ecdf) { ax1 <- ylab[vn] ax2 <- 'Cumulative Probability' xn <- yn xf <- yf yf <- ~ ecdf } else { ax1 <- if(rotate) ylab[vn] else xlab ax2 <- if(rotate) xlab else ylab[vn] } w <- subset(data, .v. == vn & strata == sn) wxn <- w[[xn]] # if(xpresent) w[[xn]] else 1 : nrow(w) j <- if(length(colvar)) order(w[[colvar]], wxn) else if(length(sizevar)) order(w[[sizevar]], wxn) else order(wxn) w <- w[j, ] r <- p ipt <- is.na(w[[yhin]]) & is.na(w[[xhin]]) pt <- w[ipt, ] conct <- is.logical(pt$connect) && pt$connect[1] if(nrow(pt)) { if(length(fitter)) { Dp <- runfit() r <- plotly::add_lines(r, data=Dp, x=xf, y=yf, name=traceform, legendgroup=legendgroupform, showlegend=vn==lastv & sn==lasts, color=color, size=size, colors=colors, line=if(is.ecdf) list(shape='hv')) } if(showpts) { r <- plotly::add_markers(r, data=pt, x=xf, y=yf, name=traceform, legendgroup=legendgroupform, showlegend=vn==lastv & sn==lasts, color=color, size=size, text=htext, hoverinfo='text', colors=colors) if(conct) r <- plotly::add_lines(r, data=pt, x=xf, y=yf, name=traceform, legendgroup=legendgroupform, showlegend=FALSE, color=color, size=I(1), hoverinfo='none', colors=colors, alpha=alphaCline) } } s <- w[! ipt, ] if(nrow(s)) { ## If only one of xhi and yhi is missing, need to copy non-NA ## value from x/y. Must go to extra trouble to preserve factors m <- is.na(s[[xhin]]) if(any(m)) { a <- s[[xn]] a[! m] <- s[! m, xhin] s[[xhin]] <- a } m <- is.na(s[[yhin]]) if(any(m)) { a <- s[[yn]] a[! m] <- s[! 
m, yhin] s[[yhin]] <- a } r <- plotly::add_segments(r, data=s, x=xf, y=yf, xend=xfe, yend=yfe, name=traceform, legendgroup=legendgroupform, showlegend=vn==lastv & sn==lasts, color=color, size=size, colors=colors, alpha=alphaSegments, text=htext, hoverinfo='text') } ## rdocumentation.org/packages/plotly/versions/4.7.1/topics/add_annotations ## https://plot.ly/r/text-and-annotations/ ## plot.ly/r/text-and-annotations/#set-annotation-coordinate-references firstst <- length(stlevs) > 1 && vn == vlevs[1] if(firstst || ylabpos == 'top') { lab <- ax2 if(firstst) lab <- paste0(lab, '<br>', sn) r <- plotly::add_annotations(r, x=0, y=1, xref='paper', xanchor='left', yref='paper', yanchor='bottom', text=paste0('<b>', lab, '</b>'), showarrow=FALSE, font=list(color='rgba(25, 25, 112, 1.0)', size=14)) ## midnight blue } r <- plotly::layout(r, xaxis=list(title=ax1, range=xlim, zeroline=zeroline), yaxis=list(title=if(ylabpos == 'y') ax2 else '', range=ylim)) P[[iv]] <- r } } if(length(ncols)) nrows <- ceil(iv / ncols) if(length(stlevs) > 1) shareY <- TRUE if(length(P) == 1) P <- P[[1]] else { P <- if(length(nrows)) plotly::subplot(P, shareX=shareX, shareY=shareY, titleX=TRUE, titleY=TRUE, nrows=nrows) else plotly::subplot(P, shareX=shareX, shareY=shareY, titleX=TRUE, titleY=TRUE) } P } utils::globalVariables('.v.') ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/fit.mult.impute.s���������������������������������������������������������������������������0000644�0001762�0000144�00000016462�14275727615�014777� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������fit.mult.impute <- function(formula, fitter, xtrans, data, n.impute=xtrans$n.impute, fit.reps=FALSE, dtrans, derived, vcovOpts=NULL, pr=TRUE, subset, ...) { call <- match.call() if(deparse(substitute(fitter))[1] == 'lm') warning('If you use print, summary, or anova on the result, lm methods use the\nsum of squared residuals rather than the Rubin formula for computing\nresidual variance and standard errors. 
It is suggested to use ols\ninstead of lm.') using.Design <- FALSE used.mice <- any(class(xtrans)=='mids') if (used.mice && !requireNamespace("mice", quietly = TRUE)) stop("This data requires the 'mice' package.") if(used.mice && missing(n.impute)) n.impute <- xtrans$m fits <- if(fit.reps) vector('list', n.impute) stats.ok2average <- c('linear.predictors','fitted.values','stats', 'means', 'icoef', 'scale', 'center', 'y.imputed') for(i in 1 : n.impute) { if(used.mice) { completed.data <- mice::complete(xtrans, i) for(impvar in names(completed.data)) if(length(attr(completed.data[[impvar]], 'contrasts'))) attr(completed.data[[impvar]], 'contrasts') <- NULL } else { completed.data <- data imputed.data <- impute.transcan(xtrans, imputation=i, data=data, list.out=TRUE, pr=FALSE, check=FALSE) ## impute.transcan works for aregImpute completed.data[names(imputed.data)] <- imputed.data } if(!missing(dtrans)) completed.data <- dtrans(completed.data) if(!missing(derived)) { stop('derived variables in fit.mult.imputed not yet implemented') eval(derived, completed.data) } if(using.Design) options(Design.attr=da) f <- if(missing(subset)) fitter(formula, data=completed.data, ...) else fitter(formula, data=completed.data[subset,], ...) ## For some reason passing subset= causes model.frame bomb in R if(fit.reps) fits[[i]] <- f cof <- f$coef v <- do.call('vcov', c(list(object=f, intercepts='all'), vcovOpts)) if(i == 1) { if(inherits(f, 'orm') && length(f$na.action) && length(f$na.action$nmiss) && f$na.action$nmiss[1] > 0) warning('When using fit.mult.impute with orm, there should not be any missing\nY values because different imputations will result in differing numbers\nof intercepts') assign <- f$assign ns <- num.intercepts(f) ik <- coef.intercepts <- NULL if(ns > 0) { ik <- attr(v, 'intercepts') # intercepts kept in f$var if(length(ik)) { if(ik == 'all') ik <- 1 : ns else if(ik == 'none') ik <- 0 lenik <- length(ik); if(length(ik) == 1 && ik == 0) lenik <- 0 ## Shift parameter indexes to left b/c of omitted intercepts for orm if(lenik != ns) { for(j in 1 : length(assign)) assign[[j]] <- assign[[j]] - (ns - lenik) coef.intercepts <- ik } } } } if(length(ik)) cof <- c(cof[ik], cof[-(1 : ns)]) ## From Rainer Dyckerhoff to work correctly with models that have ## a scale parameter (e.g. psm). Check whether length of the ## coefficient vector is different from the the number of rows of ## the covariance matrix. If so, the model contains scale ## parameters that are not fixed at some value and we have to ## append the scale parameters to the coefficient vector. 
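## Rubin's rules as accumulated over this loop: with m = n.impute, the m
## coefficient vectors are averaged (bar), the m covariance matrices are
## averaged (within-imputation variance U), the empirical covariance of the
## m coefficient vectors gives the between-imputation variance B, and the
## final covariance matrix computed after the loop is U + (1 + 1/m) B.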
nvar0 <- length(cof) nvar <- nrow(v) if(nvar > nvar0) { cof <- c(cof, log(f$scale)) names(cof) <- c(names(f$coef), if((nvar - nvar0) == 1) "Log(scale)" else names(f$scale)) } if(i==1) { vavg <- 0*v p <- length(cof) bar <- rep(0, p) vname <- names(cof) cov <- matrix(0, nrow=p, ncol=p, dimnames=list(vname,vname)) astats <- NULL fitcomp <- names(f)[names(f) %in% stats.ok2average] if(length(fitcomp)) for(ncomp in fitcomp) astats[[ncomp]] <- f[[ncomp]] if(inherits(f,'Design') | inherits(f, 'rms')) { using.Design <- TRUE da <- f$Design } } vavg <- vavg + v bar <- bar + cof cof <- as.matrix(cof) cov <- cov + cof %*% t(cof) if(i > 1 && length(fitcomp)) for(ncomp in fitcomp) astats[[ncomp]] <- astats[[ncomp]] + f[[ncomp]] } vavg <- vavg / n.impute ## matrix \bar{U} in Rubin's notation bar <- bar / n.impute bar <- as.matrix(bar) ## Matrix B in Rubin's notation: cov <- (cov - n.impute * bar %*% t(bar)) / (n.impute - 1) U <- diag(vavg) B <- diag(cov) ## save the diagonals of U and B cov <- vavg + (n.impute + 1) / n.impute * cov ## final covariance matrix r <- diag(cov) / diag(vavg) names(r) <- vname tau <- (1 + 1/n.impute)*B/U missingInfo <- tau/(1+tau) dfmi <- (n.impute - 1)*((1 + 1/tau)^2) ## Same as dfmi <- (n.impute - 1) * (1 + U / (B * (1 + 1 / n.impute))) ^ 2 if(length(fitcomp)) for(ncomp in fitcomp) f[[ncomp]] <- astats[[ncomp]] / n.impute if(pr) { cat('\nVariance Inflation Factors Due to Imputation:\n\n') print(round(r,2)) cat('\nRate of Missing Information:\n\n') print(round(missingInfo,2)) cat('\nd.f. for t-distribution for Tests of Single Coefficients:\n\n') print(round(dfmi,2)) if(length(fitcomp)) { cat('\nThe following fit components were averaged over the', n.impute, 'model fits:\n\n') cat(' ', fitcomp, '\n\n') } } f$coefficients <- drop(bar) if(length(coef.intercepts)) attr(f$coefficients, 'intercepts') <- coef.intercepts attr(cov, 'intercepts') <- ik f$var <- cov f$variance.inflation.impute <- r f$missingInfo <- missingInfo f$dfmi <- dfmi f$fits <- fits f$formula <- formula f$assign <- assign f$call <- call if(using.Design) options(Design.attr=NULL) class(f) <- c('fit.mult.impute', class(f)) f } ## orm fit$var has only middle intercept ## fit.mult.impute from orm has all intercepts vcov.fit.mult.impute <- function(object, regcoef.only=TRUE, intercepts='mid', ...) 
{ if(inherits(object, 'orm')) return(NextMethod('vcov', object, regcoef.only=regcoef.only, intercepts=intercepts, ...)) ns <- num.intercepts(object) v <- object$var if(ns == 0) return(v) vari <- attr(v, 'intercepts') lvari <- length(vari) if(is.character(intercepts)) { switch(intercepts, mid = { if(lvari > 0L && lvari != 1L) stop('requested middle intercept but more than one intercept stored in object$var') return(v) }, all = { if(lvari > 0 && lvari < ns) stop('requested all intercepts but not all stored in object$var') return(v) }, none = if(inherits(object, 'orm') && lvari == 1) return(v[-1, -1, drop=FALSE]) else return(v[-(1 : ns),-(1 : ns), drop=FALSE])) } ## intercepts is integer scalar or vector if(lvari && isTRUE(all.equal(sort(vari), sort(intercepts)))) return(v) if(length(intercepts) == ns) return(v) if(length(intercepts) > ns) stop('more intercepts requested than in model') i <- c(intercepts, (ns + 1) : ncol(v)) v[i, i, drop=FALSE] } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/dotchart3.s���������������������������������������������������������������������������������0000644�0001762�0000144�00000043571�14247426053�013616� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������dotchart3 <- function (x, labels = NULL, groups = NULL, gdata = NULL, cex = par("cex"), pch = 21, gpch = pch, bg = par("bg"), color = par("fg"), gcolor = par("fg"), lcolor = "gray", xlim = range(c(x, gdata), na.rm=TRUE), main = NULL, xlab = NULL, ylab = NULL, auxdata=NULL, auxtitle=NULL, auxgdata=NULL, axisat=NULL, axislabels=NULL, cex.labels = cex, cex.group.labels = cex.labels*1.25, cex.auxdata = cex, groupfont=2, auxwhere=NULL, height=NULL, width=NULL, ...) { opar <- par("mai", "mar", "cex", "yaxs") on.exit(par(opar)) par(cex = cex, yaxs = "i") if (! is.numeric(x)) stop("'x' must be a numeric vector or matrix") x <- as.matrix(x) n <- nrow(x) nc <- ncol(x) pch <- rep(pch, length.out=nc) if(! length(labels)) labels <- rownames(x) if(! length(labels)) stop('labels not defined') if(length(groups)) groups <- as.factor(groups) glabels <- levels(groups) plot.new() linch <- max(strwidth(labels, "inch"), na.rm = TRUE) if (! length(glabels)) { ginch <- 0 goffset <- 0 } else { ginch <- max(strwidth(glabels, "inch", cex=cex.group.labels, font=groupfont), na.rm = TRUE) goffset <- 0.4 } if(length(labels) + length(glabels) > 0) { nmai <- par("mai") nmai[2L] <- max(nmai[2L], nmai[4L] + max(linch + goffset, ginch) + 0.1) ## Run strwidth separately because on of the 3 variables might ## be an expression, in which case an overall c(...) would combine the ## widths of character vectors if(length(auxdata) + length(auxgdata) > 0) nmai[4L] <- .2 + 1.1 * max(strwidth(auxtitle, 'inch', cex=cex.auxdata), strwidth(auxdata, 'inch', cex=cex.auxdata), strwidth(auxgdata, 'inch', cex=cex.auxdata)) par(mai = nmai) } if (! 
length(groups)) { o <- n:1L y <- o ylim <- c(.5, n + .5) x <- x[o, , drop=FALSE] labels <- labels[o] if(length(auxdata)) auxdata <- auxdata[o] } else { # Added: For each group reverse order of data so plotting will # put first levels at top o <- sort.list(as.numeric(groups), decreasing = TRUE) groups <- groups[o] # for(g in levels(groups)) { # i <- groups == g # o[i] <- rev(o[i]) # } x <- x[o, , drop=FALSE] # ascending within region labels <- labels[o] if(length(auxdata)) auxdata <- auxdata[o] # End added # groups <- groups[o] (put earlier) color <- rep(color, length.out = length(groups))[o] lcolor <- rep(lcolor, length.out = length(groups))[o] offset <- cumsum(c(0, diff(as.numeric(groups)) != 0)) y <- 1L:n + 2 * offset ylim <- range(0.5, y + 1.5) # range(0, y + 2) } plot.window(xlim = xlim, ylim = ylim, log = "") lheight <- par("csi") if(length(labels)) { linch <- max(strwidth(labels, "inch", cex=cex.labels), na.rm = TRUE) loffset <- (linch + 0.1) / lheight # was line=loffset mtext(labels, side = 2, line = .1*loffset, at = y, adj = 1, col = color, las = 2, cex = cex.labels, ...) } abline(h = y, lty = "dotted", col = lcolor) if(length(auxtitle)) { upedge <- par('usr')[4] outerText(auxtitle, upedge + strheight(auxtitle, cex=cex) / 2, cex=cex) } gpos <- if(length(groups)) rev(cumsum(rev(tapply(groups, groups, length)) + 2) - 1) if(length(auxdata) + length(auxgdata) > 0) outerText(c(auxdata, auxgdata), c(y, if(length(auxgdata)) gpos), cex=cex.auxdata) for(i in 1:nc) points(x[,i], y, pch = pch[i], col = color, bg = bg) if(length(groups)) { ginch <- max(strwidth(glabels, "inch", font=groupfont, cex=cex.group.labels), na.rm = TRUE) goffset <- (max(linch + 0.2, ginch, na.rm = TRUE) + 0.1)/lheight mtext(glabels, side = 2, line = .2, at = gpos, adj = 1, # was adj=0 col = gcolor, las = 2, cex = cex.group.labels, font=groupfont, ...) if (length(gdata)) { abline(h = gpos, lty = "dotted") if(is.matrix(gdata)) for(j in 1:ncol(gdata)) points(gdata[, j], gpos, pch=gpch[j], col=gcolor, bg=bg, ...) else points(gdata, gpos, pch = gpch, col = gcolor, bg = bg, ...) } } if(length(axisat)) axis(1, at=axisat, labels=axislabels) else axis(1) box() title(main = main, xlab = xlab, ylab = ylab, ...) invisible() } dotchartp <- function (x, labels = NULL, groups = NULL, gdata = NULL, xlim = range(c(x, gdata), na.rm=TRUE), main=NULL, xlab = NULL, ylab = '', auxdata=NULL, auxtitle=NULL, auxgdata=NULL, auxwhere=c('right', 'hover'), symbol='circle', col=colorspace::rainbow_hcl, legendgroup=NULL, axisat=NULL, axislabels=NULL, sort=TRUE, digits=4, dec=NULL, height=NULL, width=700, layoutattr=FALSE, showlegend=TRUE, ...) { if (!requireNamespace("plotly")) stop("This function requires the 'plotly' package.") auxwhere <- match.arg(auxwhere) fmt <- if(length(dec)) function(x) format(round(x, dec)) else function(x) format(x, digits=digits) mu <- markupSpecs$html lspace <- mu$lspace if (! is.numeric(x)) stop("'x' must be a numeric vector or matrix") x <- as.matrix(x) n <- nrow(x) nc <- ncol(x) symbol <- rep(symbol, length.out=nc) col <- if(length(col)) { if(! is.function(col)) col else col(ncol(x)) } col <- rep(col, length.out=nc) if(length(gdata)) { gdata <- as.matrix(gdata) if(ncol(gdata) != nc) stop('gdata must have same columns as x') } if(! length(labels)) labels <- rownames(x) if(! length(labels)) stop('labels not defined') if(length(groups)) groups <- as.factor(groups) glabels <- levels(groups) groups.pres <- length(groups) > 0 if(! 
groups.pres) y <- n : 1 else { if(is.character(sort) || sort) { o <- if(is.character(sort)) { if(sort == 'ascending') order(x[, 1]) else order(-x[, 1]) } else order(as.integer(groups)) ###, as.integer(labels)) groups <- groups[o] x <- x[o, , drop=FALSE] labels <- labels[o] if(length(auxdata)) auxdata <- if(is.matrix(auxdata)) auxdata[o,, drop=FALSE] else auxdata[o] } lgroups <- Lag(as.character(groups)) lgroups[1] <- 'NULL' first.in.group <- groups != lgroups y <- cumsum(1 + 1.5 * first.in.group) yg <- y[first.in.group] - 1 y <- -y yg <- -yg } X <- x[, 1] tly <- y auxd <- NULL auxh <- auxwhere == 'hover' auxt <- if(length(auxtitle) && auxtitle != '') # ifelse(auxh, paste0(auxtitle, '<br>'), paste0(auxtitle, ':')) paste0(auxtitle, ':') else '' if(auxh) auxd <- if(length(auxdata)) paste0(auxt, if(is.matrix(auxdata)) auxdata[, 1] else auxdata) else rep('', length(X)) if(length(gdata) || (auxh && length(auxgdata))) { X <- c(X, if(length(gdata)) gdata[, 1] else rep(NA, length(auxgdata))) tly <- c(tly, yg) if(auxh) auxd <- c(auxd, if(length(auxgdata)) paste0(auxt, # was lspace after auxt, if(is.matrix(auxgdata)) auxgdata[, 1] else auxgdata) else rep('', length(yg))) } nx <- if(nc == 1) '' else colnames(x)[1] ht <- if(nx == '') fmt(X) else paste(nx, '<br>', fmt(X)) if(auxh && any(auxd != '')) ht <- paste0(ht, '<br>', auxd) # <br> was lspace d <- data.frame(X, y=tly, ht=ht) if(length(height) && height == 'auto') height <- plotlyParm$heightDotchart(n) auto <- .Options$plotlyauto if(length(auto) && auto) height <- width <- NULL p <- plotly::plot_ly(d, x=~ X, y=~ y, mode='markers', type='scatter', marker=list(symbol=symbol[1], color=col[1]), text = ~ ht, hoverinfo = 'text', name=nx, legendgroup=if(length(legendgroup)) legendgroup[1], width = width, height=height) if(nc > 1) for(i in 2 : nc) { X <- x[, i] tly <- y if(length(gdata)) { X <- c(X, gdata[, i]) tly <- c(tly, yg) } ax <- if(length(auxdata) && is.matrix(auxdata)) auxdata[, i] else '' d <- data.frame(X=X, y=tly, ht=paste0(colnames(x)[i], '<br>', fmt(X), lspace, ax)) p <- plotly::add_markers(p, data=d, x=~ X, y=~ y, #mode='markers', marker=list(symbol=symbol[i], color=col[i]), text = ~ ht, hoverinfo='text', legendgroup=if(length(legendgroup)) legendgroup[i], name=colnames(x)[i]) } dx <- 0.1 * diff(xlim) lenaux <- length(auxdata) + length(auxgdata) if(auxwhere == 'right' && lenaux) { yb <- tb <- NULL if(length(auxdata)) { yb <- y tb <- auxdata } if(groups.pres && length(auxgdata)) { yb <- c(yb, yg) tb <- c(tb, auxgdata) } if(length(auxtitle)) { yb <- c(yb, min(yb) - 2) tb <- c(tb, auxtitle) } if(length(auxgdata)) { yb <- c(yb, yg) tb <- c(tb, paste('<b>', auxgdata, '</b>', sep='')) } z <- data.frame(xb=xlim[2] + dx, yb, tb) p <- plotly::add_text(p, data=z, x=~ xb, y=~ yb, text=~ tb, # mode='text', textposition='left', textfont=list(size=10), hoverinfo='none', name='') } if(length(axisat)) {tlx <- axisat; ttx <- axislabels} else { tlx <- pretty(xlim, 10) tlxs <- pretty(xlim, 5) ttx <- format(tlx) for(j in 1 : length(tlx)) if(! any(abs(tlxs - tlx[j]) < 1e-10)) ttx[j] <- '' } tly <-y tty <- as.character(labels) if(groups.pres) { tly <- c(tly, yg) tty <- c(tty, paste('<b>', glabels, '</b>', sep='')) } if(! 
length(ylab)) ylab <- '' tty <- ifelse(nchar(tty) >= 40, mu$smaller2(tty), ifelse(nchar(tty) > 20, mu$smaller(tty), tty)) leftmargin <- plotlyParm$lrmargin(tty) rx <- if(auxwhere == 'right' && lenaux > 0) dx else dx / 2 ylim <- c(min(y) - .15, max(y) + 1.5) lo <- list(title=main, xaxis=list(title=xlab, range=c(xlim[1] - 0.2 * dx, xlim[2] + rx), zeroline=FALSE, tickvals=tlx, ticktext=ttx), yaxis=list(title=ylab, range=ylim, zeroline=FALSE, tickvals=tly, ticktext=tty), # width=width, # height=if(length(height) && height == 'auto') # plotlyParm$heightDotchart(n) else height, autosize=(length(width) + length(height)) == 0, margin=list(l=leftmargin, t=5), showlegend=showlegend) if(layoutattr) { attr(p, 'layout') <- lo return(p) } plotly::layout(p, title = main, xaxis = list(title=xlab, range=c(xlim[1] - 0.2 * dx, xlim[2] + rx), zeroline=FALSE, tickvals=tlx, ticktext=ttx), yaxis = list(title=ylab, range=ylim, zeroline=FALSE, tickvals=tly, ticktext=tty), # width = width, # height= if(length(height) && height == 'auto') # plotlyParm$heightDotchart(n) else height, # autosize=(length(width) + length(height)) == 0, margin = list(l=leftmargin, t=5), legendgroup=legendgroup, showlegend = showlegend) } summaryD <- function(formula, data=NULL, fun=mean, funm=fun, groupsummary=TRUE, auxvar=NULL, auxtitle='', auxwhere=c('hover', 'right'), vals=length(auxvar) > 0, fmtvals=format, symbol=if(use.plotly) 'circle' else 21, col=if(use.plotly) colorspace::rainbow_hcl else 1:10, legendgroup=NULL, cex.auxdata=.7, xlab=v[1], ylab=NULL, gridevery=NULL, gridcol=gray(.95), sort=TRUE, ...) { use.plotly <- grType() == 'plotly' auxwhere <- match.arg(auxwhere) if(! missing(fmtvals)) vals <- TRUE data <- if(! length(data)) environment(formula) else list2env(data, parent=environment(formula)) if(length(auxvar) && is.character(auxvar) && missing(auxtitle)) auxtitle <- auxvar v <- all.vars(formula) m <- length(v) - 1 yn <- v[1]; xn <- v[-1] two <- length(xn) == 2 y <- get(yn, envir=data) x1 <- get(xn[1], envir=data) x2 <- if(two) get(xn[2], envir=data) s <- summarize(y, if(two) llist(x1, x2) else llist(x1), fun, type='matrix', keepcolnames=TRUE) ## if(is.matrix(s$y)) colnames(s$y) <- colnames(y) cx1 <- if(is.factor(s$x1)) as.integer(s$x1) else s$x1 yy <- if(is.matrix(s$y)) s$y[, 1, drop=FALSE] else s$y if(sort) s <- if(two) s[order(cx1, - yy), ] else s[order(- yy), ] auxd <- function(z) { sy <- z$y if(length(auxvar)) { if(! is.matrix(sy)) stop('auxvar is only used when fun returns > 1 statistic') f <- if(vals) fmtvals(sy[, auxvar]) sy <- if(is.numeric(auxvar)) sy[, -auxvar, drop=FALSE] else sy[, setdiff(colnames(sy), auxvar), drop=FALSE] } else f <- if(vals) fmtvals(if(is.matrix(sy)) sy[, 1] else sy) list(sy=sy, fval=f) # sy = remaining y, fval = formatted auxvar } z <- auxd(s) if(two) { if(groupsummary) { s2 <- summarize(y, llist(x1), funm, type='matrix', keepcolnames=TRUE) z2 <- auxd(s2) } z <- auxd(s) col <- if(length(col)) { if(! is.function(col)) col else col(if(is.matrix(z$sy)) ncol(z$sy) else 1) } ## if already sorted (group variable order first) don't re-sort ## sort causes problems to dotchart3 res <- if(use.plotly) dotchartp(z$sy, s$x2, groups=s$x1, auxdata=z$fval, auxtitle=if(vals) auxtitle, auxwhere=auxwhere, cex.auxdata=cex.auxdata, gdata =if(groupsummary) z2$sy, auxgdata=if(groupsummary) z2$fval, xlab=xlab, ylab=ylab, symbol=symbol, col=col, legendgroup=legendgroup, sort=FALSE, ...) 
else dotchart3(z$sy, s$x2, groups=s$x1, auxdata=z$fval, auxtitle=if(vals) auxtitle, cex.auxdata=cex.auxdata, gdata =if(groupsummary) z2$sy, auxgdata=if(groupsummary) z2$fval, xlab=xlab, ylab=ylab, pch=symbol, ...) } else res <- if(use.plotly) dotchartp(z$sy, s$x1, auxdata=z$fval, auxtitle=if(vals) auxtitle, auxwhere=auxwhere, cex.auxdata=cex.auxdata, xlab=xlab, ylab=ylab, symbol=symbol, col=col, legendgroup=legendgroup, sort=FALSE, ...) else dotchart3(z$sy, s$x1, auxdata=z$fval, auxtitle=if(vals) auxtitle, pch=symbol, cex.auxdata=cex.auxdata, xlab=xlab, ylab=ylab, ...) if(! use.plotly && length(gridevery)) { xmin <- par('usr')[1] xmin <- ceiling(xmin / gridevery) * gridevery xmax <- if(length(xn) == 1) max(s$y, na.rm=TRUE) else max(c(s$y, s2$y), na.rm=TRUE) abline(v=seq(xmin, xmax, by=gridevery), col=gridcol) } if(use.plotly) res else invisible(res) } summaryDp <- function(formula, fun=function(x) c(Mean=mean(x, na.rm=TRUE), N=sum(! is.na(x))), overall=TRUE, xlim=NULL, xlab=NULL, data=NULL, subset=NULL, na.action=na.retain, ncharsmax=c(50, 30), digits=4, ...) { Y <- if(length(subset)) model.frame(formula, data=data, subset=subset, na.action=na.action) else model.frame(formula, data=data, na.action=na.action) X <- Y[-1] y <- Y[[1]] swr <- function(w, ...) sapply(strwrap(w, ..., simplify=FALSE), function(x) paste(x, collapse='<br>')) addbr <- markupSpecs$html$addBreak width <- ncharsmax[1]; minbreak <- ncharsmax[2] if(! length(xlab)) xlab <- swr(label(y, default=names(Y)[1]), width=width) major <- minor <- ht <- character(0) x <- numeric(0) funlabs <- names(fun(y)) nx <- names(X) if(overall) nx <- c(nx, 'Overall') for(v in nx) { if(v == 'Overall') { by <- rep('Overall', length(y)) bylab <- 'Overall' } else { by <- X[[v]] bylab <- addbr(label(by, default=v), minbreak=minbreak) } s <- summarize(y, by, fun) i <- order(- s[, 2]) s <- s[i, ] m <- s[, 2] faux <- paste0(funlabs[1], ': ', format(m, digits=digits)) if(NCOL(s) > 2) { j <- 0 aux <- s[-(1:2)] for(a in names(aux)) { j <- j + 1 faux <- paste0(faux, '<br>', funlabs[j + 1], ': ', format(aux[[a]], digits=digits)) } } major <- c(major, rep(bylab, length(m))) minor <- c(minor, if(v == 'Overall') '' else as.character(s[, 1])) ht <- c(ht, faux) x <- c(x, unname(m)) } if(! length(xlim)) { r <- range(x) xlim <- r + c(-1, 1) * diff(r) / 20 } dotchartpl(x, major, minor, htext=ht, xlim=xlim, xlab=xlab, ...) } ���������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/pairUpDiff.r��������������������������������������������������������������������������������0000644�0001762�0000144�00000014554�13736155724�013760� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������##' Pair-up and Compute Differences ##' ##' This function sets up for plotting half-width confidence intervals for differences, sorting by descending order of differences within major categories, especially for dot charts as produced by [dotchartpl()]. Given a numeric vector `x` and a grouping (superpositioning) vector `group` with exactly two levels, computes differences in possibly transformed `x` between levels of `group` for the two observations that are equal on `major` and `minor`. 
If `lower` and `upper` are specified, using `conf.int` and approximate normality on the transformed scale to backsolve for the standard errors of estimates, and uses approximate normality to get confidence intervals on differences by taking the square root of the sum of squares of the two standard errors. Coordinates for plotting half-width confidence intervals are also computed. These intervals may be plotted on the same scale as `x`, having the property that they overlap the two `x` values if and only if there is no "significant" difference at the `conf.int` level. ##' @title pairUpDiff ##' @param x a numeric vector ##' @param major an optional factor or character vector ##' @param minor an optional factor or character vector ##' @param group a required factor or character vector with two levels ##' @param refgroup a character string specifying which level of `group` is to be subtracted ##' @param lower an optional numeric vector giving the lower `conf.int` confidence limit for `x` ##' @param upper similar to `lower` but for the upper limit ##' @param minkeep the minimum value of `x` required to keep the observation. An observation is kept if either `group` has `x` exceeding or equalling `minkeep`. Default is to keep all observations. ##' @param sortdiff set to `FALSE` to avoid sorting observations by descending between-`group` differences ##' @param conf.int confidence level; must have been the value used to compute `lower` and `upper` if they are provided ##' @return a list of two objects both sorted by descending values of differences in `x`. The `X` object is a data frame that contains the original variables sorted by descending differences across `group` and in addition a variable `subscripts` denoting the subscripts of original observations with possible re-sorting and dropping depending on `sortdiff` and `minkeep`. The `D` data frame contains sorted differences (`diff`), `major`, `minor`, `sd` of difference, `lower` and `upper` confidence limits for the difference, `mid`, the midpoint of the two `x` values involved in the difference, `lowermid`, the midpoint minus 1/2 the width of the confidence interval, and `uppermid`, the midpoint plus 1/2 the width of the confidence interval. Another element returned is `dropped` which is a vector of `major` / `minor` combinations dropped due to `minkeep`. ##' @author Frank Harrell ##' @export ##' @md ##' @examples ##' x <- c(1, 4, 7, 2, 5, 3, 6) ##' pairUpDiff(x, c(rep('A', 4), rep('B', 3)), ##' c('u','u','v','v','z','z','q'), ##' c('a','b','a','b','a','b','a'), 'a', x-.1, x+.1) pairUpDiff <- function(x, major=NULL, minor=NULL, group, refgroup, lower=NULL, upper=NULL, minkeep=NULL, sortdiff=TRUE, conf.int=0.95) { n <- length(x) major <- if(! length(major)) rep(' ', n) else as.character(major) minor <- if(! length(minor)) rep(' ', n) else as.character(minor) ## Note: R will not let you use z[cbind(...)] if one of the dimensions ## is of length 1 and equal to ''; needed to use ' ' group <- as.character(group) glev <- unique(group) if(length(glev) != 2) stop('group must have two distinct values') if(refgroup %nin% glev) stop('refgroup must be one of the group values') altgroup <- setdiff(glev, refgroup) mm <- c(major, minor) sep <- if(! any(grepl(':', mm))) ':' else if(! any(grepl('|', mm))) '|' else if(! any(grepl(';', mm))) ';' else if(! any(grepl('!', mm))) '!' 
  else stop('major or minor contain all delimiters :|;!')
  m  <- paste0(major, sep, minor)
  u  <- unique(m)
  lu <- length(u)
  lowup <- length(lower) * length(upper) > 0
  zcrit <- qnorm((1 + conf.int) / 2)
  ## See if any observations should be dropped
  dropped <- NULL
  if(length(minkeep)) {
    xa <- xb <- structure(rep(NA, lu), names=u)
    j <- group == refgroup
    xa[m[j]] <- x[j]
    j <- group == altgroup
    xb[m[j]] <- x[j]
    j <- ! is.na(xa + xb) & (xa < minkeep & xb < minkeep)
    if(any(j)) {
      dropped <- names(xa)[j]
      u  <- setdiff(u, dropped)
      lu <- length(u)
    }
  }
  xa <- xb <- sda <- sdb <- diff <- mid <- dsd <- dlower <- dupper <-
    dlowermid <- duppermid <- structure(rep(NA, lu), names=u)
  j <- (group == refgroup) & (m %nin% dropped)
  w <- m[j]
  xa[w] <- x[j]
  if(lowup) sda[w] <- 0.5 * (upper[j] - lower[j]) / zcrit
  j <- (group == altgroup) & (m %nin% dropped)
  w <- m[j]
  xb[w] <- x[j]
  if(lowup) sdb[w] <- 0.5 * (upper[j] - lower[j]) / zcrit
  diff[u] <- xb[u] - xa[u]
  if(lowup) {
    dsd[u]    <- sqrt(sda[u] ^ 2 + sdb[u] ^ 2)
    dlower[u] <- diff[u] - zcrit * dsd[u]
    dupper[u] <- diff[u] + zcrit * dsd[u]
  }
  mid[u] <- (xa[u] + xb[u]) / 2.
  if(lowup) {
    dlowermid[u] <- mid[u] - 0.5 * zcrit * dsd[u]
    duppermid[u] <- mid[u] + 0.5 * zcrit * dsd[u]
  }
  k  <- strsplit(u, sep)
  ma <- sapply(k, function(x) x[[1]])
  mi <- sapply(k, function(x) x[[2]])
  ww <- list(x, major, minor, group, lower, upper, subscripts=1:length(x))
  Z <- if(lowup)
         data.frame(x, major, minor, group, lower, upper,
                    subscripts=1 : length(x))
       else
         data.frame(x, major, minor, group, subscripts=1 : length(x))
  if(length(dropped)) Z <- Z[m %nin% dropped, ]
  if(sortdiff) {
    m <- paste0(Z$major, sep, Z$minor)
    ## diff[m] is a table lookup; difference will be same for both groups
    j <- order(Z$major, ifelse(is.na(diff[m]), -Inf, - diff[m]))
    Z <- Z[j, ]
  }
  ## Variables referenced below have already had observations dropped
  ## due to minkeep
  D <- data.frame(diff=diff[u], major=ma, minor=mi, sd=dsd[u],
                  lower=dlower[u], upper=dupper[u], mid=mid[u],
                  lowermid=dlowermid[u], uppermid=duppermid[u])
  if(sortdiff) {
    j <- order(D$major, ifelse(is.na(D$diff), -Inf, -D$diff))
    D <- D[j, ]
  }
  list(X=Z, D=D, dropped=dropped)
}

## ===== Hmisc/R/na.keep.s =====

na.keep <- function(mf)
{
  w <- na.detail.response(mf)
  if(length(w)) class(w) <- 'keep'
  attr(mf, "na.action") <- w
  mf
}

naprint.keep <- function(x, ...)
{
  if(length(x)) {
    cat("\nStatistics on Response by Missing/Non-Missing Status of Predictors\n\n")
    print(unclass(x))
    cat("\n")
  }
  invisible()
}

naresid.keep <- function(omit, x, ...)
x �����������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/aregImpute.s��������������������������������������������������������������������������������0000644�0001762�0000144�00000031423�12435343744�014020� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������aregImpute <- function(formula, data, subset, n.impute=5, group=NULL, nk=3, tlinear=TRUE, type=c('pmm','regression','normpmm'), pmmtype=1, match=c('weighted','closest','kclosest'), kclosest=3, fweighted=0.2, curtail=TRUE, boot.method=c('simple', 'approximate bayesian'), burnin=3, x=FALSE, pr=TRUE, plotTrans=FALSE, tolerance=NULL, B=75) { acall <- match.call() type <- match.arg(type) match <- match.arg(match) boot.method <- match.arg(boot.method) if(pmmtype %nin% 1:3) stop('pmmtype must be 1, 2, or 3') if(pmmtype == 3) boot.method <- 'approximate bayesian' lgroup <- length(group) if(type == 'normpmm' && lgroup) stop('group makes no sense when type="normpmm"') if(type == 'normpmm' && ! tlinear) stop('type="normpmm" not implemented when tlinear=FALSE because no covariance matrix is available for right hand side beta for first canonical variate') if(! inherits(formula,'formula')) stop('formula must be a formula') nam <- all.vars(formula) m <- match.call(expand.dots = FALSE) Terms <- terms(formula, specials='I') m$formula <- formula m$match <- m$fweighted <- m$curtail <- m$x <- m$n.impute <- m$nk <- m$tlinear <- m$burnin <- m$type <- m$pmmtype <- m$group <- m$pr <- m$plotTrans <- m$tolerance <- m$boot.method <- m$B <- NULL m$na.action <- na.retain m[[1]] <- as.name("model.frame") z <- eval(m, sys.parent()) p <- length(z) n <- nrow(z) rnam <- row.names(z) if(length(rnam) == 0) rnam <- as.character(1:n) if(lgroup) { if(boot.method == 'approximate bayesian') stop('group not implemented for boot.method="approximate bayesian"') if(lgroup != n) stop('group should have length equal to number of observations') ngroup <- length(unique(group[! is.na(group)])) } linear <- nam[attr(Terms,'specials')$I] cat.levels <- vector('list',p) names(cat.levels) <- nam vtype <- rep('s', p); names(vtype) <- nam dof <- rep(NA, p); names(dof) <- nam na <- vector('list',p) names(na) <- nam nna <- integer(p); names(nna) <- nam xf <- matrix(as.double(1), nrow=n, ncol=p, dimnames=list(rnam,nam)) imp <- vector('list',p) names(imp) <- nam if(lgroup) group.inds <- imp for(i in 1:p) { xi <- z[[i]] ni <- nam[i] nai <- is.na(xi) na[[i]] <- (1:n)[nai] nna[i] <- nnai <- sum(nai) if(nnai > 0) imp[[ni]] <- matrix(NA, nrow=nnai, ncol=n.impute, dimnames=list(rnam[nai],NULL)) if(lgroup) { if(any(is.na(group[! nai]))) stop('NAs not allowed in group') if(length(unique(group[! nai])) != ngroup) stop(paste('not all',ngroup, 'values of group are represented in\n', 'observations with non-missing values of', ni)) group.inds[[i]] <- split((1:n)[! nai], group[! 
nai]) } iscat <- FALSE if(is.character(xi)) { xi <- as.factor(xi) lev <- levels(xi) iscat <- TRUE } else if(is.factor(xi)) { lev <- levels(xi) iscat <- TRUE } if(iscat) { if(length(lev) < 2) stop(paste(ni,'is constant')) tab <- table(xi) if(any(tab == 0)) stop(paste(ni,'has the following levels with no observations:', paste(names(tab)[tab == 0],collapse=' '))) if(any(tab < 5)) warning(paste(ni,'has the following levels with < 5 observations:', paste(names(tab)[tab < 5],collapse=' '), '\nConsider using the group parameter to balance bootstrap samples')) cat.levels[[ni]] <- lev xi <- as.integer(xi) vtype[ni] <- 'c' } else { u <- unique(xi[! nai]) if(length(u) == 1) stop(paste(ni,'is constant')) else if((length(nk) == 1 && nk == 0) || length(u) == 2 || ni %in% linear) vtype[ni] <- 'l' } xf[,i] <- xi ## Initialize imputed values to random sample of non-missings if(nnai > 0) xf[nai,i] <- sample(xi[! nai], nnai, replace=nnai > (n-nnai)) } z <- NULL wna <- (1:p)[nna > 0] if(length(wna) < 2 && missing(burnin)) burnin <- 0 nu <- apply(xf, 2, function(z)length(unique(z))) ## xf = original data matrix (categorical var -> integer codes) ## with current imputations rsq <- double(length(wna)); names(rsq) <- nam[wna] resampacc <- list() if(curtail) xrange <- apply(xf, 2, range) fits <- NULL for(iter in 1:(burnin + n.impute)) { if(pr) cat('Iteration',iter,'\r') for(i in wna) { nai <- na[[i]] ## subscripts of NAs on xf[i,] j <- (1:n)[-nai] ## subscripts of non-NAs on xf[i,] npr <- length(j) ytype <- if(tlinear && vtype[i] == 's')'l' else vtype[i] if(iter == (burnin + n.impute) && length(nk) > 1) { rn <- c('Bootstrap bias-corrected R^2', '10-fold cross-validated R^2', 'Bootstrap bias-corrected mean |error|', '10-fold cross-validated mean |error|', 'Bootstrap bias-corrected median |error|', '10-fold cross-validated median |error|') racc <- matrix(NA, nrow=6, ncol=length(nk), dimnames=list(rn, paste('nk=',nk,sep=''))) jj <- 0 for(k in nk) { jj <- jj + 1 f <- areg(xf[,-i,drop=FALSE], xf[,i], xtype=vtype[-i], ytype=ytype, nk=k, na.rm=FALSE, tolerance=tolerance, B=B, crossval=10) w <- c(f$r2boot, f$rsquaredcv, f$madboot, f$madcv, f$medboot, f$medcv) racc[,jj] <- w } resampacc[[nam[i]]] <- racc } if(lgroup) { ## insure orig. no. obs from each level of group s <- rep(NA, npr) for(ji in 1:ngroup) { gi <- (group.inds[[i]])[[ji]] s[gi] <- sample(gi, length(gi), replace=TRUE) } s <- s[! is.na(s)] } else { ## sample of non-NAs if(type == 'normpmm') s <- j else { s <- sample(j, npr, replace=TRUE) if(boot.method == 'approximate bayesian') { sorig <- s s <- sample(s, replace=TRUE) } } } nami <- nam[i] nm <- c(nami, nam[-i]) if(type != 'normpmm') { xch <- which(vtype == 'c') if(length(xch)) { nus <- apply(xf[s, xch, drop=FALSE], 2, function(z)length(unique(z))) xtf <- nus < nu[xch] if(any(xtf)) stop(paste('a bootstrap resample had too few unique values of the following variables:',paste(nam[xch[xtf]],collapse=','),sep='\n')) } } X <- xf[,-i,drop=FALSE] ## If there is only one variable that has any NAs, fits on ## non-bootstrapped samples will not vary across multiple imputations. 
## Otherwise, fits vary because across the multiple imputations, ## predictors are updated from previous spells as target variables if(type == 'normpmm' && length(wna) < 2) { if(iter == 1) fits[[i]] <- f <- areg(X[s,], xf[s,i], xtype=vtype[-i], ytype=ytype, nk=min(nk), na.rm=FALSE, tolerance=tolerance) else f <- fits[[i]] } else f <- areg(X[s,], xf[s,i], xtype=vtype[-i], ytype=ytype, nk=min(nk), na.rm=FALSE, tolerance=tolerance) xdf <- f$xdf dof[nam[-i]] <- xdf dof[nami] <- f$ydf if(plotTrans) plot(f) rsq[nami] <- f$rsquared ## residuals off of transformed predicted values res <- f$residuals pti <- predict(f, X) # predicted transformed xf[,i] ## if type is normpmm only those elements corresponding to ## complete cases are used if(type == 'normpmm') { xpxi <- f$xpxi if(! length(xpxi)) stop('type="normpmm" cannot be used when any variable needing imputation is categorical or nonlinear') ## See mice package .norm.draw function px <- sum(xdf) sigma.star <- sqrt(sum(res^2)/rchisq(1, length(res) - px)) beta.star <- f$xcoefficients + t(chol(xpxi)) %*% rnorm(1 + px) * sigma.star pti[nai] <- predict(f, X[nai,,drop=FALSE], type='x') %*% beta.star } else if(type == 'pmm') { if(pmmtype %in% c(1,3)) { ## Match predicted complete cases using non-bootstrap ## beta with incomplete cases using bootstrap beta ## Foro pmmtype=3 use bootstrap vs. sample w/replacement bootstrap ss <- if(pmmtype == 1) j else sorig g <- areg(X[ss,], xf[ss,i], xtype=vtype[-i], ytype=ytype, nk=min(nk), na.rm=FALSE, tolerance=tolerance) ## This would not need to be run fresh at each mult. imp. ## iteration if only one variable were ever NA pti[j] <- predict(g, X[j,]) } } if(type != 'regression') { if(ytype == 'l') pti <- (pti - mean(pti))/sqrt(var(pti)) whichclose <- if(match == 'kclosest') j[whichClosek(pti[j], pti[nai], k=kclosest)] else if(match == 'closest') { ## Jitter predicted transformed values for non-NAs to randomly ## break ties in matching with predictions for NAs in xf[,i] ## Because of normalization used by fitter, pti usually ranges ## from about -4 to 4 pti[j] <- pti[j] + runif(npr,-.0001,.0001) ## For each orig. missing xf[,i] impute with non-missing xf[,i] ## that has closest predicted transformed value j[whichClosest(pti[j], pti[nai])] ## see Misc.s } else j[whichClosePW(pti[j], pti[nai], f=fweighted)] impi <- xf[whichclose,i] } else { ## type='regression' ## predicted transformed target var + random sample of res, ## for NAs r <- sample(res, length(nai), replace=length(nai) > length(res)) ptir <- pti[nai] + r ## predicted random draws on untransformed scale impi <- f$yinv(ptir, what='sample', coef=f$ycoefficients) if(curtail) impi <- pmin(pmax(impi, xrange[1,i]), xrange[2,i]) } xf[nai,i] <- impi if(iter > burnin) imp[[nam[i]]][,iter-burnin] <- impi } } if(pr) cat('\n') if(! x) xf <- NULL structure(list(call=acall, formula=formula, match=match, fweighted=fweighted, pmmtype=pmmtype, n=n, p=p, na=na, nna=nna, type=vtype, tlinear=tlinear, nk=min(nk), cat.levels=cat.levels, df=dof, n.impute=n.impute, imputed=imp, x=xf, rsq=rsq, resampacc=resampacc), class='aregImpute') } print.aregImpute <- function(x, digits=3, ...) 
{ cat("\nMultiple Imputation using Bootstrap and PMM\n\n") dput(x$call) cat("\n") cat('n:',x$n,'\tp:',x$p, '\tImputations:',x$n.impute,' \tnk:',x$nk,'\n') cat('\nNumber of NAs:\n'); print(x$nna); cat('\n') info <- data.frame(type=x$type, d.f.=x$df, row.names=names(x$type)) print(info) if(x$tlinear) cat('\nTransformation of Target Variables Forced to be Linear\n') cat('\nR-squares for Predicting Non-Missing Values for Each Variable\nUsing Last Imputations of Predictors\n') print(round(x$rsq, digits)) racc <- x$resampacc if(length(racc)) { cat('\nResampling results for determining the complexity of imputation models\n\n') for(i in 1:length(racc)) { cat('Variable being imputed:', names(racc)[i], '\n') print(racc[[i]], digits=digits) cat('\n') } cat('\n') } invisible() } plot.aregImpute <- function(x, nclass=NULL, type=c('ecdf','hist'), datadensity=c("hist","none","rug","density"), diagnostics=FALSE, maxn=10, ...) { type <- match.arg(type) datadensity <- match.arg(datadensity) i <- x$imputed catg <- x$categorical lev <- x$cat.levels n.impute <- x$n.impute for(n in names(i)) { xi <- i[[n]] if(! length(xi)) next if(diagnostics) { r <- range(xi) for(j in 1:min(maxn,nrow(xi))) { plot(1:n.impute, xi[j,], ylim=r, xlab='Imputation', ylab=paste("Imputations for Obs.",j,"of",n)) } } ix <- as.vector(i[[n]]) lab <- paste('Imputed',n) if(n %in% catg) { tab <- table(ix) dotchart3(tab, lev[[n]], auxdata=tab, xlab='Frequency', ylab=lab) } else { if(type == 'ecdf') Ecdf(ix, xlab=lab, datadensity=datadensity, subtitles=FALSE) else { if(length(nclass)) hist(ix, xlab=n, nclass=nclass, main='') else hist(ix, xlab=lab, main='') scat1d(ix) } } } invisible() } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/solvet.s������������������������������������������������������������������������������������0000644�0001762�0000144�00000000632�12243661443�013224� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#FEH version of solve with argument tol passed to qr #8 Apr 91 solvet <- function(a, b, tol=1e-9) { if(!is.list(a)) a <- qr(a, tol=tol) if(a$rank < ncol(a$qr)) stop("apparently singular matrix") if(missing(b)) { b <- a$qr db <- dim(b) if(diff(db)) stop("matrix inverse only for square matrices") b[] <- rep(c(1, rep(0, db[1])), length = prod(db)) } qr.coef(a, b) } ������������������������������������������������������������������������������������������������������Hmisc/R/biVar.s�������������������������������������������������������������������������������������0000644�0001762�0000144�00000017667�12604551203�012764� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������biVar <- function(formula, statinfo, data=NULL, subset=NULL, na.action=na.retain, exclude.imputed=TRUE, ...) 
{ call <- match.call() x <- do.call('model.frame', list(formula, data=data, subset=subset, na.action=na.action)) nam <- names(x); yname <- nam[1] y <- x[[1]] ylabel <- label(y) x <- x[-1] xlabel <- sapply(x, label) m <- ncol(x) statnames <- statinfo$names stats <- matrix(NA, nrow=m, ncol=length(statnames), dimnames=list(names(x), statnames)) nmin <- statinfo$nmin fun <- statinfo$fun N <- integer(m) yna <- if(is.matrix(y))is.na(y %*% rep(1,ncol(y))) else is.na(y) for(i in 1:m) { w <- x[[i]] j <- !(yna | is.na(w)) if(exclude.imputed) j <- j & !(is.imputed(w) | is.imputed(y)) yy <- if(is.matrix(y)) y[j,,drop=FALSE] else y[j] w <- w[j] N[i] <- length(w) stats[i,] <- if(N[i] >= nmin) fun(w, yy, ...) else rep(NA, length(statnames)) } stats <- cbind(stats, n=N) structure(stats, class='biVar', yname=yname, ylabel=ylabel, xlabel=xlabel, statinfo=statinfo, call=call) } print.biVar <- function(x, ...) { info <- attr(x, 'statinfo') yname <- attr(x, 'yname') cat('\n', info$title, ' Response variable:', yname, '\n\n', sep='') dig <- c(info$digits,0) for(i in 1:ncol(x)) x[,i] <- round(x[,i],dig[i]) attr(x,'yname') <- attr(x, 'statinfo') <- attr(x, 'call') <- attr(x, 'ylabel') <- attr(x, 'xlabel') <- class(x) <- NULL print(x) invisible() } plot.biVar <- function(x, what=info$defaultwhat, sort.=TRUE, main, xlab, vnames=c('names','labels'), ...) { vnames <- match.arg(vnames) yname <- attr(x, 'yname') ylabel <- attr(x, 'ylabel') if(vnames=='labels' && ylabel!='') yname <- sedit(ylabel, ' ', '~') xlabel <- attr(x, 'xlabel') info <- attr(x, 'statinfo') aux <- info$aux auxlabel <- info$auxlabel if(!length(auxlabel)) auxlabel <- aux i <- match(what, info$names) if(is.na(i)) stop(paste('what must be one of', paste(info$names,collapse=' '))) if(missing(xlab)) xlab <- info$rxlab[i] if(missing(main)) main <- parse(text=paste(as.character(info$rmain),'~~~~Response:', yname,sep='')) auxtitle <- 'N'; auxdata <- format(x[,'n']) if(length(aux)) { auxtitle <- paste('N', auxlabel, sep=' ') auxdata <- paste(format(x[,'n']), format(x[,aux])) } stat <- x[,what] if(vnames=='labels') names(stat) <- ifelse(xlabel=='', names(stat), xlabel) if(sort.) { i <- order(stat) stat <- stat[i] auxdata <- auxdata[i] } dotchart3(stat, auxdata=auxdata, xlab=xlab, auxtitle=auxtitle, main=main, ...) invisible() } chiSquare <- function(formula, data=NULL, subset=NULL, na.action=na.retain, exclude.imputed=TRUE, ...) { g <- function(x, y, minlev=0, g=3) { if(minlev) y <- combine.levels(y, minlev=minlev) if((is.character(x) || is.factor(x)) && minlev) x <- combine.levels(x, minlev=minlev) if(is.numeric(x) && length(unique(x)) > g) x <- cut2(x, g=g) ct <- chisq.test(x, y) chisq <- ct$statistic df <- ct$parameter pval <- ct$p.value c(chisq, df, chisq-df, pval) } statinfo <- list(fun=g, title='Pearson Chi-square Tests', main='Pearson Chi-squared', rmain=expression(Pearson~chi^2), names=c('chisquare','df','chisquare-df','P'), xlab=c('Chi-square','d.f.','Chi-square - d.f.','P-value'), rxlab=expression(chi^2, d.f., chi^2 - d.f., P-value), digits=c(2,0,2,4), aux='df', nmin=2, defaultwhat='chisquare-df') biVar(formula, statinfo=statinfo, data=data, subset=subset, na.action=na.action, exclude.imputed=TRUE, ...) } spearman2 <- function(x, ...) UseMethod("spearman2") spearman2.default <- function(x, y, p=1, minlev=0, na.rm=TRUE, exclude.imputed=na.rm, ...) 
{ if(p > 2) stop('p must be 1 or 2') y <- as.numeric(y) if(is.character(x)) x <- factor(x) if(na.rm) { s <- !(is.na(x) | is.na(y)) if(exclude.imputed) { im <- is.imputed(x) | is.imputed(y) s <- s & !im } x <- x[s]; y <- y[s] } n <- length(x) ## If number of non-NA values is less then 3 then return a NA ## value. if(n < 3) return(c(rho2=NA,F=NA,df1=0,df2=n,P=NA,n=n,'Adjusted rho2'=NA)) ## Find the number of unique values in x u <- length(unique(x)) ## If is a factor and unique values are greater then 2 then find the ## lm.fit.qr.bare without an intercept. if(is.factor(x) && u > 2) { if(minlev > 0) { x <- combine.levels(x, minlev) if(length(levels(x))<2) { warning(paste('x did not have >= 2 categories with >=', minlev,'of the observations')) return(c(rho2=NA,F=NA,df1=0,df2=n,P=NA,n=n,'Adjusted rho2'=NA)) } } x <- model.matrix(~x, data=data.frame(x)) p <- ncol(x)-1 rsquare <- lm.fit.qr.bare(x, rank(y), intercept=FALSE)$rsquared } else { x <- as.numeric(x) if(u < 3) p <- 1 x <- rank(x) rsquare <- if(p==1) cor(x, rank(y))^2 else { x <- cbind(x, x^2) lm.fit.qr.bare(x, rank(y), intercept=TRUE)$rsquared } } df2 <- n-p-1 fstat <- rsquare/p/((1-rsquare)/df2) pvalue <- 1-pf(fstat,p,df2) rsqa <- 1 - (1 - rsquare)*(n-1)/df2 x <- c(rsquare,fstat,p,df2,pvalue,n,rsqa) names(x) <- c("rho2","F","df1","df2","P","n","Adjusted rho2") x } spearman2.formula <- function(formula, data=NULL, subset=NULL, na.action=na.retain, exclude.imputed=TRUE, ...) { g <- function(x, y, p=1, minlev=0) spearman2(x, y, p=p, minlev=minlev, na.rm=FALSE)[-6] statinfo <- list(fun=g, title='Spearman rho^2', main='Spearman rho^2', rmain=expression(Spearman~rho^2), names=c('rho2','F','df1','df2','P','Adjusted rho2'), xlab=c('rho^2','F','df2','df2','P-value','Adjusted rho^2'), rxlab=expression(rho^2, F, df1, df2, P-value, Adjusted~rho^2), digits=c(3,2,0,0,4,3), aux='df1', auxlabel='df', nmin=2, defaultwhat='Adjusted rho2') biVar(formula, statinfo=statinfo, data=data, subset=subset, na.action=na.action, exclude.imputed=exclude.imputed, ...) } rcorrcens <- function(x, ...) UseMethod("rcorrcens") rcorrcens.formula <- function(formula, data=NULL, subset=NULL, na.action=na.retain, exclude.imputed=TRUE, outx=FALSE, ...) { g <- function(x, y, outx) { lev <- levels(x) if(is.factor(x) && length(lev)==2) x <- as.integer(x) u <- if(is.factor(x)) { i <- order(-table(x)) u <- rcorr.cens(1*(x==lev[i[1]]), y, outx=outx) v <- rcorr.cens(1*(x==lev[i[2]]), y, outx=outx) if(abs(v['Dxy']) > abs(u['Dxy'])) v else u } else rcorr.cens(x, y, outx=outx) Dxy <- u['Dxy'] SE <- u['S.D.'] aDxy <- abs(Dxy) z <- aDxy/SE P <- 2 * pnorm(- z) c(C=u['C Index'], Dxy=Dxy, aDxy=aDxy, SD=SE, Z=z, P=P) } statinfo <- list(fun=g, title="Somers' Rank Correlation for Censored Data", main="Somers' Rank Correlation", rmain=expression(paste("Somers' ", D[xy])), names=c('C','Dxy','aDxy','SD','Z','P'), xlab=c('C','Dxy','|Dxy|','SD','Z','P-value'), rxlab=expression(C-index, D[xy], paste('|',D[xy],'|'), SD, Z, P-value), digits=c(3,3,3,3,2,4), # aux='n', auxlabel='N', nmin=2, defaultwhat='aDxy') biVar(formula, statinfo=statinfo, data=data, subset=subset, na.action=na.action, exclude.imputed=exclude.imputed, outx=outx, ...) 
}

## ===== Hmisc/R/na.pattern.s =====

na.pattern <- function(x)
{
  k <- ncol(x)
  pattern <- ''
  idt <- is.data.table(x)
  for(i in 1 : k) {
    y <- if(idt) x[, ..i] else x[, i]
    pattern <- paste0(pattern, 1 * is.na(y))
  }
  table(pattern)
}
utils::globalVariables('..i')

## ===== Hmisc/R/tex.s =====

tex <- function(string, lref='c', psref='c', scale=1, srt=0)
  paste('\\tex[',lref,'][',psref,'][',
        format(scale),'][',format(srt),']{',string,'}',sep='')

## ===== Hmisc/R/minor.tick.s =====

minor.tick <- function (nx = 2, ny = 2, tick.ratio = 0.5,
                        x.args = list(), y.args = list())
{
  ax <- function(w, n, tick.ratio, add.args) {
    range    <- par("usr")[if (w == "x") 1 : 2 else 3 : 4]
    tick.pos <- if (w == "x") par("xaxp") else par("yaxp")
    distance.between.minor <- (tick.pos[2] - tick.pos[1])/tick.pos[3]/n
    possible.minors <- tick.pos[1] - (0 : 100) * distance.between.minor
    low.candidates  <- possible.minors >= range[1]
    low.minor <- if (any(low.candidates)) min(possible.minors[low.candidates])
                 else tick.pos[1]
    possible.minors <- tick.pos[2] + (0 : 100) * distance.between.minor
    hi.candidates   <- possible.minors <= range[2]
    hi.minor <- if (any(hi.candidates)) max(possible.minors[hi.candidates])
                else tick.pos[2]
    axis.args <- c(list(if (w == "x") 1 else 2,
                        seq(low.minor, hi.minor, by = distance.between.minor),
                        labels = FALSE, tcl = par("tcl") * tick.ratio),
                   add.args)
    do.call(axis, axis.args)
  }
  if (nx > 1) ax("x", nx, tick.ratio = tick.ratio,
                 x.args)
  if (ny > 1) ax("y", ny, tick.ratio = tick.ratio, y.args)
  invisible()
}

## ===== Hmisc/R/mask.s =====

mask <- function(a)
{
  ## determine which bits are on in a vector of status bytes
  if(a >= .Machine$integer.max) stop("Value > integer.max")
  a <- as.integer(a)
  as.logical((rep(a, 8) %/% rep(2^(0:7), rep(length(a),8))) %% 2)
}
## Rick Becker
## Improved by Peter Melewski 14Apr02

## ===== Hmisc/R/xtfrm.labelled.s =====

xtfrm.labelled <- function(x) {
  newclass <- class(x)[class(x) != 'labelled']
  if (length(newclass) == 0) {
    class(x) <- NULL
  } else {
    oldClass(x) <- newclass
  }
  xtfrm(x)
}

## ===== Hmisc/R/xYplot.s =====

Cbind <- function(...)
{ # See llist function with Hmisc label function
  dotlist <- list(...)
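
## Usage illustration: Cbind attaches the extra arguments as an 'other'
## attribute on the first argument so that panel.xYplot can draw error
## bars or bands around the point estimates. Hypothetical data; wrapped
## in if(FALSE) so it is never executed here.
if(FALSE) {
  d <- data.frame(x=1:10, y=rnorm(10))
  d$lower <- d$y - 1
  d$upper <- d$y + 1
  xYplot(Cbind(y, lower, upper) ~ x, data=d)  # y with lower/upper error bars
}
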
if(is.matrix(dotlist[[1]])) { y <- dotlist[[1]] ynam <- dimnames(y)[[2]] if(!length(ynam)) stop('when first argument is a matrix it must have column dimnames') other <- y[,-1,drop= FALSE] return(structure(y[,1], class='Cbind', label=ynam[1], other=other)) } lname <- names(dotlist) name <- vname <- as.character(sys.call())[-1] for(i in 1:length(dotlist)) { vname[i] <- if(length(lname)) lname[i] else '' ## Added length() and '' 12Jun01, remove length(vname[i])==0 below if(vname[i]=='') vname[i] <- name[i] } lab <- attr(y <- dotlist[[1]], 'label') if(!length(lab)) lab <- vname[1] if(!is.matrix(other <- dotlist[[2]]) || ncol(other)<2) { other <- as.matrix(as.data.frame(dotlist))[,-1,drop= FALSE] dimnames(other)[[2]] <- vname[-1] } structure(y, class='Cbind', label=lab, other=other) } as.numeric.Cbind <- as.double.Cbind <- function(x, ...) x ## Keeps xyplot from stripping off "other" attribute in as.numeric '[.Cbind' <- function(x, ...) { structure(unclass(x)[...], class='Cbind', label=attr(x,'label'), other=attr(x,'other')[...,,drop= FALSE]) } prepanel.xYplot <- function(x, y, ...) { xlim <- range(x, na.rm=TRUE) ylim <- range(y, attr(y,'other'), na.rm=TRUE) list(xlim=xlim, ylim=ylim, dx=diff(xlim), dy=diff(ylim)) } ## MB add method="filled bands" ## MB use col.fill to specify colors for filling bands panel.xYplot <- function(x, y, subscripts, groups = NULL, type = if(is.function(method) || method == "quantiles") "b" else "p", method = c("bars", "bands", "upper bars", "lower bars", "alt bars", "quantiles", "filled bands"), methodArgs = NULL, label.curves = TRUE, abline, probs = c(0.5, 0.25, 0.75), nx=NULL, cap = 0.015, lty.bar = 1, lwd = plot.line$lwd, lty = plot.line$lty, pch = plot.symbol$pch, cex = plot.symbol$cex, font = plot.symbol$font, col = NULL, lwd.bands = NULL, lty.bands = NULL, col.bands = NULL, minor.ticks = NULL, col.fill = NULL, size=NULL, rangeCex=c(.5,3), ...) 
{ if(missing(method) || !is.function(method)) method <- match.arg(method) # was just missing() 26Nov01 type <- type # evaluate type before method changes 9May01 sizeVaries <- length(size) && length(unique(size)) > 1 if(length(groups)) groups <- as.factor(groups) g <- as.integer(groups)[subscripts] ng <- if(length(groups)) max(g) else 1 plot.symbol <- trellis.par.get(if(ng > 1) "superpose.symbol" else "plot.symbol") plot.line <- trellis.par.get(if(ng > 1) "superpose.line" else "plot.line") lty <- rep(lty, length = ng) lwd <- rep(lwd, length = ng) if(length(rangeCex) != 1) pch <- rep(pch, length = ng) if(!sizeVaries) cex <- rep(cex, length = ng) font <- rep(font, length = ng) if(!length(col)) col <- if(type == "p") plot.symbol$col else plot.line$col col <- rep(col, length = ng) pchVaries <- FALSE ## Thanks to Deepayan Sarkar for the following size code if(sizeVaries) { if(length(rangeCex) > 1) srng <- range(size, na.rm=TRUE) size <- size[subscripts] if(length(rangeCex)==1) { pch <- as.character(size) cex <- rangeCex sizeVaries <- FALSE pchVaries <- TRUE } else { cex <- rangeCex[1] + diff(rangeCex)*(size - srng[1])/diff(srng) sKey <- function(x=0, y=1, cexObserved, cexCurtailed, col, pch, other) { if(!length(x)) x <- 0.05 if(!length(y)) y <- 0.95 ## because of formals() ## had to multiply cex by 1.4 when using rlegend instead of rlegendg rlegendg(x, y, legend=format(cexObserved), cex=cexCurtailed, col=col, pch=pch, other=other) invisible() } formals(sKey) <- list(x=NULL, y=NULL, cexObserved=srng, cexCurtailed=rangeCex, col=col[1], pch=pch, other=NULL) .setsKey(sKey) } } other <- attr(y, "other") if(length(other)) { nother <- ncol(other) if(nother == 1) { lower <- y - other upper <- y + other } else { lower <- other[, 1] upper <- other[, 2] } } else nother <- 0 y <- unclass(y) levnum <- if(length(groups)) sort(unique(g)) else 1 if(is.function(method) || method == "quantiles") { if(!is.function(method)) { method <- quantile # above: methodArgs=NULL if(!length(methodArgs)) methodArgs <- list(probs = probs) } if(length(methodArgs)) methodArgs$na.rm <- TRUE else methodArgs <- list(na.rm = TRUE) if(ng == 1) { if(!length(nx)) nx <- min(length(x)/4, 40) xg <- if(nx) as.numeric(as.character(cut2(x, m = nx, levels.mean = TRUE))) else x dsum <- do.call("summarize", c(list(y, llist(xg = xg), method, type = "matrix", stat.name = "Z"), methodArgs)) } else { xg <- x if(missing(nx) || nx) for(gg in levnum) { w <- g == gg if(missing(nx)) nx <- min(sum(w)/4, 40) xg[w] <- as.numeric(as.character(cut2(xg[w], m = nx, levels.mean = TRUE))) } dsum <- do.call("summarize", c(list(y, by = llist(g, xg), method, type = "matrix", stat.name = "Z"), methodArgs)) g <- dsum$g groups <- factor(g, 1:length(levels(groups)), levels(groups)) subscripts <- TRUE } x <- dsum$xg y <- dsum$Z[, 1, drop = TRUE] other <- dsum$Z[, -1, drop=FALSE] nother <- 2 method <- "bands" } ## MB 04/17/01 default colors for filled bands ## 'pastel' colors matching superpose.line$col plot.fill <- c(9, 10, 11, 12, 13, 15, 7) ## The following is a fix of panel.xyplot to work for type='b' ppanel <- function(x, y, type, cex, pch, font, lwd, lty, col, ...) { gfun <- ordGridFun(TRUE) if(type != 'p') gfun$lines(x, y, lwd = lwd, lty = lty, col = col, ...) if(type !='l') gfun$points(x=x, y=y, pch = pch, font = font, cex = cex, col = col, type = type, lwd=lwd, lty=lty, ...) } ##The following is a fix for panel.superpose for type='b' pspanel <- function(x, y, subscripts, groups, type, lwd, lty, pch, cex, font, col, sizeVaries, pchVaries, ...) 
{ gfun <- ordGridFun(TRUE) groups <- as.numeric(groups)[subscripts] N <- seq(along = groups) for(i in sort(unique(groups))) { which <- N[groups == i] # j <- which[order(x[which])] # sort in x j <- which # no sorting if(type != "p") gfun$lines(x[j], y[j], col = col[i], lwd = lwd[i], lty = lty[i], ...) if(type !='l') gfun$points(x[j], y[j], col = col[i], pch = pch[if(pchVaries)j else i], cex = cex[if(sizeVaries)j else i], font = font[i], lty=lty[i], lwd=lwd[i], ...) } } ## 14Apr2001 MB changes: set colors for method = "filled bands" if(!length(col.fill)) col.fill <- plot.fill col.fill <- rep(col.fill, length = ng) ## end MB if(ng > 1) { ## MB 14Apr2001: if method == "filled bands" ## have to plot filled bands first, otherwise lines/symbols ## would be hidden by the filled band if(method == "filled bands") { gfun <- ordGridFun(TRUE) for(gg in levnum) { s <- g == gg gfun$polygon(x=c(x[s],rev(x[s])), y=c(lower[s], rev(upper[s])), col=col.fill[gg], ...) } } ## end MB pspanel(x, y, subscripts, groups, lwd = lwd, lty = lty, pch = pch, cex = cex, font = font, col = col, type = type, sizeVaries=sizeVaries, pchVaries=pchVaries) if(type != "p" && !(is.logical(label.curves) && ! label.curves)) { lc <- if(is.logical(label.curves)) list(lwd = lwd, cex = cex[1]) else c(list(lwd = lwd, cex = cex[1]), label.curves) curves <- vector("list", length(levnum)) names(curves) <- levels(groups)[levnum] i <- 0 for(gg in levnum) { i <- i + 1 s <- g == gg curves[[i]] <- list(x[s], y[s]) } labcurve(curves, lty = lty[levnum], lwd = lwd[levnum], col. = col[levnum], opts = lc, grid=TRUE, ...) } } ## MB 14Apr2001: if method == "filled bands" ## plot filled bands first, otherwise lines/symbols ## would be hidden by the filled band else { if(method == "filled bands") grid.polygon(x = c(x, rev(x)), y = c(lower, rev(upper)), gp=gpar(fill = col.fill, col='transparent'), default.units='native') ## end MB ppanel(x, y, lwd = lwd, lty = lty, pch = pch, cex = cex, font = font, col = col, type = type) } ## 14Apr2001 MB ## final change for filled bands: just skip the rest ## if method = filled bands, remaining columns of other are ignored if(nother && method != "filled bands") { if(method == "bands") { dob <- function(a, def, ng, j) { if(!length(a)) return(def) if(!is.list(a)) a <- list(a) a <- rep(a, length = ng) sapply(a, function(b, j) b[j], j = j) } for(j in 1:ncol(other)) { if(ng == 1) ppanel(x, other[, j], lwd = dob(lwd.bands, lwd, ng, j), lty = dob(lty.bands, lty, ng, j), col = dob(col.bands, col, ng, j), pch = pch, cex = cex, font = font, type = "l") else pspanel(x, other[, j], subscripts, groups, lwd = dob(lwd.bands, lwd, ng, j), lty = dob(lty.bands, lty, ng, j), col = dob(col.bands, col, ng, j), pch = pch, cex = cex, font = font, type = "l", sizeVaries=sizeVaries, pchVaries=pchVaries) } } else { errbr <- function(x, y, lower, upper, cap, lty, lwd, col, connect) { gfun <- ordGridFun(TRUE) ## see Misc.s segmnts <- gfun$segments gun <- gfun$unit smidge <- 0.5 * cap * unit(1,'npc') switch(connect, all = { segmnts(x, lower, x, upper, lty = lty, lwd = lwd, col = col) segmnts(gun(x)-smidge, lower, gun(x)+smidge, lower, lwd = lwd, lty = 1, col = col) segmnts(gun(x)-smidge, upper, gun(x)+smidge, upper, lwd = lwd, lty = 1, col = col) }, upper = { segmnts(x, y, x, upper, lty = lty, lwd = lwd, col = col) segmnts(gun(x)-smidge, upper, gun(x)+smidge, upper, lwd = lwd, lty = 1, col = col) }, lower = { segmnts(x, y, x, lower, lty = lty, lwd = lwd, col = col) segmnts(gun(x)-smidge, lower, gun(x)+smidge, lower, lwd = lwd, lty = 1, col = col) 
} ) } if(ng == 1) errbr(x, y, lower, upper, cap, lty.bar, lwd, col, switch(method, bars = "all", "upper bars" = "upper", "lower bars" = "lower", "alt bars" = "lower")) else { if(method == "alt bars") medy <- median(y, na.rm = TRUE) for(gg in levnum) { s <- g == gg connect <- switch(method, bars = "all", "upper bars" = "upper", "lower bars" = "lower", "alt bars" = if(median(y[s], na.rm = TRUE) > medy) "upper" else "lower") errbr(x[s], y[s], lower = lower[s], upper = upper[s], cap, lty.bar, lwd[gg], col[gg], connect) } } } } if(length(minor.ticks)) { minor.at <- if(is.list(minor.ticks)) minor.ticks$at else minor.ticks minor.labs <- if(is.list(minor.ticks) && length(minor.ticks$labels)) minor.ticks$labels else FALSE gfun$axis(side = 1, at = minor.at, labels = FALSE, tck = par("tck") * 0.5, outer = TRUE, cex = par("cex") * 0.5) if(!is.logical(minor.labs)) gfun$axis(side = 1, at = minor.at, labels = minor.labs, tck = 0, cex = par("cex") * 0.5, line = 1.25) } if(ng > 1) { ##set up for key() if points plotted Key1 <- function(x=0, y=1, lev, cex, col, font, pch, other) { ## Even though par('usr') shows 0,1,0,1 after lattice draws ## its plot, it still needs resetting if(!length(x)) x <- 0.05 if(!length(y)) y <- 0.95 ## because of formals() rlegendg(x, y, legend=lev, cex=cex, col=col, pch=pch, other=other) invisible() } formals(Key1) <- list(x=NULL,y=NULL,lev=levels(groups), cex=if(sizeVaries) 1 else cex, col=col, font=font, pch=pch, other=NULL) .setKey(Key1) rm(Key1) } if(!missing(abline)) { if(length(names(abline))) do.call("panel.abline", abline) else for(i in 1:length(abline)) do.call("panel.abline", abline[[i]]) } if(type == "l" && ng > 1) { ## Set up for legend (key() or rlegendg()) if lines drawn Key2 <- function(x=0, y=1, lev, cex, col, lty, lwd, other) { ## Even though par('usr') shows 0,1,0,1 after lattice draws ## its plot, it still needs resetting if(!length(x)) x <- 0.05 if(!length(y)) y <- 0.95 ## because of formals() rlegendg(x, y, legend=lev, cex=cex, col=col, lty=lty, lwd=lwd, other=other) invisible() } formals(Key2) <- list(x=NULL,y=NULL,lev=levels(groups), col=col, lty=lty, lwd=lwd, other=NULL) .setKey(Key2) rm(Key2) } } xYplot <- function (formula, data=sys.frame(sys.parent()), groups, subset, xlab=NULL, ylab=NULL, ylim=NULL, panel=panel.xYplot, prepanel=prepanel.xYplot, scales=NULL, minor.ticks=NULL, sub=NULL, ...) { yvname <- as.character(formula[2]) # tried deparse y <- eval(parse(text=yvname), data) if(!length(ylab)) ylab <- label(y, units=TRUE, plot=TRUE, default=yvname, grid=TRUE) if(!length(ylim)) { yother <- attr(y,'other') if(length(yother)) ylim <- range(y, yother, na.rm=TRUE) } xvname <- formula[[3]] if(length(xvname)>1 && as.character(xvname[[1]])=='|') xvname <- xvname[[2]] # ignore conditioning var xv <- eval(xvname, data) if(!length(xlab)) xlab <- label(xv, units=TRUE, plot=TRUE, default=as.character(xvname)[1], grid=TRUE) if(!length(scales$x)) { if(length(maj <- attr(xv,'scales.major'))) scales$x <- maj } if(!length(minor.ticks)) { if(length(minor <- attr(xv,'scales.minor'))) minor.ticks <- minor } if(!missing(groups)) groups <- eval(substitute(groups),data) if(!missing(subset)) subset <- eval(substitute(subset),data) ## Note: c(list(something), NULL) = list(something) ## The following was c(list(formula=formula,...,panel=panel),if()c(),...) 
do.call("xyplot", c(list(x = formula, data=data, prepanel=prepanel, panel=panel), if(length(ylab))list(ylab=ylab), if(length(ylim))list(ylim=ylim), if(length(xlab))list(xlab=xlab), if(length(scales))list(scales=scales), if(length(minor.ticks))list(minor.ticks=minor.ticks), if(!missing(groups))list(groups=groups), if(!missing(subset))list(subset=subset), if(!missing(sub)) list(sub=sub), list(...))) } prepanel.Dotplot <- function(x, y, ...) { xlim <- range(x, attr(x,'other'), na.rm=TRUE) ylim <- range(as.numeric(y), na.rm=TRUE) ## as.numeric 25nov02 list(xlim=xlim, ylim=ylim) #, dx=diff(xlim), dy=diff(ylim)) } panel.Dotplot <- function(x, y, groups = NULL, pch = dot.symbol$pch, col = dot.symbol$col, cex = dot.symbol$cex, font = dot.symbol$font, abline, ...) { gfun <- ordGridFun(TRUE) ## see Misc.s segmnts <- gfun$segments y <- as.numeric(y) gp <- length(groups) dot.symbol <- trellis.par.get(if(gp)'superpose.symbol' else 'dot.symbol') dot.line <- trellis.par.get('dot.line') plot.line <- trellis.par.get(if(gp)'superpose.line' else 'plot.line') gfun$abline(h = unique(y), lwd=dot.line$lwd, lty=dot.line$lty, col=dot.line$col) if(!missing(abline)) { if(length(names(abline))) do.call("panel.abline", abline) else for(i in 1:length(abline)) do.call("panel.abline", abline[[i]]) } other <- attr(x,'other') x <- unclass(x) attr(x,'other') <- NULL if(length(other)) { nc <- ncol(other) segmnts(other[,1], y, other[,nc], y, lwd=plot.line$lwd[1], lty=plot.line$lty[1], col=plot.line$col[1]) if(nc==4) { segmnts(other[,2], y, other[,3], y, lwd=2*plot.line$lwd[1], lty=plot.line$lty[1], col=plot.line$col[1]) gfun$points(other[,2], y, pch=3, cex=cex, col=col, font=font) gfun$points(other[,3], y, pch=3, cex=cex, col=col, font=font) } if(gp) panel.superpose(x, y, groups=as.numeric(groups), pch=pch, col=col, cex=cex, font=font, ...) else gfun$points(x, y, pch=pch[1], cex=cex, col=col, font=font) } else { if(gp) panel.superpose(x, y, groups=as.numeric(groups), pch=pch, col=col, cex=cex, font=font, ...) else panel.dotplot(x, y, pch=pch, col=col, cex=cex, font=font, ...) } if(gp) { Key <- function(x=0, y=1, lev, cex, col, font, pch, other) { if(!length(x)) x <- 0.05 if(!length(y)) y <- 0.95 ## because of formals() rlegendg(x, y, legend=lev, cex=cex, col=col, pch=pch, other=other) invisible() } lev <- levels(as.factor(groups)) ng <- length(lev) formals(Key) <- list(x=NULL,y=NULL,lev=lev, cex=cex[1:ng], col=col[1:ng], font=font[1:ng], pch=pch[1:ng], other=NULL) .setKey(Key) } } Dotplot <- function (formula, data=sys.frame(sys.parent()), groups, subset, xlab=NULL, ylab=NULL, ylim=NULL, panel=panel.Dotplot, prepanel=prepanel.Dotplot, scales=NULL, xscale=NULL, ...) 
{
  yvname <- as.character(formula[2])   # tried deparse
  y <- eval(parse(text=yvname), data)
  if(!length(ylab))
    ylab <- label(y, units=TRUE, plot=TRUE, default=yvname, grid=TRUE)
  if(!length(ylim)) {
    yother <- attr(y,'other')
    if(length(yother)) ylim <- range(y, yother, na.rm=TRUE)
  }
  if(is.character(yv)) yv <- factor(yv)
  if(!length(scales) && is.factor(yv))
    scales <- list(y=list(at=1:length(levels(yv)), labels=levels(yv)))
  if(length(xscale)) scales$x <- xscale
  xvname <- formula[[3]]
  if(length(xvname)>1 && as.character(xvname[[1]])=='|')
    xvname <- xvname[[2]]   # ignore conditioning var
  xv <- eval(xvname, data)
  if(!length(xlab))
    xlab <- label(xv, units=TRUE, plot=TRUE, default=as.character(xvname)[1],
                  grid=TRUE)
  if(!missing(groups)) groups <- eval(substitute(groups), data)
  if(!missing(subset)) subset <- eval(substitute(subset), data)
  dul <- options('drop.unused.levels')
  options(drop.unused.levels=FALSE)   ## for empty cells
  on.exit(options(dul))               ## across some panels
  do.call("xyplot", c(list(x = formula, data=data, prepanel=prepanel,
                           panel=panel),
                      if(length(ylab)) list(ylab=ylab),
                      if(length(ylim)) list(ylim=ylim),
                      if(length(xlab)) list(xlab=xlab),
                      if(!missing(groups)) list(groups=groups),
                      if(!missing(subset)) list(subset=subset),
                      if(length(scales)) list(scales=scales),
                      list(...)))
}

setTrellis <- function(strip.blank=TRUE, lty.dot.line=2, lwd.dot.line=1)
{
  if(strip.blank) trellis.strip.blank()   # in Hmisc Misc.s
  dot.line <- trellis.par.get('dot.line')
  dot.line$lwd <- lwd.dot.line
  dot.line$lty <- lty.dot.line
  trellis.par.set('dot.line', dot.line)
  invisible()
}

numericScale <- function(x, label=NULL, ...)
{
  xn <- as.numeric(x)
  attr(xn,'label') <- if(length(label)) label else deparse(substitute(x))
  xn
}

## See proc.scale.trellis, render.trellis, axis.trellis for details of
## how scale is used

Hmisc/R/pstamp.s

pstamp <- function(txt, pwd=FALSE, time.=TRUE)
{
  stamp <- function(string = Sys.time(), print = TRUE, plot = TRUE)
  {
    opar <- par('yaxt', 'xaxt', 'xpd')
    par(yaxt='s', xaxt='s', xpd=NA)
    on.exit(par(opar))
    plt <- par('plt')
    usr <- par('usr')
    ## when a logarithmic scale is in use (i.e. par('xlog') is true),
    ## then the x-limits would be 10^par('usr')[1:2].  Similarly for
    ## the y axis
    xcoord <- usr[2] + (usr[2] - usr[1])/(plt[2] - plt[1]) * (1-plt[2]) -
      .6*strwidth('m')
    ycoord <- usr[3] - diff(usr[3:4])/diff(plt[3:4])*(plt[3]) +
      0.6*strheight('m')
    if(par('xlog')) xcoord <- 10^(xcoord)
    if(par('ylog')) ycoord <- 10^(ycoord)
    ## Print the text on the current plot
    text(xcoord, ycoord, string, adj=1)
    invisible(string)
  }
  date.txt <- if(time.)
    format(Sys.time())
  else
    format(Sys.time(), '%Y-%m-%d')
  if(pwd) date.txt <- paste(getwd(), date.txt)
  oldpar <- par('mfrow', 'cex')
  par(mfrow=c(1,1), cex = 0.5)
  on.exit(par(oldpar))
  if(!missing(txt)) date.txt <- paste(txt, ' ', date.txt, sep='')
  stamp(string=date.txt, print=FALSE, plot=TRUE)
  invisible()
}

Hmisc/R/multLines.s

multLines <- function(x, y, pos=c('left', 'right'),
                      col='gray', lwd=1, lty=1,
                      lwd.vert=.85, lty.vert=1, alpha=0.4, grid=FALSE,
                      pobj=plotly::plot_ly(), xlim,
                      name=colnames(y)[1], legendgroup=name,
                      showlegend=TRUE, ...)
{
  pos <- match.arg(pos)
  p <- ncol(y)
  n <- nrow(y)
  if(! is.matrix(y) || p == 1 || p %% 2 != 1)
    stop('y must have 3, 5, 7, ... columns')
  if(length(x) != n) stop('length of x must match rows of y')
  vcol <- adjustcolor(col, alpha.f=alpha)
  pl <- grType() == 'plotly' && requireNamespace("plotly")
  if(pl) {
    pobj <- plotly::add_lines(pobj, data=data.frame(x=x, y=y[,1]),
                              x=~x, y=~y, color=I(col), name=name,
                              legendgroup=legendgroup,
                              showlegend=showlegend, ...)
    xdel <- 0.005 * diff(xlim)
  }
  else if(grid) {
    llines(x, y[, 1], col=col, lwd=lwd, lty=lty)
    xdel <- unit(0.75, 'mm')
    x <- unit(x, 'native')
    gp <- gpar(col=vcol, lwd=lwd.vert, lty=lty.vert)
  }
  else {
    lines(x, y[, 1], col=col, lwd=lwd, lty=lty)
    xdel <- 0.005 * diff(par('usr')[1 : 2])
  }
  half <- (p - 1) / 2
  x0 <- if(grid) unit(x, 'native') else x
  for(i in 1 : half) {
    i1 <- i + 1
    i2 <- p - i + 1
    x0 <- switch(pos, left = x0 - xdel, right = x0 + xdel)
    tn <- paste0(colnames(y)[i1], ' - ', colnames(y)[i2])
    if(pl)
      pobj <- plotly::add_segments(pobj,
                                   data=data.frame(x0=x0, y0=y[, i1],
                                                   y1=y[, i2]),
                                   x=~x0, y=~y0, xend=~x0, yend=~y1,
                                   name=tn, legendgroup=tn,
                                   showlegend=showlegend, col=I(vcol), ...)
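## Usage sketch for pstamp (defined above): stamps the lower-right corner of
## the current base-graphics page; 'Project A' is a hypothetical prefix:
##   # plot(1:10)
##   # pstamp('Project A')            # prefix, then date and time
##   # pstamp(pwd=TRUE, time.=FALSE)  # working directory and date only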
    else if(grid)
      grid.segments(x0, y[, i1], x0, y[, i2], gp=gp, default.units='native')
    else
      segments(x0, y[, i1], x0, y[, i2], col=vcol, lty=lty.vert,
               lwd=lwd.vert)
  }
  if(pl) pobj else invisible(NULL)
}

Hmisc/R/james.stein.s

james.stein <- function(y, group)
{
  s <- !(is.na(y) | is.na(group))
  y <- y[s]
  group <- as.character(group[s])   ## as.char -> unused levels OK
  k <- length(unique(group))
  if(k < 3) stop("must have >=3 groups")
  stats <- function(w)
  {
    bar <- mean(w)
    ss <- sum((w - bar)^2)
    n <- length(w)
    ##if(n<2)
    ##  stop("a group has n<2")
    c(n=length(w), mean=bar, ss=ss, var=ss/n/(n-1))
  }
  Z <- stats(y)
  st <- tapply(y, group, FUN=stats)
  nams <- names(st)
  z <- matrix(unlist(st), ncol=4, byrow=TRUE)
  ssb <- stats(z[,2])["ss"]
  shrink <- 1 - (k-3)*z[,4]/ssb
  shrink[z[,1]==1] <- 0
  shrink <- pmin(pmax(shrink,0),1)
  list(n=z[,1], mean=z[,2],
       shrunk.mean=structure(Z["mean"]*(1-shrink)+shrink*z[,2], names=nams),
       shrink=shrink)
}

Hmisc/R/rm.boot.s

rm.boot <- function(time, y, id=seq(along=time), subset=TRUE,
                    plot.individual=FALSE,
                    bootstrap.type=c('x fixed','x random'),
                    nk=6, knots, B=500, smoother=supsmu,
                    xlab, xlim, ylim=range(y),
                    times=seq(min(time), max(time), length=100),
                    absorb.subject.effects=FALSE, rho=0,
                    cor.pattern=c('independent','estimate'), ncor=10000,
                    ...)
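## Usage sketch for multLines (defined above).  y must be a matrix with an
## odd number (>= 3) of columns: the point estimate first, then pairs of
## confidence limits.  All values below are hypothetical:
##   # x <- 1:10; est <- sqrt(x)
##   # y <- cbind(est=est, lower=est - .2, upper=est + .2)
##   # plot(x, est, type='n')
##   # multLines(x, y, col='blue')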
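## Numeric sketch for james.stein (defined above): group means are shrunk
## toward the grand mean, smaller/noisier groups more so; >= 3 groups are
## required.  The data are hypothetical:
##   # set.seed(1)
##   # y <- rnorm(30); g <- rep(c('a', 'b', 'c'), each=10)
##   # james.stein(y, g)$shrunk.mean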
{ bootstrap.type <- match.arg(bootstrap.type) absorb.subject.effects <- absorb.subject.effects & !missing(id) if(!is.function(cor.pattern)) cor.pattern <- match.arg(cor.pattern) if(!(is.character(cor.pattern) && cor.pattern=='independent') && rho!=0) stop("can't specify both cor.pattern='estimate' and rho") if(rho != 0) cor.pattern <- 'equal correlation' dodep <- rho !=0 || !is.character(cor.pattern) || cor.pattern=='estimate' ## X fixed also implies that subjects are fixed id <- as.character(id) ylab <- label(y) if(ylab=='') ylab <- 'y' if(missing(xlab)) { xlab <- units(time) if(xlab=='') xlab <- 'Time' } if(length(subset) > 1) { id <- id[subset]; time <- time[subset]; y <- y[subset] } s <- is.na(time + y) if(any(s)) { s <- !s id <- id[s] time <- time[s] y <- y[s] } ## Need to order data so that a subject's records stay together ## Otherwise, the mean residuals at each time will not vary over resamples ## when bootstrap.type='x fixed' s <- order(id, time) id <- id[s]; time <- time[s]; y <- y[s] if(bootstrap.type=='x fixed' && diff(range(table(id))) != 0) warning('To work properly with bootstrap.type="x fixed" all subjects must have the same # observations') n <- length(y) clusters <- unique(id) if(plot.individual) { ploti <- function(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...) { plot(0,0,xlim=range(pretty(range(time))),ylim=ylim, xlab=xlab, ylab=ylab, type='n') j <- 0 for(i in clusters) { s <- id==i j <- j+1 lines(smoother(time[s],y[s],...),lty=j) } } ploti(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...) } if(nk==0) knots <- double(0) if(missing(knots) && nk>0) { knots <- rcspline.eval(time,nk=nk,knots.only=TRUE) if(length(knots) != nk) { warning('could not obtain requested number of knots') nk <- length(knots) } } else nk <- length(knots) p <- if(nk==0) 1 else nk-1 X.times <- if(nk==0) as.matrix(times) else rcspline.eval(times, knots, inclx=TRUE) X.Time <- if(nk==0) as.matrix(time) else rcspline.eval(time, knots, inclx=TRUE) X <- if(missing(id)) cbind(X.Time,1) else model.matrix(~ X.Time+id-1, data=list(X.Time=X.Time,id=as.factor(id))) ## was id=id 3Apr02 Thanks: Don MacQueen, for R f <- lm.fit.qr.bare(X, y, intercept=FALSE) res <- f$residuals sigma2 <- sum(res^2)/n if(absorb.subject.effects) { mean.intercept <- mean(c(0,f$coef[-(1:p)])) y <- y + mean.intercept - (f$coef[-(1:p)])[paste('id',id,sep='')] if(plot.individual) { ploti(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...) 
title('Raw Data Adjusted to Have a Common Intercept') } } if(is.character(cor.pattern) && cor.pattern=='estimate') { timediff <- product <- single(ncor) used <- 0 i <- 0 meanres <- tapply(res, time, mean) names(meanres) <- as.numeric(names(meanres)) sdres <- sqrt(tapply(res, time, var)) names(sdres) <- as.numeric(names(sdres)) if(any(is.na(sdres))) stop('one or more times occur in only one subject') for(wid in clusters) { s <- id==wid x <- time[s] cx <- as.character(x) r <- (res[s] - meanres[cx])/sdres[cx] if(any(is.na(r))) stop('program logic error') diffs <- outer(x, x, FUN=function(a,b)abs(a-b)) prods <- outer(r, r, FUN='*') np <- length(prods) if(used + np > ncor) { cat('\nUsed only',i,'subjects in estimating covariance pattern.\nMay want to increase ncor.\n') break } i <- i+1 timediff[(used+1):(used+np)] <- diffs product[(used+1):(used+np)] <- prods used <- used+np } timediff <- timediff[1:used]; product <- product[1:used] product <- tapply(product, round(timediff,4), mean) timediff <- as.numeric(names(product)) product[timediff==0] <- 1 plot(timediff, product, xlab='Absolute Difference in Time', ylab='Correlation', type='b') cor.pattern <- list(x=timediff, y=product) } ##Subject effects are at the end, using cell means model ##Take intercept as average of all subject effects cof <- function(fit,p) { ko <- fit$coef c(mean(ko[-(1:p)]), ko[1:p]) } o.coef <- cof(f,p) if(bootstrap.type=='x random') { orig.obsno <- split(1:n, id) } else { R <- split(res, id) yhat <- if(!absorb.subject.effects) f$fitted.values else o.coef[1] + X.Time %*% o.coef[-1] } Coef <- matrix(NA, B+1, p+1) sse <- loglik <- single(B+1) loglik.dep <- NULL Coef[1,] <- o.coef sse[1] <- sigma2*n loglik[1] <- n*logb(2*pi*sigma2) + n if(dodep) { loglik.dep <- loglik lldep <- function(time, id, sigma2, res, rho, cor.pattern) { ll <- 0 for(subj in unique(id)) { s <- id==subj x <- time[s] y <- res[s] p <- sum(s) if(is.character(cor.pattern) && cor.pattern=='equal correlation') cov <- sigma2*(diag(rep(1-rho,p))+rho) else { cov <- if(is.function(cor.pattern)) outer(x, x, cor.pattern)*sigma2 else { timediff <- outer(x, x, function(a,b)abs(a-b)) matrix(approx(cor.pattern, xout=timediff)$y, nrow=p)*sigma2 } } ## Following code taken from dmvnorm() eS <- eigen(cov, symmetric = TRUE) ## y <- y %*% (eS$vectors * rep(1/sqrt(eS$values), each = p)) 24Feb02 y <- y %*% (eS$vectors * rep(1/sqrt(eS$values), rep(p,length(eS$values)))) logl <- sum(y^2) + p*logb(2*pi) + logb(prod(eS$values)) ll <- ll + logl } ll } loglik.dep[1] <- lldep(time, id, sigma2, res, rho, cor.pattern) } uneven <- 0 for(i in 1:B) { if(i %% 10 ==0) cat(i,'') pts <- sample(clusters, replace=TRUE) if(bootstrap.type=='x random') { obsn <- unlist(orig.obsno[pts]) idb <- id[obsn] xt <- X.Time[obsn,,drop=FALSE] f.b <- lm.fit.qr.bare(if(absorb.subject.effects || missing(id)) cbind(xt,1) else model.matrix(~xt+idb-1, data=list(xt=xt,idb=as.factor(idb))), y[obsn], intercept=FALSE) ## was idb=idb 3Apr02 } else { rr <- unlist(R[pts]) lrr <- length(rr) uneven <- max(uneven, abs(lrr-n)) if(lrr > n) rr <- rr[1:n] else if(lrr < n) rr <- c(rr, sample(rr, n-lrr, replace=TRUE)) yb.e <- yhat + rr f.b <- if(absorb.subject.effects) lm.fit.qr.bare(cbind(X.Time,1), yb.e, intercept=FALSE) else lm.fit.qr.bare(X, yb.e, intercept=FALSE) } cofb <- cof(f.b, p) #26Jun97 pred <- if(bootstrap.type=='x fixed') { if(!absorb.subject.effects) X %*% f.b$coefficients else cofb[1] + X.Time %*% cofb[-1] } else cofb[1] + X.Time %*% cofb[-1] ## x random case may only work properly if absorb.subject.effects, as ## 
## we have to ignore the original subject ids anyway (the bootstrap
## sample in general won't represent all subjects)
    Coef[i+1,] <- cofb   #26Jun97
    sse[i+1] <- sum((y-pred)^2)
    sigma2 <- sum(f.b$residuals^2)/length(f.b$residuals)
    loglik[i+1] <- n*logb(2*pi*sigma2) + sse[i+1]/sigma2
    if(dodep)
      loglik.dep[i+1] <- lldep(time, id, sigma2, y-pred, rho, cor.pattern)
  }
  if(uneven > 0)
    warning(paste('Subjects had unequal number of records.\nMaximum discrepancy between ',
                  'total number of bootstrap records sampled and original\nnumber of ',
                  'records (', n, ') is ', uneven,
                  '. Bootstrap estimates are approximate.', sep=''))
  if(dodep) {
    srho <- spearman(loglik, loglik.dep)
    cat('\n\nSpearman rank correlation between', B+1, 'log likelihoods ',
        'assuming independence and assuming dependence:', round(srho,3), '\n')
  }
  mode(Coef) <- 'single'
  mode(sse) <- 'single'
  structure(list(Coef=Coef, sse=sse, loglik=loglik, loglik.dep=loglik.dep,
                 times=times, X.times=X.times, xlab=xlab, ylab=ylab,
                 ylim=ylim, bootstrap.type=bootstrap.type, fit=f, knots=knots,
                 rho=rho, cor.pattern=cor.pattern),
            class='rm.boot')
}

plot.rm.boot <- function(x, obj2, conf.int=.95, xlab=x$xlab, ylab=x$ylab,
                         xlim, ylim=x$ylim,
                         individual.boot=FALSE, pointwise.band=FALSE,
                         curves.in.simultaneous.band=FALSE,
                         col.pointwise.band=2,
                         objective=c('-2 log L','sse','dep -2 log L'),
                         add=FALSE, ncurves, multi=FALSE,
                         multi.method=c('color','density'),
                         multi.conf=c(.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.95,.99),
                         multi.density=c(-1,90,80,70,60,50,40,30,20,10, 7,4),
                         multi.col =c( 1, 8,20, 5, 2, 7,15,13,10,11, 9,14),
                         subtitles=TRUE, ...)
{
  ## 2 was between 5 and 7, 17 was between 8 and 20
  obj <- x
  objective <- match.arg(objective)
  if(missing(objective))
    objective <- if(obj$rho==0 && is.character(obj$cor.pattern)) '-2 log L'
                 else 'dep -2 log L'
  sse <- switch(objective,
                sse = obj$sse,
                '-2 log L' = obj$loglik,
                'dep -2 log L' = obj$loglik.dep)
  B <- length(sse)
  Coef <- obj$Coef
  times <- obj$times
  if(!missing(obj2)) {
    if((length(times) != length(obj2$times)) ||
       (any(times != obj2$times, na.rm=TRUE)))
      stop('times vector must be identical for both rm.boot objects')
    times <- ifelse(is.na(times), NA, obj2$times)
    sse <- sse + obj2$sse
    if(missing(ylab)) ylab <- paste(obj$ylab, '-', obj2$ylab)
  }
  ## order from best -2 log likelihood or sum of squared errors to worst
  i <- order(sse)
  ## Select best confidence coefficient*B estimates
  conf <- if(multi) max(multi.conf) else conf.int
  i <- i[1:round(conf*B)]
  if(i[1] != 1)
    warning(paste('design is imbalanced enough that best log likelihood or SSE was not\n',
                  'obtained from overall fit (objective=', format(sse[1]),
                  ') but from\n',
                  'a bootstrap fit (objective=', format(sse[i[1]]),
                  ')\nThis can also happen if the objective is not -2 log L',
                  sep=''))
  ## Evaluate all fits on time grid and compute point by point max and min
  curves <- cbind(1, obj$X.times) %*% t(Coef)
  if(!missing(obj2)) {
    curves <- curves - cbind(1, obj2$X.times) %*% t(obj2$Coef)
    if(missing(ylim)) ylim <- range(curves[,i])
  }
  if(multi) {
    multi.method <- match.arg(multi.method)
    if(missing(xlim))
      plot(times, curves[,1], type='n', xlab=xlab, ylab=ylab, ylim=ylim)
    else
      plot(times, curves[,1], type='n', xlab=xlab, ylab=ylab,
           xlim=xlim, ylim=ylim)
    title(paste('Simultaneous', min(multi.conf), '-', max(multi.conf),
                'Confidence Regions'))
    high.prev <- low.prev <- curves[,1]
    for(j in 1:length(multi.conf)) {
      ii <- i[1:round(multi.conf[j]*B)]
      high <- apply(curves[,ii], 1, max)
      low <- apply(curves[,ii], 1, min)
      if(multi.method=='density') {
        polygon(c(times,rev(times)), c(high.prev,rev(high)),
                density=multi.density[j])
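## Usage sketch for rm.boot (defined above) and plot.rm.boot (continued
## below); time, y, and id are hypothetical longitudinal measurements:
##   # f <- rm.boot(time, y, id, B=100, nk=5)
##   # plot(f)   # simultaneous confidence band for the time-response curve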
        polygon(c(times,rev(times)), c(low.prev, rev(low)),
                density=multi.density[j])
      }
      else {
        polygon(c(times,rev(times)), c(high.prev,rev(high)),
                col=multi.col[j])
        polygon(c(times,rev(times)), c(low.prev, rev(low)),
                col=multi.col[j])
      }
      high.prev <- high
      low.prev <- low
    }
    lines(times, curves[,1], lwd=2, col=0)   ## point estimates in white
  }
  else {
    if(add) lines(times, curves[,1])
    else {
      if(missing(xlim))
        plot(times, curves[,1], type='l', xlab=xlab, ylab=ylab, ylim=ylim)
      else
        plot(times, curves[,1], type='l', xlab=xlab, ylab=ylab,
             xlim=xlim, ylim=ylim)
      title(paste('Simultaneous', conf.int, 'Confidence Region'))
    }
    high <- apply(curves[,i], 1, max)
    low <- apply(curves[,i], 1, min)
    lines(times, high, lty=2)
    lines(times, low, lty=2)
  }
  result <- list(times=times, fitted=curves[,1], lower=low, upper=high)
  if(individual.boot || curves.in.simultaneous.band) {
    subs <- if(individual.boot) 1:B else i
    if(!missing(ncurves)) subs <- sample(subs, ncurves)
    for(j in subs) lines(times, curves[,j], lty=2)
  }
  if(pointwise.band) {
    p <- apply(curves, 1, quantile,
               probs=c((1-conf.int)/2, 1-(1-conf.int)/2))
    lines(times, p[1,], col=col.pointwise.band)
    lines(times, p[2,], col=col.pointwise.band)
    result <- c(result, list(pointwise.lower=p[1,], pointwise.upper=p[2,]))
  }
  if(!add && subtitles) {
    title(sub=obj$bootstrap.type, adj=1)
    title(sub=paste(B-1, 'bootstrap repetitions'), adj=0)
  }
  invisible(result)
}

Hmisc/R/is.present.s

is.present <- function(x)
{
  if(is.character(x)) return(x != "")
  else return(!is.na(x))
}

Hmisc/R/confbar.s

confbar <- function(at, est, se, width, q=c(.7,.8,.9,.95,.99),
                    col=gray(c(0,.25,.5,.75,1)),
                    type=c("v","h"), labels=TRUE, ticks=FALSE,
                    cex=.5, side="l", lwd=5, clip=c(-1e30, 1e30),
                    fun=function(x) x,
                    qfun=function(x) ifelse(x==.5, qnorm(x),
                                            ifelse(x<.5, qnorm(x/2),
                                                   qnorm((1+x)/2))))
{
  type <-
    match.arg(type)
  iusr <- if(type=="v") 1:2 else 3:4
  if(missing(width)) width <- diff(par("usr")[iusr])*.02
  if(side=="b") side <- "l"   #treat bottom as left
  if(length(q) != length(col)) stop("q and col must have same length")
  q <- c(1-rev(q), .5, q)
  ##qe <- seq(.01, .99, length=n)
  ##col <- seq(.8,.01, length=n/2)
  col <- c(rev(col), col)
  w <- width/2
  if(type=="v") {
    polyg <- function(a, b, col, clip)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      polygon(a, b, col=col)
    }
    Lines <- function(a, b, lwd=1, clip)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      lines(a, b, lwd=lwd)
    }
    Text <- function(a, b, clip, ...)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      text(a, b, ...)
    }
    srt <- 0
  }
  else {
    polyg <- function(a, b, col, clip)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      polygon(b, a, col=col)
    }
    Lines <- function(a, b, lwd=1, clip)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      lines(b, a, lwd=lwd)
    }
    Text <- function(a, b, clip, ...)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      text(b, a, ...)
    }
    srt <- 45
  }
  for(i in 1:(length(q)-1))
    polyg(c(at-w, at+w, at+w, at-w),
          fun(est + se*qfun(c(q[i], q[i], q[i+1], q[i+1]))),
          col=col[i], clip=clip)
  a <- fun(est)
  z <- w*.24
  Lines(c(at-w-3.5*z, at+w+3.5*z), c(a,a), lwd=lwd, clip=clip)
  a <- fun(est + se*qfun(q))
  do <- TRUE
  if(labels || ticks) for(i in 1:length(q)) {
    b <- c(a[i], a[i])
    if(ticks) {
      Lines(c(at-w-z, at-w), b, clip=clip)
      Lines(c(at+w+z, at+w), b, clip=clip)
    }
    if(labels && do && q[i] != .5) {
      if(side=="l")
        Text(at-w-2*z, a[i], format(max(1-q[i], q[i])),
             cex=cex, adj=1, srt=srt, clip=clip)
      else
        Text(at+w+2*z, a[i], format(max(1-q[i], q[i])),
             cex=cex, adj=0, srt=srt, clip=clip)
    }
    if(q[i] != .5) do <- !do
  }
  names(a) <- format(q)
  invisible(a)
}

Hmisc/R/ggplotlyr.r

##' Render `plotly` Graphic from a `ggplot2` Object
##'
##' Uses `plotly::ggplotly()` to render a `plotly` graphic with a specified
##' tooltip attribute, removing extraneous text that `ggplotly` puts in
##' hover text when `tooltip='label'`
##' @title ggplotlyr
##' @param ggobject an object produced by `ggplot`
##' @param tooltip attribute specified to `ggplot` to hold hover text
##' @param remove extraneous text to remove from hover text.  Default is set
##'   to assume `tooltip='label'` and that the user specified
##'   `aes(..., label=txt)`.  If you instead specified `aes(..., label=myvar)`
##'   use `remove='myvar: '`.
##' @param ... other arguments passed to `ggplotly`
##' @return a `plotly` object
##' @author Frank Harrell
##' @export
##' @md
ggplotlyr <- function(ggobject, tooltip='label', remove='txt: ', ...)
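## Usage sketch for confbar (defined above): draw a vertical multi-level
## confidence bar at x=1 for a hypothetical estimate of 0 with SE 1:
##   # plot(c(0, 2), c(-3, 3), type='n')
##   # confbar(at=1, est=0, se=1)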
{
  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")
  # Get around a bug in tooltip construction with ggplotly
  # See https://stackoverflow.com/questions/66316337
  g <- plotly::ggplotly(ggobject, tooltip=tooltip, ...)
  if(! length(remove) || remove == '') return(g)
  d <- g$x$data
  for(i in 1 : length(d)) {
    w <- d[[i]]$text
    if(length(w)) d[[i]]$text <- gsub(remove, '', w)
  }
  g$x$data <- d
  g
}

Hmisc/R/latex.s

first.word <- function(x, i=1, expr=substitute(x))
{
  words <- if(! missing(x)) as.character(x)[1]
           else as.character(unlist(expr))[1]
  if(i > 1) stop('i > 1 not implemented')
  chars <- substring(words, 1 : nchar(words), 1 : nchar(words))
  legal.chars <- c(letters, LETTERS, '.',
                   '0','1','2','3','4','5','6','7','8','9')
  non.legal.chars <- (1 : length(chars))[chars %nin% legal.chars]
  if(! any(non.legal.chars)) return(words)
  if(non.legal.chars[1] == 1) return(character(0))
  substring(words, 1, non.legal.chars[1] - 1)
}

##1. if x is a data.frame, then do each component separately.
##2. if x is a matrix, but not a data.frame, make it a data.frame
##   with individual components for the columns.
##3. if a component x$x is a matrix, then do all columns the same.
##4. Use right justify by default for numeric columns.
##5. Use left justify for non-numeric columns.
## The following are made complicated by matrix components of data.frames:
##6. vector cdec must have number of items equal to number of columns
##   of input x.
##7. matrix dec must have number of columns equal to number of columns
##   of input x.
##8. scalar dec is expanded to a vector cdec with number of items equal
##   to number of columns of input x.
##9. vector rdec must have number of items equal to number of rows of input x.
##   rdec is expanded to matrix dec.
##10. col.just must have number of columns equal to number of columns
##    of output cx.
## Value:
## character matrix with character images of properly rounded x.
## matrix components of input x are now just sets of columns of character
## matrix.
## attr(,col.just) repeats input col.just when provided.
## Otherwise, recommended justification for columns of output.
## Default is "l" for characters and factors, "r" for numeric.
## When dcolumn==T, numerics will have ".".
## FEH 21May96 - changed default for numeric.dollar to cdot
## FEH 5Jun96 - re-written to not rely on as.data.frame,
##              converted data frames to matrices the slow way
##              added matrix.sep
## 12Aug99 - allowed # decimal places=NA (no rounding, just use format())
## 27May02 - added booktabs FEH
## 13Dec02 - added ctable FEH
## arguments included check.names=TRUE 23jan03
##
## 16Jan15 (A. Kiermeier) pass "..." to formt() and format()
format.df <- function(x, digits, dec=NULL, rdec=NULL, cdec=NULL,
                      numeric.dollar=!
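## Usage sketch for ggplotlyr (defined above); the data frame d and its
## label column txt are hypothetical:
##   # d <- data.frame(x=1:3, y=3:1, txt=c('a', 'b', 'c'))
##   # ggplotlyr(ggplot2::ggplot(d, ggplot2::aes(x, y, label=txt)) +
##   #             ggplot2::geom_point())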
dcolumn, na.blank=FALSE, na.dot=FALSE, blank.dot=FALSE, col.just=NULL, cdot=FALSE, dcolumn=FALSE, matrix.sep=' ', scientific=c(-4,4), math.row.names=FALSE, already.math.row.names=FALSE, math.col.names=FALSE, already.math.col.names=FALSE, double.slash=FALSE, format.Date='%m/%d/%Y', format.POSIXt="%m/%d/%Y %H:%M:%OS", ...) { sl <- ifelse(double.slash, "\\\\", "\\") cleanLatex <- function(string) { if(! is.character(string)) string <- as.character(string) ## Find strings not in math mode (surrounded by $) s <- gsub("(^[[:space:]]+)|([[:space:]]+$)", "", string) k <- ! (substring(s, 1, 1) == '$' & substring(s, nchar(s)) == '$') k <- k & ! is.na(k) if(! any(k)) return(string) inn <- c('< =', '> =', '<=', '>=', '<', '>', '\\\\%', '%', '\\\\&', '&') out <- c('<=', '>=', paste('$', sl, sl, 'leq$', sep=''), paste('$', sl, sl, 'geq$', sep=''), paste(sl, sl, 'textless ', sep=''), paste(sl, sl, 'textgreater ', sep=''), '%', paste(sl, sl, '%', sep=''), '&', paste(sl, sl, '&', sep='')) for(i in 1 : length(inn)) string[k] <- gsub(inn[i], out[i], string[k]) string } if(numeric.dollar && dcolumn) stop('cannot have both numeric.dollar=TRUE and dcolumn=TRUE') if(missing(digits)) digits <- NULL if((! length(digits))+(! length(dec))+(! length(rdec))+(! length(cdec)) < 3) stop('only one of digits, dec, rdec, cdec may be given') if(! length(digits) && ! length(dec) && ! length(rdec) && ! length(cdec)) { digits <- 15 } if(length(digits)) { oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) } formt <- function(x, decimal.mark='.', nsmall=0, scientific=c(-4,4), digits=NULL, na.blank=FALSE, ...) { y <- format(x, nsmall=nsmall, decimal.mark=decimal.mark, digits=digits, ...) if(decimal.mark != '.') y <- gsub('\\.', decimal.mark, y) if(na.blank) y <- ifelse(is.na(x), '', y) y } dot <- if(cdot && numeric.dollar) paste(sl,sl,'cdotp',sl,sl,'!',sep='') else getOption('OutDec') decimal.point <- if(cdot && dcolumn) paste(sl,'cdot',sep='') else dot if(is.data.frame(x)) x <- unclass(x) xtype <- if(is.list(x)) 1 else if(length(dim(x))) 2 else 3 ncx <- if(xtype == 1) length(x) else if(xtype == 2) ncol(x) else 1 nams <- if(xtype == 1) names(x) else if(xtype == 2) dimnames(x)[[2]] else '' if(! missing(col.just) && (length(col.just) < ncx)) stop('col.just needs the same number of elements as number of columns') if(! 
length(nams)) nams <- rep('', ncx) nrx <- if(xtype == 1) { if(length(d <- dim(x[[1]]))) d[1] else length(x[[1]]) } else if(xtype == 2) nrow(x) else length(x) rnams <- if(xtype == 1) attr(x,'row.names') else if(xtype == 2) dimnames(x)[[1]] else names(x) if(length(dec) + length(rdec) + length(cdec) == 0) rtype <- 1 if(length(rdec)) { rtype <- 2 dec <- matrix(rdec, nrow=nrx, ncol=ncx) } if(length(dec)) { rtype <- 3 if(length(dec) == 1) cdec <- rep(dec, ncx) } if(length(cdec)) rtype <- 4 cx <- NULL nam <- NULL cjust <- NULL if(blank.dot) sas.char <- function(x) { n.x <- nchar(x) blanks.x <- sapply(n.x, function(n.x.i) paste(rep(" ", n.x.i), collapse="")) ifelse(x == blanks.x, ".", x) } nams <- if(math.col.names) paste('$', nams, '$', sep='') else if(already.math.col.names) nams else cleanLatex(nams) rnams <- if(math.row.names) paste('$', rnams, '$', sep='') else if(already.math.row.names) rnams else cleanLatex(rnams) for(j in 1 : ncx) { xj <- if(xtype == 1) x[[j]] else if(xtype == 2) x[,j] else x num <- is.numeric(xj) || all(is.na(xj)) if(testDateTime(xj)) num <- FALSE ## using xtype avoids things like as.matrix changing special characters ncxj <- max(1, dim(xj)[2], na.rm=TRUE) for(k in 1 : ncxj) { xk <- if(ld <- length(dim(xj)) == 2) xj[, k] else xj names(xk) <- NULL ## gets around bug in format.default when ## nsmall is given and there are NAs namk <- if(ld) { dn <- dimnames(xj)[[2]][k] if(length(dn) == 0) dn <- as.character(k) if(math.row.names) { paste('$', dn, '$', sep='') } else if(already.math.row.names) dn else cleanLatex(dn) } else '' namk <- paste(nams[j], if(nams[j] != '' && namk != '') matrix.sep else '', namk, sep='') if(num) { cj <- if(length(col.just)) col.just[j] else 'r' if(rtype == 1) cxk <- formt(xk, decimal.mark=dot, scientific=scientific, digits=digits, na.blank=na.blank, ...) else if(rtype == 3) { cxk <- character(nrx) for(i in 1 : nrx) cxk[i] <- if(is.na(dec[i,j])) formt(xk[i], decimal.mark=dot, scientific=scientific, digits=digits, na.blank=na.blank, ...) else formt(round(xk[i], dec[i,j]), decimal.mark=dot, digits=digits, nsmall=dec[i,j], scientific=scientific, na.blank=na.blank, ...) } else if(rtype == 4) cxk <- if(is.na(cdec[j])) formt(xk, decimal.mark=dot, scientific=scientific, digits=digits, na.blank=na.blank, ...) else formt(round(xk, cdec[j]), decimal.mark=dot, nsmall=cdec[j], digits=digits, scientific=scientific, na.blank=na.blank, ...) if(na.dot) cxk[is.na(xk)] <- '.' 
# SAS-specific if(blank.dot) cxk <- sas.char(cxk) if(numeric.dollar) cxk <- paste("$",cxk,"$",sep="") ## These columns get real minus signs in LaTeX, not hyphens, ## but lose alignment unless their col.just="r" if(dcolumn | (length(col.just) && col.just[j] == 'c')) { cxk <- sedit(cxk, " ", "~") if(dcolumn) cj <- paste("D{.}{",decimal.point,"}{-1}",sep='') } } else { #ended if(num) cj <- if(length(col.just)) col.just[j] else 'l' if(inherits(xk, "Date")) { cxk <- cleanLatex(format(xk, format=format.Date)) } else if(inherits(xk, "POSIXt")) { cxk <- cleanLatex(format(xk, format=format.POSIXt)) } else { cxk <- cleanLatex(xk) } if(na.blank) cxk <- ifelse(is.na(xk), '', cxk) } cx <- cbind(cx, cxk) nam <- c(nam, namk) cjust <- c(cjust, cj) } # end k } #end j dimnames(cx) <- list(rnams, nam) attr(cx,"col.just") <- cjust cx } ##first.hline.double added FEH 11Jun95 ##Usage: ## latex(x) # for x any S object ##Value is a file object of class=c("latex","file") which is ##automatically printed by print.latex(), which constructs a file objecT ##of class=c("dvi","file"), and automatically prints it using ##print.dvi(). print.latex() returns an invisible file object. ## dcolumn numeric.dollar cdot ## ## dc cd nd format.df latex.default # comment ## F F T $ # LaTeX usage ## F T T \cdot! $ # LaTeX usage ## T F F . ~ . dcolumn # LaTeX usage ## T T F . ~ \cdot dcolumn # LaTeX usage ## ## F F F # non-TeX (hyphens in TeX) ## ## F T F \cdot! # TeX errors, hyphens ## T F T . ~ $ . dcolumn # TeX errors ## T T T . ~ $ \cdot dcolumn # TeX errors latex.default <- function(object, title=first.word(deparse(substitute(object))), file=paste(title, ".tex", sep=""), append=FALSE, label=title, rowlabel=title, rowlabel.just="l", cgroup=NULL, n.cgroup=NULL, rgroup=NULL, n.rgroup=NULL, cgroupTexCmd="bfseries", rgroupTexCmd="bfseries", rownamesTexCmd=NULL, colnamesTexCmd=NULL, cellTexCmds=NULL, rowname, cgroup.just=rep("c", length(n.cgroup)), colheads=NULL, extracolheads=NULL, extracolsize='scriptsize', dcolumn=FALSE, numeric.dollar=! dcolumn, cdot=FALSE, longtable=FALSE, draft.longtable=TRUE, ctable=FALSE, booktabs=FALSE, table.env=TRUE, here=FALSE, lines.page=40, caption=NULL, caption.lot=NULL, caption.loc=c('top','bottom'), star=FALSE, double.slash=FALSE, vbar=FALSE, collabel.just=rep("c",nc), na.blank=TRUE, insert.bottom=NULL, insert.bottom.width=NULL, insert.top=NULL, first.hline.double=! (booktabs | ctable), where='!tbp', size=NULL, center=c('center','centering','centerline','none'), landscape=FALSE, multicol=TRUE, ## to remove multicolumn if no need math.row.names=FALSE, already.math.row.names=FALSE, math.col.names=FALSE, already.math.col.names=FALSE, hyperref=NULL, continued='continued', ...) { if(length(hyperref)) hyperref <- sprintf('\\hyperref[%s]{', hyperref) center <- match.arg(center) caption.loc <- match.arg(caption.loc) cx <- format.df(object, dcolumn=dcolumn, na.blank=na.blank, numeric.dollar=numeric.dollar, cdot=cdot, math.row.names=math.row.names, already.math.row.names=already.math.row.names, math.col.names=math.col.names, already.math.col.names=already.math.col.names, double.slash=double.slash, ...) if(missing(rowname)) rowname <- dimnames(cx)[[1]] nocolheads <- length(colheads) == 1 && is.logical(colheads) && ! colheads if (! length(colheads)) colheads <- dimnames(cx)[[2]] col.just <- attr(cx, "col.just") nc <- ncol(cx) nr <- nrow(cx) if (length(cgroup)) { k <- length(cgroup) if(! 
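## Usage sketch for format.df (defined above): round columns to 1 and 2
## decimals; numeric columns come back right-justified and, by default,
## wrapped in $...$ for LaTeX:
##   # format.df(data.frame(a=pi, b=exp(1)), cdec=c(1, 2))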
length(n.cgroup)) n.cgroup <- rep(nc / k, k) if(sum(n.cgroup) != nc) stop("sum of n.cgroup must equal number of columns") if(length(n.cgroup) != length(cgroup)) stop("cgroup and n.cgroup must have same lengths") } if(! length(rowname)) rgroup <- NULL if(! length(n.rgroup) && length(rgroup)) n.rgroup <- rep(nr / length(rgroup), length(rgroup)) if(length(n.rgroup) && sum(n.rgroup) != nr) stop("sum of n.rgroup must equal number of rows in object") if(length(rgroup) && length(n.rgroup) && (length(rgroup) != length(n.rgroup))) stop("lengths of rgroup and n.rgroup must match") if (length(rgroup) && rowlabel.just == "l") rowname <- paste("~~", rowname, sep="") sl <- ifelse(double.slash, "\\\\", "\\") if(ctable && !booktabs) { eol <- paste(sl, 'NN\n', sep='') eog <- "" } else if(ctable) { eol <- paste(sl, 'NN\n', sep='') eog <- paste(sl, 'NN\n', sep='') } else if(longtable && length(n.rgroup)) { eol <- paste(sl,"tabularnewline*\n", sep='') eog <- paste(sl, "tabularnewline\n", sep='') } else { eol <- paste(sl,"tabularnewline\n", sep='') eog <- paste(sl, "tabularnewline\n", sep='') } if(booktabs) { toprule <- paste(sl, "toprule\n",sep="") midrule <- paste(sl, "midrule\n",sep="") bottomrule <- paste(sl, "bottomrule\n",sep="") } else if(ctable) { toprule <- paste(sl, 'FL\n', sep='') midrule <- paste(sl, 'ML\n', sep='') bottomrule <- paste(sl, 'LL\n', sep='') } else { toprule <- if(first.hline.double) paste(sl, "hline", sl, "hline\n", sep="") else paste(sl, "hline\n", sep="") midrule <- bottomrule <- paste(sl, "hline\n", sep="") } ## ################ CELL AND ROWNAMES FORMATS ################### ## If no formats are specified for the rownames and cells there is ## nothing to do. If only one is specified then the other must ## faked. But rownamesTexCmd should only be faked if rownames is ## not NULL. ## Check to make sure the dimensions of the cell formats ## match the dimensions of the object to be formatted. if (length(cellTexCmds) & ! (all(dim(cx) == dim(cellTexCmds)) & length(dim(cx)) == length(dim(cellTexCmds)))) { msg <- "The dimensions of cellTexCmds must be:" msg1 <- paste(dim(cx), collapse=" x ") msg <- paste(msg, msg1) msg <- paste(msg, ", but you gave me: ") msg1 <- paste(dim(cellTexCmds), collapse=" x ") msg <- paste(msg, msg1, sep="") stop(msg) } if (length(cellTexCmds) | length(rownamesTexCmd)) { ## LaTeX commands have been specified for either the rownames or ## the cells. ## Fake rownamesTexCmd if it is NULL and if rowname exists. if (! length(rownamesTexCmd) & length(rowname)) rownamesTexCmd <- rep("", nr) ## Fake cellTexCmds if it is NULL. if (! length(cellTexCmds)) cellTexCmds <- array('', dim=dim(cx)) ## Create a combined rowname and cell format object rcellTexCmds <- cbind(rownamesTexCmd, cellTexCmds) thisDim <- dim(rcellTexCmds) ## Prefix the latex commands with slashes. rcellTexCmds <- paste(sl, rcellTexCmds, sep="") ## Remove slashes from elements where no format was specified. rcellTexCmds[rcellTexCmds == sl] <- "" ## Restore the dimensions of the matrix (paste loses them). 
dim(rcellTexCmds) <- thisDim } else rcellTexCmds <- NULL ################ END OF CELL AND ROWNAMES FORMATS ############### if (length(cgroup)) { last.col <- cumsum(n.cgroup) first.col <- c(1, 1 + last.col[- length(last.col)]) cgroup.cols <- cbind(first.col,last.col) col.subs <- split(seq(length.out=nc), rep.int(seq_along(n.cgroup), times=n.cgroup)) cxi <- rctci <- list() ## Initialize with row name column and first column group: rctcx <- if(length(rcellTexCmds)) rcellTexCmds[, 1] # rctci <- if(length(rcellTexCmds)) # list(cbind(rcellTexCmds[, 1], rcellTexCmds[1 + col.subs[[1]] for (i in seq(along=col.subs)) { cxi[[i]] <- cx[, col.subs[[i]], drop=FALSE] if(length(rctcx)) rctcx <- cbind(rctcx, rcellTexCmds[, 1 + col.subs[[i]], drop=FALSE], if(i < length(col.subs)) '') } if(length(rctcx)) rcellTexCmds <- rctcx cxx <- cxi[[1]] col.justxx <- col.just[col.subs[[1]]] collabel.justxx <- collabel.just[col.subs[[1]]] colheadsxx <- colheads[col.subs[[1]]] extracolheadsxx <- extracolheads[col.subs[[1]]] cgroupxx <- cgroup[1] n.cgroupxx <- n.cgroup[1] for(i in seq(along=col.subs)[-1]) { cxx <- cbind(cxx, "", cxi[[i]]) col.justxx <- c(col.justxx, "c", col.just[col.subs[[i]]]) collabel.justxx <- c(collabel.justxx, "c", collabel.just[col.subs[[i]]]) cgroupxx <- c(cgroupxx, "", cgroup[i]) n.cgroupxx <- c(n.cgroupxx, 1, n.cgroup[i]) colheadsxx <- c(colheadsxx, "", colheads[col.subs[[i]]]) if(length(extracolheads)) extracolheadsxx <- c(extracolheadsxx, "", extracolheads[col.subs[[i]]]) } cgroup.colsxx <- cgroup.cols + 0 : (nrow(cgroup.cols) - 1) cx <- cxx col.just <- col.justxx collabel.just <- collabel.justxx n.cgroup <- n.cgroupxx cgroup.cols <- cgroup.colsxx[cgroup != "", , drop=FALSE] cgroup <- cgroupxx colheads <- colheadsxx extracolheads <- extracolheadsxx nc <- ncol(cx) } cline <- NULL if (length(rowname)) { cx <- cbind(rowname, cx) col.just <- c(rowlabel.just, col.just) if(length(extracolheads)) extracolheads <- c('', extracolheads) collabel.just <- c(rowlabel.just, collabel.just) if (length(cgroup) == 0L) colheads <- c(rowlabel, colheads) else { colheads <- c('', colheads) cgroup <- c(rowlabel, cgroup) rlj <- ifelse(rowlabel.just == "l", "l", "c") cgroup.just <- c(rlj, cgroup.just) n.cgroup <- c(1, n.cgroup) cgroup.cols <- 1+cgroup.cols cline <- paste(sl, "cline{", cgroup.cols[,1],"-", cgroup.cols[,2], "}", sep="", collapse=" ") } nc <- 1 + nc } else if(length(cgroup) > 0L) { cline <- paste0(sl, "cline{", cgroup.cols[,1], "-", cgroup.cols[,2], "}", collapse=" ") } vbar <- ifelse(vbar, "|", "") if(! append) cat("", file=file) #start new file ## pandoc used by R Markdown gets fooled by LaTeX comments olc <- getOption('omitlatexcom') if(length(olc) && olc) cat("%", deparse(sys.call()), "%\n", file=file, append=file != '', sep='') if(dcolumn) { decimal.point <- ifelse(cdot, paste(sl, "cdot", sep=""), ".") cat(sl,"newcolumntype{.}{D{.}{",decimal.point,"}{-1}}\n", sep="", file=file, append=file != '') } { # tabular.cols tabular.cols <- paste(vbar, col.just, sep="") if (! length(n.cgroup)) tabular.cols <- c(tabular.cols, vbar) else { vv2 <- cumsum(n.cgroup) tabular.cols[vv2] <- paste(tabular.cols[vv2],vbar,sep="") } tabular.cols <- paste(tabular.cols, collapse="") } intop <- function() { if(! length(insert.top)) return(NULL) paste(if(center == 'none') '\n\\vspace{1ex}\n\n', paste('\\textbf{', insert.top, '}', sep=''), # if(center %in% c('centerline', 'centering')) '\\\\', if(center != 'center') '\n\\vspace{1ex}\n\n', sep='') } if(length(caption) && ! 
ctable) { caption <- paste(sl, "caption", if(length(caption.lot)) paste("[", caption.lot, "]", sep=""), "{", caption, if(! longtable) paste(sl, "label{", label, "}", sep=""), "}", sep="") table.env <- TRUE } if(ctable) { latex.begin <- latexBuild( if(length(size)) paste('{', sl, size, sep=''), '{', intop(), '', paste(sl, 'ctable[', sep=''), '', if(length(caption) && caption.loc == 'bottom') 'botcap,', '', if(length(caption)) paste('caption={', caption, '},', sep=''), '', if(length(caption.lot)) paste('cap={', caption.lot, '},', sep=''), '', if(length(caption)) paste('label=', label, ',', sep=''), '', if (star) 'star, ', '', if(! landscape) paste('pos=', where, ',', sep=''), '', if(landscape) 'sideways', '', paste(']{', tabular.cols, '}', sep=''), '', if(length(insert.bottom)) paste('{', paste(sl,'tnote[]{', sedit(insert.bottom,'\\\\',' '),'}', sep='', collapse=''), '}', sep=''), '', if(! length(insert.bottom)) '{}', '', ## tnote does not allow \\ in its argument paste('{', toprule, sep=''), '{') latex.end <- attr(latex.begin, 'close') } else if(! longtable) { latex.begin <- latexBuild( if(landscape) paste(sl, "begin{landscape}", sep=""), 'landscape', if(table.env) paste(sl, "begin{table}", if(here) "[H]" else paste('[', where, ']', sep=''), "\n", sep=""), 'table', if(length(size)) paste('{', sl, size, '\n', sep=''), '{', if(caption.loc == 'top' && length(caption)) paste(caption, "\n"), '', intop(), '', if(center == 'center') paste(sl, "begin{center}\n", sep=""), 'center', if(center == 'centering') paste('{', sl, 'centering\n', sep=''), '{', if(center == 'centerline') paste(sl, 'centerline{', sep=''),'{', hyperref, '{', paste(sl, "begin{tabular}{", tabular.cols, "}\n", toprule, sep=""), 'tabular', insert=list(if(! table.env && length(insert.bottom)) list('tabular', 'after', paste('\\par', insert.bottom)), if(table.env) list('table', 'before', paste(insert.bottom, collapse = ' ')), if(caption.loc == 'bottom' && length(caption)) list('tabular', 'after', caption) ) ) latex.end <- attr(latex.begin, 'close') } else { ## longtable, not ctable latex.begin <- latexBuild( if(! draft.longtable) paste(sl,"let",sl,"LTmulticolumn=",sl,"multicolumn", sep=""), '', paste(sl, "setlongtables", sep=""), '', if(landscape) paste(sl, "begin{landscape}",sep=""), 'landscape', if(length(size)) paste('{', sl, size, '\n', sep=''), '{', intop(), '', paste(sl,"begin{longtable}{", tabular.cols, "}", sep=""), 'longtable', if(caption.loc == 'top' && length(caption)) paste(caption, eog), '', toprule, '', insert=list( if(caption.loc == 'bottom' && length(caption)) list('longtable', 'after', caption) ) ) latex.end <- attr(latex.begin, 'close') if(! length(caption)) latex.end <- paste(latex.end, '\\addtocounter{table}{-1}', sep='\n') } cat(latex.begin, file=file, append=file != '') cgroupheader <- NULL if(length(cgroup)) { cvbar <- paste(cgroup.just, vbar, sep="") cvbar[1] <- paste(vbar, cvbar[1], sep="") cvbar[-length(cvbar)] <- paste(cvbar[-length(cvbar)], vbar, sep="") slmc <- paste(sl, "multicolumn{", sep="") if (length(cgroupTexCmd)) labs <- paste(sl, cgroupTexCmd, " ", cgroup, sep="") else labs <- cgroup if(multicol) labs <- paste(slmc, n.cgroup, "}{", cvbar, "}{", labs, "}", sep="") cgroupheader <- paste(labs, collapse="&") if (! 
length(cline)) {
      inr <- as.numeric(length(rowname))
      cline <- paste(sl, "cline{", 1 + inr, "-", nc, "}", sep="")
    }
    cgroupheader <- paste(cgroupheader, eol, cline, "\n", sep="")
    cat(cgroupheader, file=file, append=file != '')
  }
  { # column labels
    cvbar <- paste(collabel.just, vbar, sep="")
    cvbar[1] <- paste(vbar, cvbar[1], sep="")
    if (length(n.cgroup)) {
      vv2 <- cumsum(n.cgroup[-length(n.cgroup)])
      cvbar[vv2] <- paste(cvbar[vv2], vbar, sep="")
    }
    slmc1 <- paste(sl, "multicolumn{1}{", sep="")
    labs <- colheads
    if (length(colnamesTexCmd))
      labs <- paste(sl, colnamesTexCmd, " ", labs, sep="")
    if(nocolheads) colheads <- labs <- NULL
    header <- NULL
    if(length(labs)) {
      if(! length(extracolheads)) {
        heads <- get2rowHeads(labs)
        colheads <- heads[[1]]
        if(any(heads[[2]] != '')) extracolheads <- heads[[2]]
      }
      if(multicol)
        colheads <- paste(slmc1, cvbar, "}{", colheads, "}", sep="")
      header <- if(length(colheads)) paste(colheads, collapse='&')
      if(length(extracolheads)) {
        extracolheads <- ifelse(extracolheads == '' | extracolsize == '',
                                extracolheads,
                                paste('{', sl, extracolsize, ' ',
                                      extracolheads, '}', sep=''))
        if(multicol)
          extracolheads <- ifelse(extracolheads == '', extracolheads,
                                  paste(slmc1, cvbar, '}{', extracolheads,
                                        '}', sep=''))
        else
          extracolheads <- ifelse(extracolheads == '', extracolheads,
                                  paste(extracolheads, sep=''))
        header <- if(length(header))
          paste(header, eol, paste(extracolheads, collapse='&'), sep='')
      }
      if(length(header))
        cat(header, eog, file=file, sep='', append=file != '')
      if(ctable) cat(midrule, file=file, append=file != '')
      else cat(midrule, file=file, append=file != '')
    }
  }
  if(longtable) {
    if(! length(caption))
      cat(sl, "endhead\n", midrule, sl, "endfoot\n", sep="",
          file=file, append=file != '')
    else {
      cat(sl, "endfirsthead", sep="", file=file, append=file != '')
      cat(sl, "caption[]{\\em (", continued, ")} ", eol, sep="",
          file=file, append=file != '')
      cat(midrule, sep="", file=file, append=file != '')
      if(length(cgroupheader))
        cat(cgroupheader, file=file, append=file != '')
      if(length(header)) cat(header, file=file, sep="&", append=file != '')
      cat(eog, midrule, sl, "endhead", '\n', midrule, sep="",
          file=file, append=file != '')
      if(length(insert.bottom)) {
        if(length(insert.bottom.width) == 0) {
          insert.bottom.width = paste0(sl, "linewidth")
        }
        cat(paste(sl, 'multicolumn{', nc, '}{', "p{", insert.bottom.width,
                  '}}{', insert.bottom, '}', eol, sep='', collapse='\n'),
            sep="", file=file, append=file != '')
      }
      cat(sl, "endfoot\n", sep="", file=file, append=file != '')
      cat(sl, "label{", label, "}\n", sep="", file=file, append=file != '')
    }
  }
  { # individual lines, grouped if appropriate, longtable if appropriate
    if (length(n.rgroup)) {
      rg.end <- cumsum(n.rgroup)
      rg.start <- rg.end - n.rgroup + 1
      if(! length(rgroup)) {
        rgroup <- rep("", length(n.rgroup))
      } else {
        if (length(rgroupTexCmd)) {
          rgroup <- paste("{", sl, rgroupTexCmd, " ", rgroup, "}", sep="")
        } else rgroup <- paste("{", rgroup, "}", sep="")
      }
      seq.rgroup <- seq(along=n.rgroup)
    }
    else {
      seq.rgroup <- 1
      rg.end <- nr
      rg.start <- 1
    }
    linecnt <- 0
    for (j in seq.rgroup) {
      if (length(n.rgroup)) {
        if(longtable && linecnt > 0 &&
           (linecnt + n.rgroup[j] + (n.rgroup[j] > 1)) > lines.page) {
          cat(sl, "newpage\n", sep="", file=file, append=file != '')
          linecnt <- 0
        }
        cat(rgroup[j], rep("", nc - 1), sep="&", file=file,
            append=file != '')
        cat(eol, sep="", file=file, append=file != '')
        linecnt <- linecnt + 1
      }
      ## Write the object (and its formatting instructions)
      ## to the output.
      ## Loop through the rows of the object.
      for(i in rg.start[j] : rg.end[j]) {
        if (!
length(n.rgroup)) { if(longtable && linecnt > 0 && (linecnt + 1 > lines.page)) { cat(sl, "newpage\n", sep="", file=file, append=file != '') linecnt <- 0 } } ## Loop through the columns of the object ## write each value (and it's format if there ## is one) if (length(rcellTexCmds)) { num.cols <- ncol(cx) for (colNum in 1 : num.cols) { cat(rcellTexCmds[i, colNum], " ", cx[i, colNum], file=file, append=file != '') if (colNum < num.cols) cat("&", file=file, append=file != '') } } else { ## Original code that writes object to output. cat(cx[i, ], file=file, sep="&", append=file != '') } cat(if(i == rg.end[j] || (! ctable && ! length(n.rgroup))) eog else if(i < rg.end[j]) eol, sep="", file=file, append=file != '') linecnt <- linecnt+1 } ## End of for loop that writes the object. if(length(n.rgroup) > j) cat(midrule, sep = "", file=file, append=file != '') else cat(bottomrule, sep="",file=file, append=file != '') } } cat(latex.end, file=file, sep='\n', append=file != '') sty <- c("longtable"[longtable], "here"[here], "dcolumn"[dcolumn], "ctable"[ctable], "booktabs"[booktabs], if(landscape && ! ctable) "lscape") structure(list(file=file, style=sty), class='latex') } ## Re-written by Daniel Calvelo Aros <dcalvelo@minag.gob.pe> to not use ## S.sty 18Feb04 latex.function <- function(object, title=first.word(deparse(substitute(object))), file=paste(title, ".tex", sep=""), append=FALSE, assignment=TRUE, type=c('example','verbatim','Sinput'), width.cutoff=70, size='', ...) { type <- match.arg(type) fctxt <- deparse(object, width.cutoff=width.cutoff) if(assignment) fctxt[1] <- paste(title , '<-', fctxt[1]) environment <- ifelse(type == 'example', "alltt", "verbatim") environment <- c(example='alltt', verbatim='verbatim', Sinput=paste('Sinput',size,sep=''))[type] preamble <- paste("\\begin{",environment,"}\n",sep="") cat(preamble, file=file, append=file != "") if(type == 'Sinput') cat(fctxt, sep='\n') else { rxs <- if(type == 'example') c("\t=> ", "\\\\=>\\\\(\\\\backslash\\\\)", "([{}])=>\\\\\\1", "<-=>\\\\(\\\\leftarrow\\\\)", "#(.*?$)=>{\\\\rm\\\\scriptsize\\\\#\\1}" ) else c("\t=> ") substitute <- strsplit( rxs, "=>" ) for(line in fctxt) { for( subst in substitute ) { line <- gsub( subst[1], subst[2], line, perl=TRUE ) } line <- paste(line,"\n",sep="") cat(line, file=file, append=file != "") } } postamble <- paste("\\end{",environment,"}\n", sep="") cat(postamble, file=file, append=file != '') structure(list(file=file, style=if(type == 'example')'alltt'), class='latex') } latexVerbatim <- function(x, title=first.word(deparse(substitute(x))), file=paste(title, ".tex", sep=""), append=FALSE, size=NULL, hspace=NULL, width=.Options$width, length=.Options$length, ...) { if(! missing(width) || ! missing(length)) { old <- options('width', 'length') options(width=width, length=length) on.exit(options(old)) } if(file != '') sink(file, append=append) cat('\\setbox0=\\vbox{\n', if(length(size)) c('\\',size,'\n'), '\\begin{verbatim}\n', sep='') print(x, ...) cat('\\end{verbatim}\n}\n', if(length(hspace)) c('\\hspace{',hspace,'}'), '{\\makebox[\\textwidth]{\\box0}}\n', sep='') if(file == '') return(invisible()) sink() structure(list(file=file, style=NULL), class='latex') } latex.list <- function(object, title=first.word(deparse(substitute(object))), file=paste(title, ".tex", sep=""), append=FALSE, label, caption, caption.lot, caption.loc=c('top','bottom'), ...) { caption.loc <- match.arg(caption.loc) nx <- names(object) if (! 
length(nx)) nx <- paste(title, "[[", seq(along=object), "]]", sep="")
  tmp <- latex(object=object[[1]], caption=nx[1], label=nx[1], append=append,
               title=title, file=file, caption.lot=NULL,
               caption.loc=caption.loc, ...)
  tmp.sty <- tmp$style
  for (i in seq(along=object)[-1]) {
    tmp <- latex(object=object[[i]], caption=nx[i], label=nx[i],
                 append=file != '', title=title, file=file,
                 caption.lot=NULL, caption.loc=caption.loc, ...)
    tmp.sty <- c(tmp.sty, tmp$style)
  }
  sty <- if(length(tmp.sty)) unique(tmp.sty) else NULL
  structure(list(file=file, style=sty), class='latex')
}

## Function to translate several expressions to LaTeX form, many of
## which must be set in math mode.
## Arguments inn and out specify additional input and translated
## strings beyond the usual defaults.
## If pb=TRUE, also translates [()] to math mode using \left, \right.
## Assumes that bracket characters in the input always come in matched
## pairs, e.g. [) [] (] (), and that surrounding text with $...$ is OK.
## latexTranslate is used primarily by summary.formula.
latexTranslate <- function(object, inn=NULL, out=NULL, pb=FALSE,
                           greek=FALSE, na='', ...)
{
  text <- ifelse(is.na(object), na, as.character(object))

  inn <- c("|", "%", "#", "<=", "<", ">=", ">", "_", "\\243", "&",
           inn, if(pb) c("[", "(", "]", ")"))

  out <- c("$|$", "\\%", "\\#", "$\\leq$", "$<$", "$\\geq$",
           "$>$", "\\_", "\\pounds", "\\&", out,
           if(pb) c("$\\left[", "$\\left(", "\\right]$", "\\right)$"))

  text <- sedit(text, '$', 'DOLLARS', wild.literal=TRUE)
  text <- sedit(text, inn, out)

  ## See if the string contains a ^ followed by a number - a superscript
  ## (number condition added 31aug02)
  dig <- c('0','1','2','3','4','5','6','7','8','9')
  for(i in seq_along(text)) {
    lt <- nchar(text[i])
    x <- substring(text[i], 1 : lt, 1 : lt)
    j <- x == '^'
    if(any(j)) {
      is <- ((1 : lt)[j])[1]   # get first ^
      remain <- x[-(1 : is)]
      k <- remain %in% c(' ', ',', ')', ']', '\\', '$')
      if(remain[1] %in% dig ||
         (length(remain) > 1 && remain[1] == '-' && remain[2] %in% dig))
        k[-1] <- k[-1] | remain[-1] %nin% dig
      ie <- if(any(k)) is + ((1 : length(remain))[k])[1] else length(x) + 1
      ## See if math mode is already turned on (odd number of $ to left of ^)
      dol <- if(sum(x[1 : is] == '$') %% 2) '' else '$'
      substring2(text[i], is, ie - 1) <-
        paste(dol, '^{', substring(text[i], is + 1, ie - 1), '}', dol, sep='')
    }
    if(greek) {
      gl <- c('alpha','beta','gamma','delta','epsilon','varepsilon','zeta',
              'eta','theta','vartheta','iota','kappa','lambda','mu','nu',
              'xi','pi','varpi','rho','varrho','sigma','varsigma','tau',
              'upsilon','phi','varphi','chi','psi','omega','Gamma','Delta',
              'Theta','Lambda','Xi','Pi','Sigma','Upsilon','Phi','Psi','Omega')
      for(w in gl)
        text[i] <- gsub(paste('\\b', w, '\\b', sep=''),
                        paste('$\\\\', w, '$', sep=''), text[i])
    }
  }
  sedit(text, 'DOLLARS', '\\$', wild.literal=TRUE)
}
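## A brief usage sketch of latexTranslate (illustrative only, not part of
## the package source); expected results, as printed by R, are shown in
## the comments:
if(FALSE) {
  latexTranslate(c('P<0.05', '50%'))         # "P$<$0.05"  "50\\%"
  latexTranslate('[0,1)', pb=TRUE)           # "$\\left[0,1\\right)$"
  latexTranslate('alpha level', greek=TRUE)  # "$\\alpha$ level"
}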
latex <- function(object, ...)
{ ## added title= 25May01
  if (! length(class(object))) class(object) <- data.class(object)
  UseMethod("latex")
}

optionsCmds <- function(pgm)
{
  optionName <- paste(pgm, 'cmd', sep='')
  v <- .Options[[optionName]]
  if(pgm == 'xdvi' && .Platform$OS.type != 'unix' && ! length(v))
    v <- 'yap'  # MikTeX
  if(length(v) && v != '') pgm <- v
  pgm
}

## From Rich Heiberger 2014-12-04:
## The original function in Hmisc_3.14-5 did not work on Windows:
## system() does not handle DOS internal commands such as 'cd', so it
## was switched to shell() on Windows.  This revision works on Windows
## and Macintosh without setting options.  On Windows, yap displays the
## dvi file (with a warning of unknown cause); on Mac, X displays the
## dvi file.
##
## For pdflatex the following options are needed.
## Windows and Macintosh:
##   options(latexcmd='pdflatex')
##   options(dviExtension='pdf')
## Windows with pdflatex:
##   options(xdvicmd='c:\\progra~1\\Adobe\\Reader~1.0\\Reader\\AcroRd32.exe') # 32-bit
##   options(xdvicmd='c:\\progra~2\\Adobe\\Reader~1.0\\Reader\\AcroRd32.exe') # 64-bit Windows
## Adobe opens and displays the file correctly, but also gives a warning
## of unknown cause.
## Macintosh with pdflatex:
##   options(xdvicmd='open')

dvi.latex <- function(object, prlog=FALSE, nomargins=TRUE,
                      width=5.5, height=7, ...)
{
  fi <- object$file; sty <- object$style
  if(length(sty)) sty <- paste('\\usepackage{', sty, '}', sep='')
  if(nomargins)
    sty <- c(sty, paste('\\usepackage[paperwidth=', width,
                        'in,paperheight=', height,
                        'in,noheadfoot,margin=0in]{geometry}', sep=''))
  ## pre <- tempfile(); post <- tempfile()   # 1dec03
  tmp <- tempfile()
  tmptex <- paste(tmp, 'tex', sep='.')
  infi <- readLines(fi, n=-1)  # S-Plus 7 doesn't default to read to EOF 3may05
  cat('\\documentclass{report}', sty, '\\begin{document}\\pagestyle{empty}',
      infi, '\\end{document}\n', file=tmptex, sep='\n')
  if (.Platform$OS.type == "unix")
    sys(paste("cd", shQuote(tempdir()), "&&", optionsCmds("latex"),
              "-interaction=scrollmode", shQuote(tmp)), output = FALSE)
  else  ## MS-DOS
    shell(paste("cd", shQuote(tempdir()), "&", optionsCmds("latex"),
                "-interaction=scrollmode", shQuote(tmp)),
          shell="CMD", intern = FALSE)
  if(prlog)
    cat(scan(paste(tmp, 'log', sep='.'), list(''), sep='\n')[[1]], sep='\n')
  fi <- paste(tmp, getOption("dviExtension", "dvi"), sep='.')
  structure(list(file=fi), class='dvi')
}

show.dvi <- function(object, width=5.5, height=7)
{
  viewer <- optionsCmds('xdvi')
  ## Only xdvi needs paper and scale arguments; other viewers
  ## (e.g. yap, kdvi) are called with just the file name
  cmd <- if(viewer == 'xdvi')
    paste(viewer, ' -paper ', width, 'x', height, 'in -s 0 ',
          object$file, sep='')
  else paste(viewer, object$file)
  system(cmd, intern = TRUE, wait=TRUE)
  invisible(NULL)
}

## enhanced show.latex 22dec02 - special treatment of file==''
show.latex <- function(object)
{
  if(object$file == '') {
    if(length(object$style)) {
      environment(show.latex)$latexStyles <-
        if(exists("latexStyles", envir=environment(show.latex)))
          unique(c(environment(show.latex)$latexStyles, object$style))
        else object$style
    }
    return(invisible())
  }
  show.dvi(dvi.latex(object))
}
environment(show.latex) <- new.env()

print.dvi   <- function(x, ...) show.dvi(x)
print.latex <- function(x, ...) show.latex(x)

dvi   <- function(object, ...) UseMethod('dvi')
dvips <- function(object, ...) UseMethod('dvips')
dvigv <- function(object, ...) UseMethod('dvigv')

dvips.dvi <- function(object, file, ...)
{
  cmd <- if(missing(file)) paste(optionsCmds('dvips'), shQuote(object$file))
  else paste(optionsCmds('dvips'), '-o', file, shQuote(object$file))
  ## paste(optionsCmds('dvips'),'-f', object$file,' | lpr') else 5dec03
  ## 2 dQuote 26jan04
  invisible(sys(cmd))
}

dvigv.dvi <- function(object, ...)
  invisible(sys(paste(optionsCmds('dvips'), '-f', object$file, '| gv - &')))

## added ... to dvixx.dvi calls below 1dec03
dvips.latex <- function(object, ...) invisible(dvips.dvi(dvi.latex(object), ...))
dvigv.latex <- function(object, ...) invisible(dvigv.dvi(dvi.latex(object), ...))

latexSN <- function(x)
{
  x <- format(x)
  x <- sedit(x, c('e+00', 'e-0*', 'e-*', 'e+0*', 'e+*'),
             c('', '\\!\\times\\!10^{-*}', '\\!\\times\\!10^{-*}',
               '\\!\\times\\!10^{*}', '\\!\\times\\!10^{*}'))
  x
}
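## A small illustration of latexSN (a sketch, not part of the original
## source); the exact strings depend on how format() renders the numbers:
if(FALSE) {
  latexSN(6.02e23)  # "6.02\\!\\times\\!10^{23}"
  latexSN(1e-05)    # "1\\!\\times\\!10^{-5}"
}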
htmlSN <- function(x, pretty=TRUE, ...)
{
  x <- if(pretty) prettyNum(x, ...) else format(x, ...)
  x <- gsub('e\\+00', '', x)
  x <- gsub('e\\+0([0-9])', '\u00D710<sup>\\1</sup>', x)
  x <- gsub('e\\+(.*)', '\u00D710<sup>\\1</sup>', x)
  x <- gsub('e-0([0-9])', '\u00D710<sup>-\\1</sup>', x)
  gsub('e-(.*)', '\u00D710<sup>-\\1</sup>', x)
}

## ----- Hmisc/R/bpplot.s -----

## Modified FEH 30Jun97 - delete missing data, name defaults to TRUE,
## auto names for list argument, ylab defaults to "" instead of
## "Percentiles", names -> name, added srtx

bpplot <- function(..., name = TRUE, main = "Box-Percentile Plot",
                   xlab = "", ylab = "", srtx = 0, plotopts = NULL)
{
  all.x <- list(...)  ## FH 30Jun97
  nam <- character(0) ## FH
  ## if(is.list(...)) {  ## FH
  if(is.list(all.x[[1]])) {
    all.x <- all.x[[1]]
    if(is.logical(name) && name) name <- names(...)  ## FH
  }
  n <- length(all.x)
  centers <- seq(from = 0, by = 1.2, length = n)
  ymax <- max(sapply(all.x, max, na.rm=TRUE))  ## na.rm=TRUE FEH
  ymin <- min(sapply(all.x, min, na.rm=TRUE))
  xmax <- max(centers) + 0.5
  xmin <- -0.5
  pargs <- c(list(c(xmin, xmax), c(ymin, ymax), type = "n",
                  main = main, xlab = '', ylab = ylab, xaxt = "n"),
             plotopts)
  do.call("plot", pargs)
  for(i in 1 : n) {
    plot.values <- bpx(all.x[[i]], centers[i])
    lines(plot.values$x1, plot.values$y1)
    lines(plot.values$x2, plot.values$y2)
    lines(plot.values$q1.x, plot.values$q1.y)
    lines(plot.values$q3.x, plot.values$q3.y)
    lines(plot.values$med.x, plot.values$med.y)
  }
  if(is.logical(name)) {
    if(name)
      mgp.axis(1, centers,
               sapply(substitute(list(...)), deparse)[2 : (n + 1)],
               srt=srtx, adj=if(srtx == 0) .5 else 1, axistitle=xlab)
  }
  else mgp.axis(1, centers, name,
                srt=srtx, adj=if(srtx == 0) .5 else 1, axistitle=xlab)
  invisible(centers)
}

bpx <- function(y, offset)
{
  y <- y[!is.na(y)]  ## FEH 30Jun97
  n <- length(y)
  delta <- 1 / (n + 1)
  prob <- seq(delta, 1 - delta, delta)
  quan <- sort(y)
  med <- median(y)
  q1 <- median(y[y < med])
  q3 <- median(y[y > med])
  first.half.p <- prob[quan <= med]
  second.half.p <- 1 - prob[quan > med]
  plotx <- c(first.half.p, second.half.p)

  ## calculating the ends of the first quartile line
  qx <- approx(quan, plotx, xout = q1)$y
  q1.x <- c(-qx, qx) + offset

  ## calculating the ends of the third quartile line
  qx <- approx(quan, plotx, xout = q3)$y
  q3.x <- c(-qx, qx) + offset

  q1.y <- c(q1, q1)
  q3.y <- c(q3, q3)
  med.x <- c(-max(first.half.p), max(first.half.p)) + offset
  med.y <- c(med, med)
  return(list(x1 = (-plotx) + offset, y1 = quan,
              x2 = plotx + offset, y2 = quan,
              q1.y = q1.y, q1.x = q1.x, q3.y = q3.y, q3.x = q3.x,
              med.y = med.y, med.x = med.x))
}
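## A minimal box-percentile plot example (an illustrative sketch only):
if(FALSE) {
  set.seed(1)
  bpplot(rnorm(500), rexp(500),
         name=c('Normal', 'Exponential'), ylab='Value')
}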
## ----- Hmisc/R/Cs.s -----

Cs <- function(...) as.character(sys.call())[-1]

.q <- function(...)
{
  s <- sys.call()[-1]
  w <- as.character(s)
  n <- names(s)
  if(length(n)) names(w) <- n
  w
}
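## Quick illustration of Cs and .q (a sketch, not part of the source):
## both quote unquoted names; .q additionally retains argument names.
if(FALSE) {
  Cs(age, sex, bp)        # c("age", "sex", "bp")
  .q(a, b, c='this one')  # same as c("a", "b", c = "this one")
}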
## "r" and "b" (below) refer to the right and bottom indices for the panels in the layout ## The gaps' indices are one to the right of the panels' r index (except the right most panel) ## and one below the panels' b index (except the bottom most panel) ## Rmin and Rmax give the span of the horizontal gap; ## Bmin and Bmax give the span of the vertical gap panelsR <- unique(gt$layout$r[grepl("panel", gt$layout$name)]) Rmin = panelsR[1] ## 4 Rmax = panelsR[length(panelsR)] + 1 panelsR = panelsR[-length(panelsR)] +1 panelsB <- unique(gt$layout$b[grepl("panel", gt$layout$name)]) Bmin = panelsB[1] - 1 ## 3 Bmax = panelsB[length(panelsB)] panelsB = panelsB[-length(panelsB)] + 1 ## Add colored rectangles into the vertical and horizontal gaps for(i in panelsR) gt <- gtable::gtable_add_grob(gt, list(grid::rectGrob(gp = grid::gpar(col = NA, fill = col))), Bmin, i, Bmax, i) for(j in panelsB) gt <- gtable::gtable_add_grob(gt, list(grid::rectGrob(gp = grid::gpar(col = NA, fill = col))), j, Rmin, j, Rmax) grid::grid.draw(gt) invisible() } ## The following exists to nullify invisible() used in arrangeGrob's ## returned value. Regarding class and print see ## http://stackoverflow.com/questions/29062766/store-output-from-gridextragrid-arrange-into-an-object arrGrob <- function(...) { if(! requireNamespace('gridExtra', quietly=TRUE)) stop('gridExtra package not installed') z <- gridExtra::arrangeGrob(...) class(z) <- c("arrGrob", class(z)) z } print.arrGrob <- function(x, ...) { grid::grid.newpage() grid::grid.draw(x) } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/subplot.s�����������������������������������������������������������������������������������0000644�0001762�0000144�00000001431�12243661443�013376� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������subplot <- function (fun, x, y = NULL, size = c(1, 1), vadj = 0.5, hadj = 0.5, pars = NULL) { old.par <- par(no.readonly = TRUE) on.exit(par(old.par)) if (missing(x)) x <- locator(2) xy <- xy.coords(x, y) if (length(xy$x) != 2) { pin <- par("pin") tmp <- cnvrt.coords(xy$x[1], xy$y[1], "usr")$plt x <- c(tmp$x - hadj * size[1]/pin[1], tmp$x + (1 - hadj) * size[1]/pin[1]) y <- c(tmp$y - vadj * size[2]/pin[2], tmp$y + (1 - vadj) * size[2]/pin[2]) xy <- cnvrt.coords(x, y, "plt")$fig } else { xy <- cnvrt.coords(xy, , "usr")$fig } if(length(pars)) par(pars) par(plt = c(xy$x, xy$y), new = TRUE) if(is.function(fun))fun() else fun tmp.par <- par(no.readonly = TRUE) return(invisible(tmp.par)) } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/summaryS.s����������������������������������������������������������������������������������0000644�0001762�0000144�00000074440�13734175407�013546� 0����������������������������������������������������������������������������������������������������ustar 
## ----- Hmisc/R/summaryS.s -----

summaryS <- function(formula, fun=NULL, data=NULL, subset=NULL,
                     na.action=na.retain, continuous=10, ...)
{
  formula <- Formula(formula)
  Y <- if(length(subset))
    model.frame(formula, data=data, subset=subset, na.action=na.action)
  else model.frame(formula, data=data, na.action=na.action)
  X <- model.part(formula, data=Y, rhs=1)
  Y <- model.part(formula, data=Y, lhs=1)
  nY <- NCOL(Y)
  nX <- NCOL(X)
  namY <- names(Y)
  namX <- names(X)
  if(nX == 0) X <- data.frame(x=rep(1, NROW(Y)))
  ux <- unique(X)
  Z <- NULL
  n <- nrow(X)

  g <- function(y) {
    y <- y[! is.na(y)]
    if(is.character(y)) c(NA, NA)
    else if(is.factor(y)) c(1, length(levels(y)))
    else if(length(unique(y)) < continuous) range(y)
    else if(! is.matrix(y)) quantile(y, c(0.01, 0.99))
  }
  ylim <- lapply(Y, g)

  w <- reshape(cbind(X, Y), direction='long', v.names='y',
               varying=namY, times=namY, timevar='yvar')
  if(is.Surv(Y[[1]])) {
    at <- attributes(Y[[1]])
    at <- at[setdiff(names(at), c('dim', 'dimnames'))]
    attributes(w$y) <- c(attributes(w$y), at)
  }
  w$yvar <- factor(w$yvar, namY)
  funlabel <- NULL
  if(length(fun)) {
    by <- c('yvar', if(length(namX)) namX else 'x')
    y <- w$y
    w <- summarize(y, w[by], fun, type='matrix', keepcolnames=TRUE)
    funlabel <- if(is.matrix(w$y)) colnames(w$y)[1]
  }

  gg <- function(x)
    if(is.character(x) || is.factor(x)) 'categorical' else 'numeric'

  ## For some reason sapply is doubling names e.g. sbp.sbp
  sapply2 <- function(data, ...) {
    s <- sapply(data, ...)
    names(s) <- names(data)
    s
  }

  xlabels <- sapply2(X, label)
  xlabels <- ifelse(xlabels == '', names(xlabels), xlabels)
  ylabels <- sapply2(Y, label)
  ylabels <- ifelse(ylabels == '', names(ylabels), ylabels)

  for(n in names(w))  # takes care of R change stringsAsFactors=FALSE
    if(is.character(w[[n]])) w[[n]] <- factor(w[[n]])

  structure(w, class=c('summaryS', 'data.frame'),
            formula=formula, fun=fun,
            xnames=names(X), xlabels=xlabels, xunits=sapply2(X, units),
            xtype=sapply(X, gg),
            ynames=namY, ylabels=ylabels, yunits=sapply2(Y, units),
            ylim=ylim, funlabel=funlabel)
}

plot.summaryS <-
  function(x, formula=NULL, groups=NULL, panel=NULL,
           paneldoesgroups=FALSE, datadensity=NULL, ylab='',
           funlabel=NULL, textonly='n', textplot=NULL,
           digits=3, custom=NULL, xlim=NULL, ylim=NULL,
           cex.strip=1, cex.values=0.5, pch.stats=NULL,
           key=list(columns=length(groupslevels),
                    x=.75, y=-.04, cex=.9,
                    col=trellis.par.get('superpose.symbol')$col,
                    corner=c(0,1)),
           outerlabels=TRUE, autoarrange=TRUE, scat1d.opts=NULL, ...)
{
  xtype <- attr(x, 'xtype')
  nn <- sum(xtype == 'numeric')
  if(nn > 1) stop('does not handle more than one numeric continuous x')
  X <- x
  at <- attributes(x)
  Form <- at$formula
  nX <- at$nX
  nY <- at$nY
  ylabels <- at$ylabels
  yunits <- at$yunits
  ylims <- at$ylim
  xnames <- at$xnames
  xlabels <- at$xlabels
  xunits <- at$xunits
  fun <- at$fun
  funlabel <- if(length(at$funlabel)) at$funlabel else funlabel
  Panel <- panel

  ptype <- if(length(fun)) {  # used to always be 'dot'
    if(length(Panel)) 'xy.special' else 'dot'
  } else 'xy'
  if(ptype %in% c('xy', 'xy.special') && ! any(xtype == 'numeric'))
    stop('must have a numeric x variable to make x-y plot')

  groupslevels <- if(length(groups)) levels(x[[groups]])
  condvar <- xnames[xtype == 'categorical']
  ## Reorder condvar in descending order of number of levels
  numu <- function(x) if(is.factor(x)) length(levels(x)) else length(unique(x[!
is.na(x)])) if(autoarrange && length(condvar) > 1) { nlev <- sapply(X[condvar], numu) condvar <- condvar[order(nlev)] } form <- if(length(formula)) formula else { ## Non-groups conditioning variables ngcond <- setdiff(condvar, groups) ## Collapsed non-group conditioning variables ccv <- paste('|', paste(c(ngcond, 'yvar'), collapse=' * ')) ## Collapsed non-group cond var after the first ccv1 <- if(length(ngcond) > 1) paste('|', paste(c(ngcond[-1], 'yvar'), collapse=' * ')) f <- if(ptype %in% c('xy', 'xy.special')) paste('y ~', xnames[xtype == 'numeric'], ccv, sep='') else paste(ngcond[1], '~ y', ccv1, sep='') as.formula(f) } pst <- list(cex=cex.strip) yvarlev <- NULL for(v in levels(X$yvar)) { un <- yunits[v] l <- if(ylabels[v] == v && un == '') v else labelPlotmath(ylabels[v], un) yvarlev <- c(yvarlev, l) } strip <- function(which.given, which.panel, var.name, factor.levels, ...) { current.var <- var.name[which.given] levs <- if(current.var == 'yvar') yvarlev else factor.levels strip.default(which.given, which.panel, var.name, factor.levels=levs, ...) } ylev <- levels(X$yvar) ## lims <- if(length(xlim)) xlim else ylims[ylev] lims <- ylims[ylev] ## lims needs to be repeated according to layout vars <- all.vars(form) cond <- vars[- (1 : 2)] if(! all(cond %in% 'yvar') && cond[1] != 'yvar') { ngny <- setdiff(cond, 'yvar') nr <- length(unique(do.call('paste', X[ngny]))) lims <- rep(lims, each=nr) } if(length(ylim)) lims <- ylim d <- if(ptype == 'xy') { pan <- if(! length(datadensity)) function(...) {} else function(x, y, subscripts, groups=NULL, ...) { gp <- length(groups) plot.line <- trellis.par.get(if(gp) "superpose.line" else "plot.line") col <- plot.line$col gr <- if(gp) groups[subscripts] else factor(rep('', length(x))) ypos <- unit(1, 'npc') for(i in 1:length(levels(gr))) { j <- which(gr == levels(gr)[i]) ypos <- ypos - if(length(scat1d.opts) && 'nhistSpike' %in% names(scat1d.opts)) unit(2, 'mm') else unit(1, 'mm') do.call('scat1d', c(list(x=x[j], y=ypos, col=col[i], grid=TRUE), scat1d.opts)) } } scal <- list(y=list(relation='free', limits=lims, rot=0)) if(length(xlim)) scal$x <- list(limits=xlim) xlab <- labelPlotmath(xlabels[xtype == 'numeric'], xunits [xtype == 'numeric']) if(! length(groups)) { if(! length(Panel)) Panel <- panel.xyplot xyplot(form, data=X, panel=function(...) {Panel(...); pan(...)}, xlab=xlab, ylab=ylab, scales=scal, strip=strip, par.strip.text=pst, ...) } else { panel.groups <- if(paneldoesgroups) NULL else if(length(Panel)) Panel else panel.xyplot if(! paneldoesgroups) Panel <- panel.superpose g <- if(length(panel.groups)) "xyplot(form, groups=%s, data=X, scales=scal, panel=function(...) {if(length(Panel)) Panel(..., scat1d.opts=scat1d.opts); pan(...)}, panel.groups=panel.groups, auto.key=key, xlab=xlab, ylab=ylab, strip=strip, par.strip.text=pst, ...)" else "xyplot(form, groups=%s, data=X, scales=scal, panel=function(...) {if(length(Panel)) Panel(..., scat1d.opts=scat1d.opts); pan(...)}, auto.key=key, xlab=xlab, ylab=ylab, strip=strip, par.strip.text=pst, ...)" eval(parse(text=sprintf(g, groups))) } } else { # Dot chart or xy.special ## If > 1 calculated statistics, y is a matrix. ## Save first column as y and save remaining columns that are not ## text only as yother y <- X$y yother <- NULL yText <- NULL if(is.matrix(y) && length(c(textonly, textplot))) { ptext <- colnames(y) %in% c(textonly, textplot) if(any(ptext)) yText <- y[, ptext, drop=FALSE] ponly <- colnames(y) %in% textonly if(any(ponly)) y <- y[, ! 
ponly] } if(is.matrix(y) && ncol(y) > 1) { yother <- y[, -1, drop=FALSE] X$y <- y[, 1] } # ylev <- levels(X$yvar) # lims <- if(length(xlim)) xlim else ylims[ylev] # ## lims needs to be repeated according to layout # vars <- all.vars(form) # cond <- vars[- (1 : 2)] # if(! all(cond %in% 'yvar') && cond[1] != 'yvar') { # ngny <- setdiff(cond, 'yvar') # nr <- length(unique(do.call('paste', X[ngny]))) # lims <- rep(lims, each=nr) # } # if(length(ylim)) lims <- ylim scal <- list(x=list(relation='free', limits=xlim)) # limits=lims)) if(ptype == 'xy.special') names(scal) <- 'y' if(ptype == 'dot') { pan <- function(x, y, subscripts, groups=NULL, yother=NULL,...) { gp <- length(groups) dot.symbol <- trellis.par.get(if(gp)'superpose.symbol' else 'dot.symbol') plot.line <- trellis.par.get(if(gp) "superpose.line" else "plot.line") pch = dot.symbol$pch col = dot.symbol$col cex = dot.symbol$cex font = dot.symbol$font segmnts <- function (x0, y0, x1, y1, ...) grid::grid.segments(x0, y0, x1, y1, default.units = "native", gp = grid::gpar(...)) if(length(yother)) { snames <- colnames(yother) nc <- ncol(yother) yoth <- yother[subscripts,, drop=FALSE] gr <- if(gp) groups[subscripts] else factor(rep('', length(x))) if(length(yText)) { yText <- yText[subscripts,, drop=FALSE] if(length(custom)) { k <- custom(yText) pasted <- k$result llong <- unit(1, 'strwidth', k$longest) } else { pasted <- rep('', length(y)) for(i in 1 : ncol(yText)) { if(i > 1) pasted <- paste(pasted, ' ', sep='') pasted <- paste(pasted, colnames(yText)[i], '=', format(round(yText[, i], digits=digits)), sep='') } llong <- unit(1, 'strwidth', paste(' ', pasted[which.max(nchar(pasted))], sep='')) } xpos <- unit(1, 'npc') - unit(1, 'mm') - length(levels(gr)) * llong } for(i in 1:length(levels(gr))) { j <- which(gr == levels(gr)[i]) if(nc > 1) segmnts(yoth[j, 1], y[j], yoth[j, nc], y[j], lwd=2 * plot.line$lwd[1], lty=plot.line$lty[i], col=plot.line$col[i]) if(nc == 4) segmnts(yoth[j ,2], y[j], yoth[j ,3], y[j], lwd=2*plot.line$lwd[1], lty=plot.line$lty[i], col=plot.line$col[i]) for(k in 1 : nc) { if(length(pch.stats)) { p <- pch.stats[snames[k]] if(!is.na(p)) lpoints(yoth[j, k], y[j], pch=p, cex=cex, col=col[i], font=font) } } ## Show selected statistics just under dot lines if(length(yText)) { xpos <- xpos + llong grid::grid.text(pasted[j], xpos, unit(y[j], 'native') - unit(1.75, 'mm'), just='right', gp=grid::gpar(cex=cex.values, col=col[i])) } } if(gp) panel.superpose(x, y, groups=as.numeric(groups), subscripts=subscripts, pch=pch, col=col, cex=cex, font=font, ...) else panel.dotplot(x, y, subscripts=subscripts, pch=pch, col=col, cex=cex, font=font, ...) } else { if(gp) panel.superpose(x, y, groups=as.numeric(groups), subscripts=subscripts, pch=pch, col=col, cex=cex, font=font, ...) else panel.dotplot(x, y, subscripts=subscripts, pch=pch, col=col, cex=cex, font=font, ...) } } d <- if(!length(groups)) dotplot(form, data=X, panel=pan, strip=strip, par.strip.text=pst, xlab=funlabel, scale=scal, yother=yother,...) else eval(parse(text= sprintf("dotplot(form, groups=%s, data=X, panel=pan, strip=strip, par.strip.text=pst, auto.key=key, xlab=funlabel, scale=scal, yother=yother, ...)", groups) )) } # end ptype 'dot' else { # ptype 'xy.special' xl <- labelPlotmath(xlabels[1], xunits[1]) yl <- if(ylab == '') funlabel else ylab d <- if(!length(groups)) xyplot(form, data=X, panel=Panel, strip=strip, par.strip.text=pst, xlab=xl, ylab=yl, scale=scal, yother=yother, ...) else { panel.groups <- if(paneldoesgroups) NULL else Panel if(! 
paneldoesgroups) panel <- panel.superpose g <- if(length(panel.groups)) "xyplot(form, groups=%s, data=X, panel=Panel, panel.groups=panel.groups, strip=strip, par.strip.text=pst, auto.key=key, xlab=xl, ylab=yl, scale=scal, yother=yother, ...)" else "xyplot(form, groups=%s, data=X, panel=Panel, strip=strip, par.strip.text=pst, auto.key=key, xlab=xl, ylab=yl, scale=scal, yother=yother, ...)" eval(parse(text=sprintf(g, groups))) } } } if(outerlabels && length(dim(d)) == 2) d <- latticeExtra::useOuterStrips(d, strip=strip, strip.left=strip) d } plotp.summaryS <- function(data, formula=NULL, groups=NULL, sfun=NULL, fitter=NULL, showpts=! length(fitter), funlabel=NULL, digits=5, xlim=NULL, ylim=NULL, shareX=TRUE, shareY=FALSE, autoarrange=TRUE, ...) { xtype <- attr(data, 'xtype') nn <- sum(xtype == 'numeric') if(nn > 1) stop('does not handle more than one numeric continuous x') X <- data at <- attributes(data) Form <- at$formula nX <- at$nX nY <- at$nY ylabels <- at$ylabels yunits <- at$yunits ylims <- at$ylim xnames <- at$xnames xlabels <- at$xlabels xunits <- at$xunits fun <- at$fun funlabel <- if(! length(funlabel) && length(at$funlabel)) at$funlabel else funlabel funlabel <- htmlTranslate(funlabel, greek=TRUE) ly <- length(ylabels) ylab <- ylabels for(i in 1 : length(ylab)) ylab[i] <- labelPlotmath(ylabels[i], yunits[i], html=TRUE) aform <- function(n) as.formula(paste('~', n)) fmt <- function(x) htmlSN(x, digits=digits) ptype <- if(length(fun)) { # used to always be 'dot' if(length(sfun)) 'xy.special' else 'dot' } else 'xy' if(length(sfun)) ptype <- 'xy.special' if(ptype %in% c('xy', 'xy.special') && ! any(xtype == 'numeric')) stop('must have a numeric x variable to make x-y plot') groupslevels <- if(length(groups)) levels(data[[groups]]) condvar <- xnames[xtype == 'categorical'] ## Reorder condvar in descending order of number of levels numu <- function(x) if(is.factor(x)) length(levels(x)) else length(unique(x[! is.na(x)])) if(autoarrange && length(condvar) > 1) { nlev <- sapply(X[condvar], numu) condvar <- condvar[order(nlev)] } form <- if(length(formula)) formula else { ## Non-groups conditioning variables ngcond <- setdiff(condvar, groups) ## Collapsed non-group conditioning variables ccv <- paste('|', paste(c(ngcond, 'yvar'), collapse=' * ')) ## Collapsed non-group cond var after the first ccv1 <- if(length(ngcond) > 1) paste('|', paste(c(ngcond[-1], 'yvar'), collapse=' * ')) f <- if(ptype %in% c('xy', 'xy.special')) paste('y ~', xnames[xtype == 'numeric'], ccv, sep='') else paste(ngcond[1], '~ y', ccv1, sep='') as.formula(f) } yvarlev <- NULL for(v in levels(X$yvar)) { un <- yunits[v] l <- if(ylabels[v] == v && un == '') v else labelPlotmath(ylabels[v], un, html=TRUE) yvarlev <- c(yvarlev, l) } ylev <- levels(X$yvar) lims <- ylims[ylev] vars <- all.vars(form) cond <- vars[- (1 : 2)] if(! all(cond %in% 'yvar') && cond[1] != 'yvar') { ngny <- setdiff(cond, 'yvar') nr <- length(unique(do.call('paste', X[ngny]))) } if(length(ylim)) lims <- ylim gp <- length(groups) ylev <- levels(X$yvar) nyvar <- length(ylev) xn <- setdiff(xnames, groups) if(length(xn) %nin% 1:2) stop(paste('expecting at most two variables, found these:', paste(xn, collapse=', '))) if(length(xn) > 1) { strata <- aform(xn[2]) xn <- xn[1] } else strata <- NULL statnames <- if(is.matrix(X$y)) colnames(X$y) .txt. <- paste0(xn, ': ', fmt(X[[xn]])) if(gp) .txt. <- paste0(.txt., '<br>', X[[groups]]) nstat <- length(statnames) if(nstat == 0) .txt. <- paste0(.txt., '<br>', X$yvar, ': ', fmt(X$y)) else for(i in 1 : nstat) { ## ?? 
was if(i > 1)'<br>' if(i == 2 && length(funlabel) && funlabel != '' && funlabel != ' ') .txt. <- paste0(.txt., '<br>', funlabel) .txt. <- paste0(.txt., '<br>', statnames[i], ': ', fmt(X$y[, i])) } X$.txt. <- .txt. xlab <- labelPlotmath(xlabels[xn], xunits[xn], html=TRUE) gp <- length(groups) gr <- if(gp) X[[groups]] else factor(rep('', nrow(X))) if(ptype == 'xy') { if(nstat > 0 && ! gp) X <- cbind(X, tracename=statnames[1]) p <- plotlyM(X, x = aform(xn), y = ~y, htext = ~.txt., color = if(gp) aform(groups), multplot = ~yvar, strata = strata, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, fitter=fitter, showpts=showpts, ...) } else { ## end ptype xy ## Dot chart or xy.special ## If > 1 calculated statistics, y is a matrix. ## Save first column as y and save remaining columns as yother y <- X$y yother <- NULL if(is.matrix(y) && ncol(y) > 1) { yother <- y[, -1, drop=FALSE] X$y <- y[, 1] } if(ptype == 'dot') { R <- X R$yhi <- NA if(length(statnames) && ! gp) R <- cbind(R, tracename=statnames[1]) if(length(yother)) { snames <- colnames(yother) nc <- ncol(yother) ## if(nc > 1) yother[,1] yother[,nc] if((all(c('Lower', 'Upper') %in% snames)) && nc < 4) { S <- R S$y <- yother[, 'Lower'] S$yhi <- yother[, 'Upper'] if(! gp) S$tracename <- 'C.L.' R <- rbind(R, S) } if(nc == 4) { S <- R S$y <- yother[, 2] S$yhi <- yother[, 3] if(! gp) S$tracename <- paste(snames[2:3], collapse=' ') R <- rbind(R, S) } # for(k in 1 : nc) { # if(length(pch.stats)) { # p <- pch.stats[snames[k]] # if(!is.na(p)) # R <- rbind(R, data.frame(x=X$y, y=yother[, k], yhi=NA, gr=gr, # tracename=snames[k])) # ## TODO: need to communicate pch=p # } # } } p <- plotlyM(R, x=aform(xn), multplot=~ yvar, color = if(gp) aform(groups), htext = ~ .txt., rotate = TRUE, ylab = ylab, xlim = xlim, ylim=ylim, shareX = shareX, shareY=shareY, ...) } # end ptype 'dot' else { # ptype 'xy.special' xl <- labelPlotmath(xlabels[1], xunits[1], html=TRUE) yl <- if(! length(ylab) || ylab[1] == '') funlabel else ylab p <- sfun(X[[xn]], X$y, groups=if(gp) gr, yother=yother, yvar=X$yvar, maintracename=statnames[1], xlab=xl, ylab=yl, xlim=xlim, ylim=ylim, zeroline=FALSE, ...) } } p } mbarclPanel <- function(x, y, subscripts, groups=NULL, yother, ...) { gp <- length(groups) plot.line <- trellis.par.get(if(gp) "superpose.line" else "plot.line") yother <- yother[subscripts, , drop=FALSE] se <- if('se' %in% colnames(yother)) yother[, 'se'] yother <- yother[, colnames(yother) %nin% c('n', 'se'), drop=FALSE] if(all(c('0.375', '0.625') %in% colnames(yother))) { ## If HD median estimate is not between 0.375 and 0.625 quantiles ## take it to be the closer of the two y375 <- yother[, '0.375'] y625 <- yother[, '0.625'] y <- pmin(y, y625) y <- pmax(y, y375) } yother <- cbind(y=y, yother) gr <- if(gp) groups[subscripts] else factor(rep('', length(x))) lev <- levels(gr) for(i in 1 : length(lev)) { j <- which(gr == levels(gr)[i]) multLines(x[j], yother[j,, drop=FALSE], col=plot.line$col[i], lty=plot.line$lty[i], lwd=plot.line$lwd[i], grid=TRUE, pos=c('left', 'right')[i]) } if(length(lev) == 2 && length(se)) { xu <- sort(unique(x)) j1 <- gr == lev[1] j2 <- gr == lev[2] Y <- matrix(NA, nrow=length(xu), ncol=4, dimnames=list(as.character(xu), c('y1', 'y2', 'se1', 'se2'))) x1 <- as.character(x)[j1] x2 <- as.character(x)[j2] Y[x1, 'y1' ] <- y [j1] Y[x1, 'se1'] <- se[j1] Y[x2, 'y2' ] <- y [j2] Y[x2, 'se2'] <- se[j2] ymid <- (Y[, 'y1'] + Y[, 'y2']) / 2. 
halfwidthci <- qnorm(0.975) * sqrt(Y[, 'se1']^2 + Y[, 'se2']^2) col <- adjustcolor('black', alpha.f=0.7) grid::grid.segments(xu, ymid - 0.5 * halfwidthci, xu, ymid + 0.5 * halfwidthci, default.units='native', gp=grid::gpar(col=col, lwd=1.5)) } } medvPanel <- function(x, y, subscripts, groups=NULL, violin=TRUE, quantiles=FALSE, ...) { gp <- length(groups) plot.line <- trellis.par.get(if(gp) "superpose.line" else "plot.line") sym <- trellis.par.get(if(gp) "superpose.symbol" else "plot.symbol") quant <- function(y) { probs <- c(0.05, 0.125, 0.25, 0.375) probs <- sort(c(probs, 1. - probs)) y <- y[! is.na(y)] if(length(y) < 3) { if(quantiles) { w <- c(median(y), rep(NA, 9), length(y)) names(w) <- c('Median', format(probs), 'se', 'n') } else w <- c(Median=median(y), se=NA, n=length(y)) return(w) } w <- if(quantiles) hdquantile(y, probs) m <- hdquantile(y, 0.5, se=TRUE) se <- as.numeric(attr(m, 'se')) c(Median=as.numeric(m), w, se=se, n=length(y)) } denpoly <- function(x, y, col, n=50, pos, ...) { y <- y[! is.na(y)] n <- length(y) if(n < 2) return() den <- density(y, n=n, ...) d <- den$y y <- den$x ## Scale density of 0-3 mm d <- 3 * d / max(d) d <- c(d, d[length(d)]) mm <- grid::convertUnit(unit(d, 'mm'), 'mm', typeFrom='dimension') kol <- if(n < 5 ) adjustcolor(col, alpha.f=0.2) else if(n < 10) adjustcolor(col, alpha.f=0.4) else col grid::grid.polygon(y=unit(c(y, y[1]), 'native'), x=if(pos == 'left') unit(x, 'native') - mm else unit(x, 'native') + mm, gp=grid::gpar(col=FALSE, fill=kol)) } gr <- if(gp) groups[subscripts] else factor(rep('', length(x))) lev <- levels(gr) W <- NULL for(i in 1 : length(lev)) { j <- which(gr == levels(gr)[i]) xj <- x[j] yj <- y[j] w <- summarize(yj, xj, quant, type='matrix', keepcolnames=TRUE) Y <- w$yj xu <- w$xj lpoints(xu, Y[,'Median'], cex=sym$cex[i], pch=sym$pch[i], col=sym$col[i], alpha=sym$alpha[i]) llines(xu, Y[,'Median'], col=plot.line$col[i], lty=plot.line$lty[i], lwd=plot.line$lwd[i], alpha=plot.line$alpha) col <- plot.line$col[i] if(violin) for(xx in sort(unique(xj))) denpoly(xx, yj[xj == xx], col=adjustcolor(plot.line$col[i], alpha.f=0.4), pos=c('left', 'right')[i]) if(quantiles) multLines(xu, Y[, colnames(Y) %nin% c('se', 'n'), drop=FALSE], col=plot.line$col[i], lty=plot.line$lty[i], lwd=plot.line$lwd[i], grid=TRUE, pos=c('left', 'right')[i]) W <- rbind(W, cbind(gr=levels(gr)[i], w)) } if(length(lev) == 2) { x <- W$xj xu <- sort(unique(W$xj)) j1 <- W$gr == lev[1] j2 <- W$gr == lev[2] Y <- matrix(NA, nrow=length(xu), ncol=4, dimnames=list(as.character(xu), c('y1', 'y2', 'se1', 'se2'))) x1 <- as.character(x)[j1] x2 <- as.character(x)[j2] Y[x1, 'y1' ] <- W$yj[j1, 'Median'] Y[x1, 'se1'] <- W$yj[j1, 'se'] Y[x2, 'y2' ] <- W$yj[j2, 'Median'] Y[x2, 'se2'] <- W$yj[j2, 'se'] ymid <- (Y[, 'y1'] + Y[, 'y2']) / 2. halfwidthci <- qnorm(0.975) * sqrt(Y[, 'se1']^2 + Y[, 'se2']^2) col <- adjustcolor('black', alpha.f=0.7) grid::grid.segments(xu, ymid - 0.5 * halfwidthci, xu, ymid + 0.5 * halfwidthci, default.units='native', gp=grid::gpar(col=col, lwd=1.5)) } } mbarclpl <- function(x, y, groups=NULL, yother, yvar=NULL, maintracename='y', xlim=NULL, ylim=NULL, xname='x', alphaSegments=0.45, ...) { gp <- length(groups) gr <- if(gp) groups else rep('', length(x)) gr <- as.factor(gr) color <- if(gp) ~ .g. lev <- levels(gr) if(! 
length(yvar)) yvar <- rep('', length(x)) yvar <- as.factor(yvar) se <- if('se' %in% colnames(yother)) yother[, 'se'] ## prn(se, 'mbarclpl', fi='/tmp/z') cy <- colnames(yother) n <- if('n' %in% cy) yother[, 'n'] yother <- yother[, cy %nin% c('n', 'se'), drop=FALSE] if(all(c('0.375', '0.625') %in% cy)) { ## If HD median estimate is not between 0.375 and 0.625 quantiles ## take it to be the closer of the two y375 <- yother[, '0.375'] y625 <- yother[, '0.625'] y <- pmin(y, y625) y <- pmax(y, y375) } fmt <- function(x) htmlSN(x, digits=5) xdel <- 0.01 * diff(range(x, na.rm=TRUE)) xtxt <- paste0(xname, ': ', fmt(x), '<br>') if(length(n)) xtxt <- paste0(xtxt, 'n:', n, '<br>') R <- data.frame(x=x, y=y, yhi=NA, .g.=gr, .yvar.=yvar, tracename = maintracename, connect=TRUE, txt = paste0(xtxt, maintracename, ': ', fmt(y))) p <- ncol(yother) half <- p / 2 x0 <- x for(i in 1 : half) { i1 <- i i2 <- p - i + 1 tn <- paste0(colnames(yother)[i1], ' - ', colnames(yother)[i2]) x0 <- ifelse(gr == lev[1], x - i * xdel, x + i * xdel) txt <- paste0(xtxt, tn, ': [', fmt(yother[, i1]), ', ', fmt(yother[, i2]), ']') r <- data.frame(x=x0, y=yother[, i1], yhi=yother[, i2], .g.=gr, .yvar.=yvar, tracename=tn, connect=NA, txt=txt) R <- rbind(R, r) } if(length(lev) == 2 && length(se)) { rr <- NULL for(yv in levels(yvar)) { k <- yvar == yv xk <- x[k]; yk <- y[k]; sek <- se[k]; grk <- gr[k] xu <- sort(unique(xk)) j1 <- grk == lev[1] j2 <- grk == lev[2] Y <- matrix(NA, nrow=length(xu), ncol=4, dimnames=list(as.character(xu), c('y1', 'y2', 'se1', 'se2'))) x1 <- as.character(xk)[j1] x2 <- as.character(xk)[j2] Y[x1, 'y1' ] <- yk [j1] Y[x1, 'se1'] <- sek[j1] Y[x2, 'y2' ] <- yk [j2] Y[x2, 'se2'] <- sek[j2] ymid <- (Y[, 'y1'] + Y[, 'y2']) / 2. halfwidthci <- qnorm(0.975) * sqrt(Y[, 'se1']^2 + Y[, 'se2']^2) ydel <- Y[, 'y2'] - Y[, 'y1'] txt <- paste0(xname, ': ', fmt(xu), '<br>\u0394: ', fmt(ydel), '<br>0.95 C.I. for \u0394: [', fmt(ydel - halfwidthci), ', ', fmt(ydel + halfwidthci), ']') r <- data.frame(x = xu, y = ymid - 0.5 * halfwidthci, yhi = ymid + 0.5 * halfwidthci, .g. = paste0(lev[1], ' vs. ', lev[2]), .yvar. = yv, txt=txt, tracename='\u00BD 0.95 C.I. for \u0394', connect=FALSE) ## prn(cbind(yother, 2 * rep(halfwidthci, each=2)), fi='/tmp/z') rr <- rbind(rr, r) } R <- rbind(R, rr) } plotlyM(R, multplot=~.yvar., color=color, htext=~txt, xlim=xlim, ylim=ylim, alphaSegments=alphaSegments, ...) } medvpl <- function(x, y, groups=NULL, yvar=NULL, maintracename='y', xlim=NULL, ylim=NULL, xlab=xname, ylab=NULL, xname='x', zeroline=FALSE, yother=NULL, alphaSegments=0.45, dhistboxp.opts=NULL, ...) { gp <- length(groups) gr <- if(gp) groups else factor(rep('', length(x))) lev <- levels(gr) yvarpres <- length(yvar) if(! length(yvar)) yvar <- rep('', length(x)) if(! length(ylab)) ylab <- structure(unique(yvar), names=yvar) if(! length(names(ylab))) stop('ylab must have names') fmt <- function(x) htmlSN(x, digits=5) R <- NULL for(yv in unique(yvar)) { k <- yvar == yv xk <- x[k] yk <- y[k] gk <- gr[k] r <- do.call('dhistboxp', c(list(yk, group=gk, strata=xk, xlab=ylab[yv]), dhistboxp.opts)) r$strata <- NULL ry <- r$y r$y <- r$x r$x <- ry ryhi <- r$yhi r$yhi <- r$xhi r$xhi <- ryhi r$yvar <- yv R <- rbind(R, r) xku <- unique(xk) if(length(lev) == 2) { for(xa in xku) { Y <- matrix(NA, nrow=length(xku), ncol=4, dimnames=list(as.character(xku), c('y1','y2','se1','se2'))) for(ga in lev) { j <- xk == xa & gk == ga ykj <- yk[j] ykj <- ykj[! 
is.na(ykj)]
          if(length(ykj) < 3) {
            m <- median(ykj)
            se <- NA
          } else {
            m <- hdquantile(ykj, probs=0.5, se=TRUE)
            se <- attr(m, 'se')
          }
          med <- as.vector(m)
          if(ga == lev[1]) {
            med1 <- med
            se1 <- se
          } else {
            med2 <- med
            se2 <- se
          }
        }
        ydel <- med2 - med1
        ymid <- (med1 + med2) / 2.
        halfwidthci <- qnorm(0.975) * sqrt(se1 ^ 2 + se2 ^ 2)
        txt <- paste0(xname, ': ', fmt(xa),
                      '<br>\u0394: ', fmt(ydel),
                      '<br>0.95 C.I. for \u0394: [', fmt(ydel - halfwidthci),
                      ', ', fmt(ydel + halfwidthci), ']')
        R <- rbind(R,
                   data.frame(x   = xa,
                              y   = ymid - 0.5 * halfwidthci,
                              xhi = NA,
                              yhi = ymid + 0.5 * halfwidthci,
                              group = paste0(lev[1], ' vs. ', lev[2]),
                              yvar = yv, txt = txt, type = '',
                              connect = NA))
      }
    }
  }
  R$group <- paste(R$group, R$type)
  R$type <- NULL
  plotlyM(R, htext=~txt, multplot=~yvar, color=~group,
          alphaSegments=alphaSegments, xlab=xlab, ylab=ylab,
          xlim=xlim, ylim=ylim, zeroline=zeroline, ...)
}

## ----- Hmisc/R/binconf.s -----

binconf <- function(x, n, alpha = 0.05,
                    method = c("wilson", "exact", "asymptotic", "all"),
                    include.x = FALSE, include.n = FALSE,
                    return.df = FALSE)
{
  ## ..modifications for printing and the addition of a
  ## method argument and the asymptotic interval
  ## and to accept vector arguments were
  ## made by Brad Biggerstaff on 10 June 1999

  method <- match.arg(method)

  bc <- function(x, n, alpha, method) {
    nu1 <- 2 * (n - x + 1)
    nu2 <- 2 * x
    ll <- if(x > 0) x / (x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1)) else 0
    nu1p <- nu2 + 2
    nu2p <- nu1 - 2
    pp <- if(x < n) qf(1 - alpha/2, nu1p, nu2p) else 1
    ul <- ((x + 1) * pp) / (n - x + (x + 1) * pp)
    zcrit <- - qnorm(alpha/2)
    z2 <- zcrit * zcrit
    p <- x/n
    cl <- (p + z2/2/n + c(-1, 1) * zcrit *
           sqrt((p * (1 - p) + z2/4/n)/n)) / (1 + z2/n)
    if(x == 1) cl[1] <- - log(1 - alpha)/n
    if(x == (n - 1)) cl[2] <- 1 + log(1 - alpha)/n
    asymp.lcl <- x/n - qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n))/n)
    asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n))/n)
    res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl))
    res <- cbind(rep(x/n, 3), res)
    ## dimnames(res) <- list(c("Exact", "Wilson", "Asymptotic"),
    ##                       c("Point Estimate", "Lower", "Upper"))
    switch(method,
           wilson     = res[2, ],
           exact      = res[1, ],
           asymptotic = res[3, ],
           all        = res,
           res)
  }

  if((length(x) != length(n)) & length(x) == 1) x <- rep(x, length(n))
  if((length(x) != length(n)) & length(n) == 1) n <- rep(n, length(x))
  if((length(x) > 1 | length(n) > 1) & method == "all") {
    method <- "wilson"
    warning("method=all will not work with vectors...setting method to wilson")
  }
  if(method == "all" & length(x) == 1 & length(n) == 1) {
    mat <- bc(x, n, alpha, method)
    dimnames(mat) <- list(c("Exact", "Wilson", "Asymptotic"),
                          c("PointEst", "Lower", "Upper"))
    if(include.n) mat <- cbind(N = n, mat)
    if(include.x) mat <- cbind(X = x, mat)
    if(return.df) mat <- as.data.frame(mat)
    return(mat)
  }
  mat <- matrix(ncol = 3, nrow = length(x))
  for(i in 1 : length(x))
    mat[i, ] <- bc(x[i], n[i], alpha = alpha, method = method)
  dimnames(mat) <- list(rep("", dim(mat)[1]),
                        c("PointEst", "Lower", "Upper"))
  if(include.n) mat <- cbind(N = n, mat)
  if(include.x) mat <- cbind(X = x, mat)
  if(return.df) mat <- as.data.frame(mat, row.names=NULL)
  mat
}
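## binconf usage sketch (not part of the source): confidence limits for
## a binomial proportion with 10 events in 40 trials.
if(FALSE) {
  binconf(10, 40)                # default Wilson interval
  binconf(10, 40, method='all')  # Exact, Wilson, and Asymptotic together
}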
## ----- Hmisc/R/drawPlot.s -----

utils::globalVariables("pch.to.use")

drawPlot <- function(..., xlim=c(0,1), ylim=c(0,1), xlab='', ylab='',
                     ticks=c('none','x','y','xy'), key=FALSE, opts=NULL)
{
  Points <- function(label=' ', type=c('p','r'), n, pch=pch.to.use[1],
                     cex=par('cex'), col=par('col'),
                     rug=c('none','x','y','xy'), ymean=NULL)
  {
    type <- match.arg(type)
    rug <- match.arg(rug)
    cat('\nClick mouse for each point',
        if(label != '') paste(' for group ', label), '.',
        if(missing(n)) ' Right click when finished (Esc with RStudio).',
        '\n', sep='')
    pts <- if(missing(n)) locator(type='p', pch=pch, cex=cex, col=col)
    else locator(n, type='p', pch=pch, cex=cex, col=col)
    if(length(ymean)) pts$y <- pts$y - mean(pts$y) + ymean
    if(type == 'p')
      assign("pch.to.use", pch.to.use[pch.to.use != pch],
             envir=environment(Points))
    else {
      scat1d(pts$x, side=1)
      pch <- NA
    }
    switch(rug,
           x  = scat1d(pts$x, side=1),
           y  = scat1d(pts$y, side=2),
           xy = { scat1d(pts$x, side=1); scat1d(pts$y, side=2) },
           none = )
    structure(list(points=pts, label=label, type=type,
                   pch=pch, cex=cex, col=col, rug=rug),
              class='Points')
  }

  Curve <- function(label=' ',
                    type=c('bezier','polygon','linear','pol','loess',
                           'step','gauss'),
                    n=NULL, lty=1, lwd=par('lwd'), col=par('col'),
                    degree=2, evaluation=100, ask=FALSE)
  {
    isfun <- is.function(type)
    if(! isfun) type <- match.arg(type)
    if(! isfun && ! length(n) && type == 'linear') n <- 2
    if(!
isfun && type=='gauss') n <- 3 xlim <- par('usr')[1 : 2] redraw <- TRUE if(isfun) { x <- seq(xlim[1], xlim[2], length=evaluation) pts <- list(x=as.numeric(x), y=as.numeric(type(x))) lines(pts, lty=lty, lwd=lwd, col=col) } else repeat { cat('\nClick mouse for each point', if(label!='') paste(' for group ',label), '.', if(!length(n)) ' Right click when finished (Esc with RStudio).', '\n', sep='') pts <- if(!length(n)) locator(type='l', lty=lty, lwd=lwd, col=col) else locator(n, type='l', lty=lty, lwd=lwd, col=col) n <- length(pts$x) if(n < 2) stop('must click at least 2 points') if(n==2) type <- 'linear' if(type=='pol') { x <- matrix(NA, nrow=n, ncol=degree) for(i in 1:degree) x[,i] <- pts$x^i f <- lm.fit.qr.bare(x, pts$y) x <- matrix(NA, nrow=evaluation, ncol=degree) x[,1] <- seq(min(pts$x),max(pts$x), length=evaluation) if(degree > 1) for(i in 1:degree) x[,i] <- x[,1]^i cof <- f$coefficients y <- cof[1] + x %*% cof[-1] pts <- list(x=as.numeric(x[,1]), y=as.numeric(y)) if(redraw) lines(pts, lty=lty, lwd=lwd, col=col) } if(type == 'loess') { w <- lowess(pts$x, pts$y, f=.25) pts <- approx(w, xout=seq(min(pts$x), max(pts$x), length=evaluation)) if(redraw) lines(pts, lty=lty, lwd=lwd, col=col) } if(type=='bezier') { pts <- bezier(pts, xlim=range(pts$x), evaluation=evaluation) if(redraw) lines(pts, lty=lty, lwd=lwd, col=col) } if(type=='gauss') { mu <- pts$x[2] delta <- diff(pts$x[-2])/2 htavg <- sum(pts$y[-2])/2 htmax <- pts$y[2] x <- seq(xlim[1], xlim[2], length=evaluation) b2 <- delta^2 / log(htmax/htavg) y <- htmax * exp(-(x-mu)^2/b2) i <- y > 1e-4 pts <- list(x=as.single(x[i]), y=as.single(y[i])) lines(pts, lty=lty, lwd=lwd, col=col) } if(type=='step' && redraw) lines(pts, type='s', lty=lty, lwd=lwd, col=col) if(!ask) break if(readline('\nType y to accept, n to re-draw:') == 'y') break } structure(list(points=pts, label=label, type=type, lty=lty, lwd=lwd, col=col), class='Curve') } Abline <- function(...) { abline(...) 
structure(list(...), class='Abline') } environment(Points)$pch.to.use <- c(1,2,3,4,16,17,5,6,15,18,19) ticks <- match.arg(ticks) if(missing(ticks)) { if(!missing(xlim)) ticks <- 'x' if(!missing(ylim)) ticks <- 'y' if(!missing(xlim) && !missing(ylim)) ticks <- 'xy' } plot(xlim, ylim, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, type='n', axes=ticks=='xy') switch(ticks, none={ axis(1, at=xlim, labels=FALSE) axis(2, at=ylim, labels=FALSE) }, x={ axis(1) axis(2, at=ylim, labels=FALSE) }, y={ axis(1, at=xlim, labels=FALSE) axis(2) }, xy = ) cc <- as.list(substitute(list(...))[-1]) W <- list() for(i in seq_along(cc)) W[[i]] <- eval(cc[[i]]) ## See http://stackoverflow.com/questions/33737102/elegant-way-to-define-a-function-inside-another-function m <- length(W) type <- label <- rep('', m) lty <- lwd <- pch <- cex <- col <- rep(NA, m) curves <- vector('list', m) i <- 0 for(j in 1:m) { w <- W[[j]] if(attr(w,'class')=='Abline') next i <- i + 1 isfun <- is.function(w$type) curves[[i]] <- if(!key || isfun) w$points else switch(w$type, step = approx(w$points, xout=seq(min(w$points$x), max(w$points$x), length=50), method='constant', f=0), linear = approx(w$points, xout=seq(min(w$points$x), max(w$points$x), length=50)), w$points) label[i] <- w$label col[i] <- w$col type[i] <- if(isfun) 'l' else switch(w$type, p = 'p', r = 'r', step= 's', 'l') if(type[i]=='p') { pch[i] <- w$pch cex[i] <- w$cex } else if(type[i] != 'r') { lty[i] <- w$lty lwd[i] <- w$lwd } } if(i < m) { curves <- curves[1:i] label <- label [1:i] type <- type [1:i] lty <- lty [1:i] lwd <- lwd [1:i] pch <- pch [1:i] cex <- cex [1:i] col <- col [1:i] } keyloc <- NULL j <- type != 'r' if(any(j)) { if(!key) labcurve(curves[j], labels=label[j], type=type[j], lty=lty[j], lwd=lwd[j], col.=col[j], opts=opts) else { x <- unlist(lapply(curves, function(z)z$x)) y <- unlist(lapply(curves, function(z)z$y)) keyloc <- putKeyEmpty(x, y, labels=label[j], type=type[j], pch=pch[j], lty=lty[j], lwd=lwd[j], cex=cex[j], col=col[j]) } } invisible(structure(list(W, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, ticks=ticks, key=key, keyloc=keyloc, opts=opts), class='drawPlot')) } bezier <- function(x, y, xlim, evaluation=100) { if(missing(y)) { y <- x[[2]] x <- x[[1]] } n <- length(x) X <- Y <- single(evaluation) Z <- seq(0, 1, length=evaluation) X[1] <- x[1]; X[evaluation] <- x[n] Y[1] <- y[1]; Y[evaluation] <- y[n] for(i in 2:(evaluation - 1)) { z <- Z[i] xz <- yz <- 0 const <- (1 - z) ^ (n - 1) for(j in 0 : (n - 1)) { xz <- xz + const * x[j + 1] yz <- yz + const * y[j + 1] const <- const* (n - 1 - j) / (j + 1) * z / (1 - z) if(is.na(const)) prn(c(i, j, z)) } X[i] <- xz; Y[i] <- yz } list(x=as.numeric(X), y=as.numeric(Y)) } plot.drawPlot <- function(x, xlab, ylab, ticks, key=x$key, keyloc=x$keyloc, ...) 
{
  if(missing(xlab)) xlab <- x$xlab
  if(missing(ylab)) ylab <- x$ylab
  xlim <- x$xlim
  ylim <- x$ylim
  if(missing(ticks)) ticks <- x$ticks
  plot(xlim, ylim, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab,
       type='n', axes=ticks == 'xy')
  switch(ticks,
         none = {
           axis(1, at=xlim, labels=FALSE)
           axis(2, at=ylim, labels=FALSE)
         },
         x = {
           axis(1)
           axis(2, at=ylim, labels=FALSE)
         },
         y = {
           axis(1, at=xlim, labels=FALSE)
           axis(2)
         },
         xy = )
  data <- x[[1]]
  m <- length(data)
  type <- label <- rep('', m)
  lty <- lwd <- pch <- cex <- col <- rep(NA, m)
  curves <- vector('list', m)
  i <- 0
  for(j in 1 : m) {
    w <- data[[j]]
    if(attr(w, 'class') == 'Abline') {
      do.call("abline", unclass(w))
      next
    }
    i <- i + 1
    if(is.function(w$type)) w$type <- 'l'
    curves[[i]] <- if(!key) w$points
    else switch(w$type,
                step = approx(w$points,
                              xout=seq(min(w$points$x), max(w$points$x),
                                       length=50),
                              method='constant', f=0),
                linear = approx(w$points,
                                xout=seq(min(w$points$x), max(w$points$x),
                                         length=50)),
                w$points)
    label[i] <- w$label
    col[i] <- w$col
    switch(attr(w, 'class'),
           Points = {
             type[i] <- w$type
             pch[i] <- w$pch
             cex[i] <- w$cex
             switch(w$type,
                    p = points(w$points, cex=w$cex, pch=w$pch, col=w$col),
                    r = scat1d(w$points$x, side=1, col=w$col))
             switch(w$rug,
                    x = scat1d(w$points$x, side=1, col=w$col),
                    y = scat1d(w$points$y, side=2, col=w$col),
                    xy = {
                      scat1d(w$points$x, side=1, col=w$col)
                      scat1d(w$points$y, side=2, col=w$col)
                    },
                    none = )
           },
           Curve = {
             type[i] <- if(w$type == 'step') 's' else 'l'
             lty[i] <- w$lty
             lwd[i] <- w$lwd
             lines(w$points, lty=w$lty, lwd=w$lwd, col=col[i], type=type[i])
           })
  }
  if(i < m) {
    curves <- curves[1:i]
    label <- label[1:i]
    type <- type[1:i]
    pch <- pch[1:i]
    lty <- lty[1:i]
    lwd <- lwd[1:i]
    cex <- cex[1:i]
    col <- col[1:i]
  }
  if(key && !length(keyloc))
    stop('you may not specify key=TRUE unless key=TRUE was specified to drawPlot or keyloc is specified to plot')
  if(any(label != '')) {
    j <- type != 'r'
    if(any(j)) {
      if(key) putKey(keyloc, labels=label[j], type=type[j], pch=pch[j],
                     lty=lty[j], lwd=lwd[j], cex=cex[j], col=col[j])
      else labcurve(curves[j], type=type[j], lty=lty[j], lwd=lwd[j],
                    col.=col[j], labels=label[j], opts=x$opts)
    }
  }
  invisible()
}

## ----- Hmisc/R/units.s -----

units <- function(x, ...) UseMethod("units")

"units<-.default" <- function(x, value)
{
  attr(x, "units") <- value
  x
}

units.default <- function(x, none='', ...)
{
  lab <- attr(x, "units")
  if(is.null(lab)) lab <- attr(attr(x, 'tspar'), 'units')
  if(is.null(lab)) lab <- none
  lab
}

units.Surv <- function(x, none='', ...)
{
  at <- attributes(x)
  un <- at$units
  ia <- at$inputAttributes
  if(! length(un) && length(ia)) {
    un <- ia$time2$units
    if(! length(un)) un <- ia$time$units
  }
  if(! length(un)) un <- none
  un
}
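## units() usage sketch (illustrative only): get or set the units
## attribute consulted by Hmisc labeling and plotting functions.
if(FALSE) {
  x <- runif(10)
  units(x) <- 'mm Hg'
  units(x)         # "mm Hg"
  units(runif(5))  # "" when no units attribute is present
}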
length(un)) un <- none un } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/somers2.s�����������������������������������������������������������������������������������0000644�0001762�0000144�00000006527�13101441117�013277� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������##S function somers2 ## ## Calculates concordance probability and Somers' Dxy rank correlation ## between a variable X (for which ties are counted) and a binary ## variable Y (having values 0 and 1, for which ties are not counted). ## Uses short cut method based on average ranks in two groups. ## ## Usage: ## ## somers2(x, y, weights) ## ## Returns vector whose elements are C Index, Dxy, n and missing, where ## C Index is the concordance probability and Dxy=2(C Index-.5). ## ## F. Harrell 28 Nov 90 6 Apr 98: added weights somers2 <- function(x, y, weights=NULL, normwt=FALSE, na.rm=TRUE) { if(length(y) != length(x)) stop("y must have same length as x") y <- as.integer(y) wtpres <- length(weights) if(wtpres && (wtpres != length(x))) stop('weights must have same length as x') if(na.rm) { miss <- if(wtpres) is.na(x + y + weights) else is.na(x + y) nmiss <- sum(miss) if(nmiss > 0) { miss <- !miss x <- x[miss] y <- y[miss] if(wtpres) weights <- weights[miss] } } else nmiss <- 0 if(any(y %nin% 0:1)) stop('y must be binary') if(wtpres) { if(normwt) weights <- length(x)*weights/sum(weights) n <- sum(weights) } else n <- length(x) if(n < 2) stop("must have >=2 non-missing observations") n1 <- if(wtpres)sum(weights[y==1]) else sum(y==1) if(n1 == 0 || n1 == n) return(c(C=NA, Dxy=NA, n=n, Missing=nmiss)) mean.rank <- if(wtpres) wtd.mean(wtd.rank(x, weights, na.rm=FALSE), weights * y) else mean(rank(x)[y==1]) c.index <- (mean.rank - (n1 + 1) / 2) / (n - n1) dxy <- 2 * (c.index - 0.5) r <- c(c.index, dxy, n, nmiss) names(r) <- c("C", "Dxy", "n", "Missing") r } if(FALSE) rcorrs <- function(x, y, weights=rep(1,length(y)), method=c('exact','bin'), nbin=1000, na.rm=TRUE) { ## Experimental function - probably don't need method <- match.arg(method) if(na.rm) { s <- !is.na(x + unclass(y) + weights) x <- x[s]; y <- y[s]; weights <- weights[s] } n <- length(x) if(missing(method)) method <- if(n < 1000) 'exact' else 'bin' y <- as.factor(y); nly <- length(levels(y)) y <- as.integer(y) if(method == 'bin') { r <- range(x); d <- r[2] - r[1] x <- 1 + trunc((nbin - 1) * (x - r[1]) / d) xy <- y * nbin + x ## Code below is lifted from rowsum() storage.mode(weights) <- "double" temp <- .C('R_rowsum', dd=as.integer(dd), as.double(max(1,weights)*n), x=weights, as.double(xy), PACKAGE='base') new.n <- temp$dd[1] weights <- temp$x[1:new.n] uxy <- unique(xy) x <- uxy %% nbin y <- (uxy - x)/nbin n <- length(x) } list(x=x, y=y, weights=weights) #storage.mode(x) <- "single" #storage.mode(y) <- "single" #storage.mode(event) <- "logical" ## wcidxy 
doesn't exist yet ## z <- .Fortran(F_wcidxy,as.single(x),as.single(y),as.integer(weights),as.integer(n), # nrel=double(1),nconc=double(1),nuncert=double(1), # c.index=double(1),gamma=double(1),sd=double(1),as.logical(outx)) r <- c(z$c.index,z$gamma,z$sd,n,z$nrel,z$nconc,z$nuncert) names(r) <- c("C Index","Dxy","S.D.","n","missing","uncensored", "Relevant Pairs", "Concordant","Uncertain") r } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/upData.s������������������������������������������������������������������������������������0000644�0001762�0000144�00000042172�14252142463�013131� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cleanup.import <- function(obj, labels=NULL, lowernames=FALSE, force.single=TRUE, force.numeric=TRUE, rmnames=TRUE, big=1e20, sasdict, print=prod(dimobj) > 5e5, datevars=NULL, datetimevars=NULL, dateformat='%F', fixdates=c('none','year'), autodate=FALSE, autonum=FALSE, fracnn=0.3, considerNA=NULL, charfactor=FALSE) { fixdates <- match.arg(fixdates) nam <- names(obj) dimobj <- dim(obj) nv <- length(nam) if(!missing(sasdict)) { sasvname <- makeNames(sasdict$NAME) if(any(w <- nam %nin% sasvname)) stop(paste('The following variables are not in sasdict:', paste(nam[w],collapse=' '))) saslabel <- structure(as.character(sasdict$LABEL), names=as.character(sasvname)) labels <- saslabel[nam] names(labels) <- NULL } if(length(labels) && length(labels) != dimobj[2]) stop('length of labels does not match number of variables') if(lowernames) names(obj) <- casefold(nam) if(print) cat(dimobj[2],'variables; Processing variable:') for(i in 1:dimobj[2]) { if(print) cat(i,'') x <- obj[[i]] modif <- FALSE if(length(dim(x))) next if(rmnames) { if(length(attr(x,'names'))) { attr(x,'names') <- NULL modif <- TRUE } else if(length(attr(x,'.Names'))) { attr(x,'.Names') <- NULL modif <- TRUE } } if(length(attr(x,'Csingle'))) { attr(x,'Csingle') <- NULL modif <- TRUE } if(length(c(datevars,datetimevars)) && nam[i] %in% c(datevars,datetimevars) && !all(is.na(x))) { if(!(is.factor(x) || is.character(x))) stop(paste('variable',nam[i], 'must be a factor or character variable for date conversion')) x <- as.character(x) ## trim leading and trailing white space x <- sub('^[[:space:]]+','',sub('[[:space:]]+$','', x)) xt <- NULL if(nam[i] %in% datetimevars) { xt <- gsub('.* ([0-9][0-9]:[0-9][0-9]:[0-9][0-9])','\\1',x) xtnna <- setdiff(xt, c('',' ','00:00:00')) if(!length(xtnna)) xt <- NULL x <- gsub(' [0-9][0-9]:[0-9][0-9]:[0-9][0-9]','',x) } if(fixdates != 'none') { if(dateformat %nin% c('%F','%y-%m-%d','%m/%d/%y','%m/%d/%Y')) stop('fixdates only supported for dateformat %F %y-%m-%d %m/%d/%y %m/%d/%Y') x <- switch(dateformat, '%F' =gsub('^([0-9]{2})-([0-9]{1,2})-([0-9]{1,2})', '20\\1-\\2-\\3',x), '%y-%m-%d'=gsub('^[0-9]{2}([0-9]{2})-([0-9]{1,2})-([0-9]{1,2})', '\\1-\\2-\\3',x), '%m/%d/%y'=gsub('^([0-9]{1,2})/([0-9]{1,2})/[0-9]{2}([0-9]{2})', '\\1/\\2/\\3',x), '%m/%d/%Y'=gsub('^([0-9]{1,2})/([0-9]{1,2})/([0-9]{2})$', '\\1/\\2/20\\3',x) ) } x <- if(length(xt) && requireNamespace("chron", quietly = TRUE)) { cform <- if(dateformat=='%F') 'y-m-d' else 
gsub('%','',tolower(dateformat)) chron::chron(x, xt, format=c(dates=cform,times='h:m:s')) } else as.Date(x, format=dateformat) modif <- TRUE } if(autodate) { x <- convertPdate(x, fracnn=fracnn, considerNA=considerNA) if(inherits(x, 'Date')) modif <- TRUE } if(autonum && (is.character(x) || is.factor(x))) { xc <- trimws(as.character(x)) bl <- is.na(xc) | xc == '' xn <- suppressWarnings(as.numeric(xc)) ignore <- bl | tolower(xc) %in% tolower(trimws(considerNA)) illegal <- is.na(xn) & ! bl illegal.not.ignored <- xc[illegal & ! ignore] # if(length(illegal) < sum(tolower(xc) %nin% # c('', tolower(considerNA)) & ! is.na(xc)) * fracnn) { if(length(illegal.not.ignored) < sum(! ignore) * fracnn) { labx <- attr(x, 'label') x <- xn label(x) <- labx attr(x, 'special.miss') <- list(codes=xc[illegal], obs=which(illegal)) class(x) <- c(class(x), 'special.miss') modif <- TRUE } } if(length(labels)) { label(x) <- labels[i] modif <- TRUE } if(force.numeric && length(lev <- levels(x))) { if(all.is.numeric(lev)) { labx <- attr(x,'label') x <- as.numeric(as.character(x)) label(x) <- labx modif <- TRUE } } if(storage.mode(x) == 'double') { xu <- unclass(x) j <- is.infinite(xu) | is.nan(xu) | abs(xu) > big if(any(j,na.rm=TRUE)) { x[j] <- NA modif <- TRUE if(print) cat('\n') cat(sum(j,na.rm=TRUE),'infinite values set to NA for variable', nam[i],'\n') } isdate <- testDateTime(x) if(force.single && !isdate) { allna <- all(is.na(x)) if(allna) { storage.mode(x) <- 'integer' modif <- TRUE } if(!allna) { notfractional <- !any(floor(x) != x, na.rm=TRUE) if(max(abs(x),na.rm=TRUE) <= (2^31-1) && notfractional) { storage.mode(x) <- 'integer' modif <- TRUE } } } } if(charfactor && is.character(x)) { if(length(unique(x)) < .5*length(x)) { x <- sub(' +$', '', x) # remove trailing blanks x <- factor(x, exclude=c('', NA)) modif <- TRUE } } if(modif) obj[[i]] <- x NULL } if(print) cat('\n') if(!missing(sasdict)) { sasat <- sasdict[1,] attributes(obj) <- c(attributes(obj), sasds=as.character(sasat$MEMNAME), sasdslabel=as.character(sasat$MEMLABEL)) } obj } upData <- function(object, ..., subset, rename=NULL, drop=NULL, keep=NULL, labels=NULL, units=NULL, levels=NULL, force.single=TRUE, lowernames=FALSE, caplabels=FALSE, moveUnits=FALSE, charfactor=FALSE, print=TRUE, html=FALSE) { if(html) print <- FALSE upfirst <- function(txt) gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", txt, perl=TRUE) if(lowernames) names(object) <- casefold(names(object)) isdt <- inherits(object, 'data.table') no <- names(object) nobs <- nrow(object) out <- paste('Input object size:\t', object.size(object), 'bytes;\t', length(no), 'variables\t', nobs, 'observations\n') if(print) cat(out) if(! missing(subset)) { s <- substitute(subset) r <- eval(s, object, parent.frame()) if(! is.logical(r)) stop('subset must be a logical expression') r <- r & ! is.na(r) object <- object[r, , drop=FALSE] nobs <- sum(r) } rnames <- row.names(object) g <- function(x) c(sm = storage.mode(x), labclass = inherits(x, 'labelled'), labpres = length(la <- attr(x, 'label')) && la != '', lowuc = is.character(x) && length(unique(x)) < length(x) / 2) vinfo <- sapply(object, g) ## Variables with labels but not classed as 'labelled' will not ## keep labels upon subscripting. 
Examples: variables imported ## using the haven package j <- which(vinfo['labpres', ] == 'TRUE' & vinfo['labclass', ] == 'FALSE') if(length(j)) for(i in j) { x <- object[[i]] class(x) <- c('labelled', class(x)) object[[i]] <- x } ## Much slower: ## for(i in j) class(object[[i]]) <- c('labelled', class(object[[i]])) ## The following is targeted at R workspaces exported from StatTransfer al <- attr(object, 'var.labels') if(length(al)) { if(caplabels) al <- upfirst(al) for(i in 1:length(no)) if(al[i] != '') label(object[[i]]) <- al[i] attr(object, 'var.labels') <- NULL if(missing(force.single)) force.single <- FALSE } else if(caplabels) { for(i in which(vinfo['labpres', ] == 'TRUE')) { x <- object[[i]] if(length(la <- attr(x, 'label'))) { attr(x, 'label') <- upfirst(la) object[[i]] <- x } } } al <- attr(object, 'label.table') if(length(al)) { for(i in 1 : length(no)) { ali <- al[[i]] if(length(ali)) object[[i]] <- factor(object[[i]], unname(ali), names(ali)) } attr(object, 'label.table') <- attr(object, 'val.labels') <- NULL } if(moveUnits) for(i in 1:length(no)) { z <- object[[i]] lab <- olab <- attr(z,'label') if(!length(lab) || length(attr(z, 'units'))) next paren <- length(grep('\\(.*\\)',lab)) brack <- length(grep('\\[.*\\]',lab)) if(paren + brack == 0) next u <- if(paren) regexpr('\\(.*\\)', lab) else regexpr('\\[.*\\]', lab) len <- attr(u,'match.length') un <- substring(lab, u + 1, u + len - 2) lab <- substring(lab, 1, u-1) if(substring(lab, nchar(lab), nchar(lab)) == ' ') lab <- substring(lab, 1, nchar(lab) - 1) out <- c(out, outn <- paste('Label for', no[i], 'changed from', olab, 'to', lab, '\n\tunits set to', un, '\n')) if(print) cat(outn) attr(z,'label') <- lab attr(z,'units') <- un object[[i]] <- z } if(length(rename)) { nr <- names(rename) if(length(nr) == 0 || any(nr == '')) stop('the list or vector specified in rename must specify variable names') for(i in 1 : length(rename)) { if(nr[i] %nin% no) stop(paste('unknown variable name:',nr[i])) out <- c(out, outn <- paste('Renamed variable\t', nr[i], '\tto', rename[[i]], '\n')) if(print) cat(outn) } no[match(nr, no)] <- colnames(vinfo)[match(nr, colnames(vinfo))] <- unlist(rename) names(object) <- no } z <- substitute(list(...)) if(length(z) > 1) { z <- z[-1] vn <- names(z) if(!length(vn) || any(vn == '')) stop('variables must all have names') for(i in 1 : length(z)) { x <- eval(z[[i]], object, parent.frame()) v <- vn[i] if(v %in% no) { out <- c(out, outn <- paste0('Modified variable\t', v, '\n')) if(print) cat(outn) vinfo[, v] <- g(x) } else { out <- c(out, outn <- paste0('Added variable\t\t', v, '\n')) if(print) cat(outn) no <- c(no, v) vinfo <- cbind(vinfo, g(x)) colnames(vinfo)[ncol(vinfo)] <- v } d <- dim(x) lx <- if(length(d)) d[1] else length(x) if(lx != nobs) { if(lx == 1) { warning(paste('length of ',v, ' is 1; will replicate this value.', sep='')) x <- rep(x, length.out=nobs) } else { f <- find(v) if(length(f)) { out <- c(out, outn <- paste('Variable', v, 'found in', paste(f, collapse=' '), '\n')) if(print) cat(outn) } stop(paste('length of ', v, ' (', lx, ')\n', 'does not match number of rows in object (', nobs, ')', sep='')) } } ## If x is factor and is all NA, user probably miscoded. Add ## msg. 
if(is.factor(x) && all(is.na(x))) warning(paste('Variable ',v,'is a factor with all values NA.\n', 'Check that the second argument to factor() matched the original levels.\n', sep='')) object[[v]] <- x } } if(force.single) { ## sm <- sapply(object, storage.mode) sm <- vinfo['sm', ] ii <- which(sm == 'double') if(length(ii)) for(i in ii) { x <- object[[i]] if(testDateTime(x) || is.matrix(x)) next ## For long vectors don't consider unless the first 500 are NA ## all(is.na(x)) is slow notallna <- any(! is.na(x[1:min(nobs,500)])) notallna <- notallna || any(! is.na(x)) if(! notallna) storage.mode(object[[i]]) <- 'integer' else { ## For long vectors don't consider unless the first 500 ## are integer if(nobs < 500 || ! any(floor(x[1:500]) != x[1:500], na.rm=TRUE)) { notfractional <- ! any(floor(x) != x, na.rm=TRUE) if(notfractional && max(abs(x), na.rm=TRUE) <= (2 ^ 31 - 1)) { storage.mode(x) <- 'integer' object[[i]] <- x } } } } } if(charfactor) { mfact <- as.logical(vinfo['lowuc', ]) if(any(mfact)) for(i in which(mfact)) { x <- sub(' +$', '', object[[i]]) # remove trailing blanks object[[i]] <- factor(x, exclude=c('', NA)) } } if(length(drop) && length(keep)) stop('cannot specify both drop and keep') if(length(drop)) { if(length(drop) == 1) { out <- c(out, outn <- paste0('Dropped variable\t',drop,'\n')) if(print) cat(outn) } else { out <- c(out, outn <- paste0('Dropped variables\t', paste(drop, collapse=','), '\n')) if(print) cat(outn) } s <- drop %nin% no if(any(s)) warning(paste('The following variables in drop= are not in object:', paste(drop[s], collapse=' '))) no <- no[no %nin% drop] object <- if(isdt) object[, ..no] else object[no] } if(length(keep)) { if(length(keep) == 1) { out <- c(out, outn <- paste0('Kept variable\t', keep, '\n')) if(print) cat(outn) } else { out <- c(out, outn <- paste0('Kept variables\t', paste(keep, collapse=','), '\n')) if(print) cat(outn) } s <- keep %nin% no if(any(s)) warning(paste('The following variables in keep= are not in object:', paste(keep[s], collapse=' '))) no <- no[no %in% keep] object <- if(isdt) object[, ..no] else object[no] } if(length(levels)) { if(!is.list(levels)) stop('levels must be a list') nl <- names(levels) s <- nl %nin% no if(any(s)) { warning(paste('The following variables in levels= are not in object:', paste(nl[s], collapse=' '))) nl <- nl[! s] } for(n in nl) { x <- object[[n]] if(! 
is.factor(x)) x <- as.factor(x) levels(x) <- levels[[n]] object[[n]] <- x ## levels[[nn]] will usually be a list; S+ invokes merge.levels } } if(length(labels)) { nl <- names(labels) if(!length(nl)) stop('elements of labels were unnamed') s <- nl %nin% no if(any(s)) { warning(paste('The following variables in labels= are not in object:', paste(nl[s], collapse=' '))) nl <- nl[!s] } for(n in nl) { x <- object[[n]] label(x) <- labels[[n]] object[[n]] <- x } } if(length(units)) { nu <- names(units) s <- nu %nin% no if(any(s)) { warning(paste('The following variables in units= are not in object:', paste(nu[s], collapse=' '))) nu <- nu[!s] } for(n in nu) attr(object[[n]], 'units') <- units[[n]] } out <- c(out, outn <- paste0('New object size:\t', object.size(object), ' bytes;\t', length(no), ' variables\t', nobs, ' observations\n')) if(print) cat(outn) if(html) { cat('<pre style="font-size:60%;">\n') cat(out) cat('</pre>\n') } object } dataframeReduce <- function(data, fracmiss=1, maxlevels=NULL, minprev=0, print=TRUE) { g <- function(x, fracmiss, maxlevels, minprev) { if(is.matrix(x)) { f <- mean(is.na(x %*% rep(1, ncol(x)))) return(if(f > fracmiss) paste('fraction missing>',fracmiss,sep='') else '') } h <- function(a, b) if(a == '') b else if(b == '') a else paste(a, b, sep=';') f <- mean(is.na(x)) x <- x[!is.na(x)] n <- length(x) r <- if(f > fracmiss) paste('fraction missing>', fracmiss,sep='') else '' if(is.character(x)) x <- factor(x) if(length(maxlevels) && is.factor(x) && length(levels(x)) > maxlevels) return(h(r, paste('categories>',maxlevels,sep=''))) s <- '' if(is.factor(x) || length(unique(x))==2) { tab <- table(x) if((min(tab) / max(n, 1L)) < minprev) { if(is.factor(x)) { x <- combine.levels(x, minlev=minprev) s <- 'grouped categories' if(length(levels(x)) < 2) s <- paste('prevalence<', minprev, sep='') } else s <- paste('prevalence<', minprev, sep='') } } h(r, s) } h <- sapply(data, g, fracmiss, maxlevels, minprev) if(all(h == '')) return(data) info <- data.frame(Variable=names(data)[h != ''], Reason=h[h != ''], row.names=NULL, check.names=FALSE) if(print) { cat('\nVariables Removed or Modified\n\n') print(info) cat('\n') } s <- h == 'grouped categories' if(any(s)) for(i in which(s)) data[[i]] <- combine.levels(data[[i]], minlev=minprev) if(any(h != '' & ! s)) data <- data[h == '' | s] attr(data, 'info') <- info data } utils::globalVariables('..no') ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hmisc/R/describe.s����������������������������������������������������������������������������������0000644�0001762�0000144�00000114664�14370546652�013512� 0����������������������������������������������������������������������������������������������������ustar �ligges��������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������describe <- function(x, ...) UseMethod("describe") describe.default <- function(x, descript, ...) { if(missing(descript)) { descript <- deparse(substitute(x)) } if(is.matrix(x)) { describe.matrix(x, descript, ...) 
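## a matrix is summarized column-by-column by describe.matrix;
## all other vectors fall through to describe.vector below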
} else { describe.vector(x, descript, ...) } } describe.vector <- function(x, descript, exclude.missing=TRUE, digits=4, listunique=0, listnchar=12, weights=NULL, normwt=FALSE, minlength=NULL, ...) { oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) weighted <- length(weights) > 0 if(! weighted) weights <- rep(1, length(x)) special.codes <- attr(x, "special.miss")$codes labx <- attr(x,"label") if(missing(descript)) descript <- as.character(sys.call())[2] if(length(labx) && labx != descript) descript <- paste(descript,":",labx) un <- attr(x, "units") if(length(un) && un == '') un <- NULL fmt <- attr(x, 'format') if(length(fmt) && (is.function(fmt) || fmt == '')) fmt <- NULL if(length(fmt) > 1) fmt <- paste(as.character(fmt[[1]]), as.character(fmt[[2]])) present <- if(all(is.na(x))) rep(FALSE, length(x)) else if(is.character(x)) x != "" & x != " " & ! is.na(x) else ! is.na(x) present <- present & ! is.na(weights) if(length(weights) != length(x)) stop('length of weights must equal length of x') if(normwt) { weights <- sum(present) * weights / sum(weights[present]) n <- sum(present) } else n <- sum(weights[present]) if(exclude.missing && n==0) return(structure(list(), class="describe")) missing <- sum(weights[! present], na.rm=TRUE) atx <- attributes(x) atx$names <- atx$dimnames <- atx$dim <- atx$special.miss <- NULL atx$class <- atx$class[atx$class != 'special.miss'] isdot <- testDateTime(x,'either') # is date or time var isdat <- testDateTime(x,'both') # is date and time combo var x <- x[present, drop=FALSE] x.unique <- sort(unique(x)) weights <- weights[present] n.unique <- length(x.unique) attributes(x) <- attributes(x.unique) <- atx isnum <- (is.numeric(x) || isdot) && ! is.factor(x) # was isdat timeUsed <- isdat && testDateTime(x.unique, 'timeVaries') z <- list(descript=descript, units=un, format=fmt) counts <- c(n,missing) lab <- c("n","missing") if(length(special.codes)) { tabsc <- table(special.codes) counts <- c(counts, tabsc) lab <- c(lab, names(tabsc)) } if(length(atx$imputed)) { counts <- c(counts, length(atx$imputed)) lab <- c(lab, "imputed") } if(length(pd <- atx$partial.date)) { if((nn <- length(pd$month)) > 0) { counts <- c(counts, nn) lab <- c(lab, "missing month") } if((nn <- length(pd$day)) > 0) { counts <- c(counts, nn) lab <- c(lab,"missing day") } if((nn <- length(pd$both)) > 0) { counts <- c(counts, nn) lab <- c(lab,"missing month,day") } } if(length(atx$substi.source)) { tabss <- table(atx$substi.source) counts <- c(counts, tabss) lab <- c(lab, names(tabss)) } counts <- c(counts, n.unique) lab <- c(lab, "distinct") if(isnum) { xnum <- unclass(x) if(n.unique < 2) reff <- 0 else { fp <- wtd.table(xnum, weights, normwt=FALSE, na.rm=FALSE, type='table') / sum(weights) reff <- (1 - sum(fp ^ 3)) / (1 - 1 / n / n) } counts <- c(counts, round(reff, 3)) lab <- c(lab, 'Info') } x.binary <- n.unique == 2 && isnum && x.unique[1] == 0 && x.unique[2] == 1 if(x.binary) { counts <- c(counts, sum(weights[x == 1])) lab <- c(lab, "Sum") } if(isnum) { if(isdot) { dd <- sum(weights * xnum) / sum(weights) fval <- formatDateTime(dd, atx, ! timeUsed) counts <- c(counts, fval) } else counts <- c(counts, format(sum(weights * x) / sum(weights), ...)) lab <- c(lab, "Mean") if(! weighted) { gmd <- format(GiniMd(xnum), ...) counts <- c(counts, gmd) # counts <- c(counts, if(isdot) formatDateTime(gmd, atx, ! 
timeUsed) # else # format(gmd, ...)) lab <- c(lab, "Gmd") } } else if(n.unique == 1) { counts <- c(counts, format(x.unique)) lab <- c(lab, "value") } if(n.unique >= 10 & isnum) { q <- if(any(weights != 1)) { wtd.quantile(xnum, weights, normwt=FALSE, na.rm=FALSE, probs=c(.05,.1,.25,.5,.75,.90,.95)) } else quantile(xnum,c(.05,.1,.25,.5,.75,.90,.95), na.rm=FALSE) ## Only reason to call quantile is that the two functions can give ## different results if there are ties, and users are used to quantile() fval <- if(isdot) formatDateTime(q, atx, ! timeUsed) else format(q,...) counts <- c(counts, fval) lab <- c(lab,".05",".10",".25",".50",".75",".90",".95") } names(counts) <- lab z$counts <- counts tableIgnoreCaseWhiteSpace <- function(x) { x <- gsub('\r',' ',x) x <- gsub('^[[:space:]]+','',gsub('[[:space:]]+$','', x)) x <- gsub('[[:space:]]+',' ', x) y <- tolower(x) f <- table(y) names(f) <- x[match(names(f), y)] f } values <- NULL if(! x.binary) { if(inherits(x,'mChoice')) z$mChoice <- summary(x, minlength=minlength) else if(n.unique <= listunique && ! isnum && ! is.factor(x) && max(nchar(x)) > listnchar) values <- tableIgnoreCaseWhiteSpace(x) else if(isnum || n.unique <= 100) { if(isnum) { if(n.unique >= 100 || (n.unique > 20 && min(diff(sort(unique(xnum)))) < diff(range(xnum)) / 500)) { pret <- pretty(xnum, if(n.unique >= 100) 100 else 500) dist <- pret[2] - pret[1] r <- range(pret) xnum <- r[1] + dist * round((xnum - r[1]) / dist) z$roundedTo <- dist } } # values <- wtd.table(if(isnum) xnum else if(isdat) format(x) else x, # weights, normwt=FALSE, na.rm=FALSE) # values <- wtd.table(if(isdot) format(x) else if(isnum) xnum else x, # weights, normwt=FALSE, na.rm=FALSE) values <- wtd.table(if(isnum) xnum else x, weights, normwt=FALSE, na.rm=FALSE) vx <- values$x cx <- intersect(atx$class, c("Date", "POSIXt", "POSIXct", "dates", "times", "chron")) class(vx) <- cx # restores as date, time, etc. values <- list(value=vx, frequency=unname(values$sum.of.weights)) } z$values <- values if(n.unique >= 5) { loandhi <- x.unique[c(1 : 5, (n.unique - 4) : n.unique)] extremes <- if(isdot && all(class(loandhi) %nin% 'timeDate')) { formatDateTime(unclass(loandhi), at=atx, roundDay=! timeUsed) } else if(isnum) loandhi else format(format(loandhi), ...) names(extremes) <- c("L1","L2","L3","L4","L5","H5","H4","H3","H2","H1") z$extremes <- extremes } } structure(z, class="describe") } describe.matrix <- function(x, descript, exclude.missing=TRUE, digits=4, ...) { if(missing(descript)) descript <- as.character(sys.call())[2] nam <- dimnames(x)[[2]] if(length(nam)==0) stop('matrix does not have column names') Z <- vector('list', length(nam)) names(Z) <- nam d <- dim(x) missing.vars <- NULL for(i in 1:ncol(x)) { z <- describe.vector(x[,i],nam[i],exclude.missing=exclude.missing, digits=digits,...) #13Mar99 Z[[i]] <- z if(exclude.missing && length(z)==0) missing.vars <- c(missing.vars,nam[i]) } attr(Z, 'descript') <- descript attr(Z, 'dimensions') <- d attr(Z, 'missing.vars') <- missing.vars structure(Z, class="describe") } describe.data.frame <- function(x, descript, exclude.missing=TRUE, digits=4, ...) { if(missing(descript)) descript <- as.character(sys.call())[2] nam <- names(x) Z <- list() nams <- character(0) i <- 0 missing.vars <- NULL for(xx in x) { mat <- is.matrix(xx) i <- i+1 z <- if(mat) describe.matrix(xx,nam[i],exclude.missing=exclude.missing, digits=digits,...) else describe.vector(xx,nam[i],exclude.missing=exclude.missing, digits=digits,...) 
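## z is a single 'describe' element for an ordinary vector, or a list
## of such elements (one per column) when the variable is a matrix;
## the two cases are flattened into Z differently below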
all.missing <- length(z)==0 if(exclude.missing && all.missing) missing.vars <- c(missing.vars, nam[i]) else { Z <- c(Z, if(mat) z else list(z)) nams <- c(nams, if(mat) names(z) else nam[i]) } } names(Z) <- nams attr(Z, 'descript') <- descript attr(Z, 'dimensions') <- dim(x) attr(Z, 'missing.vars') <- missing.vars structure(Z, class="describe") } describe.formula <- function(x, descript, data, subset, na.action, digits=4, weights, ...) { mf <- match.call(expand.dots=FALSE) mf$formula <- x mf$x <- mf$descript <- mf$file <- mf$append <- mf$... <- mf$digits <- NULL if(missing(na.action)) mf$na.action <- na.retain mf[[1]] <- as.name("model.frame") mf <- eval(mf, sys.parent()) weights <- model.extract(mf, weights) if(missing(descript)) { ter <- attr(mf,"terms") d <- as.character(x) if(attr(ter,"response")==1) d <- c(d[2],d[1],d[-(1:2)]) else d <- d[-1] d <- paste(d, collapse=" ") descript <- d } Z <- describe.data.frame(mf, descript, digits=digits, weights=weights, ...) if(length(z <- attr(mf,"na.action"))) attr(Z,'naprint') <- naprint(z) Z } na.retain <- function(d) d print.describe <- function(x, ...) { if(prType() == 'html') return(html.describe(x, ...)) at <- attributes(x) if(length(at$dimensions)) { cat(at$descript,'\n\n',at$dimensions[2],' Variables ',at$dimensions[1], ' Observations\n') if(length(at$naprint)) cat('\n',at$naprint,'\n') w <- paste(rep('-', .Options$width), collapse='') cat(w, '\n', sep='') for(z in x) { if(length(z)==0) next print.describe.single(z, ...) cat(w, '\n', sep='') } if(length(at$missing.vars)) { cat('\nVariables with all observations missing:\n\n') print(at$missing.vars, quote=FALSE) } } else print.describe.single(x, ...) invisible() } ## Function to format part of describe.single output after description & counts ## verb=1 means verbatim mode open formatdescribeSingle <- function(x, condense=c('extremes', 'frequencies', 'both', 'none'), lang=c('plain', 'latex', 'html'), verb=0, lspace=c(0, 0), size=85, ...) { condense <- match.arg(condense) lang <- match.arg(lang) wide <- .Options$width specs <- markupSpecs[[lang]] bv <- function() { if(lang == 'latex' && ! verb) '\\begin{verbatim}' else character() } vs <- if(lang == 'latex' && lspace[2] != 0) function() cat('\\vspace{', -lspace[2], 'ex}\n', sep='') else function() {} vbtm <- if(lang == 'html') function(x, omit1b=FALSE, ...) htmlVerbatim(x, size=size, omit1b=omit1b, ...) else function(x, omit1b=NULL) capture.output(print(x, quote=FALSE, ...)) R <- character(0) v <- x$values is.standard <- length(v) && is.list(v) && all(names(v) == c('value', 'frequency')) v_len <- if(is.standard) length(v$value) if(is.standard && v_len > 0L && v_len <= 20L) { # address GH issue #104 altv <- format(v$value) altv[is.na(altv)] <- 'NA' # check total width print.freq <- sum(nchar(altv)) <= 200 } else print.freq <- FALSE print.ext <- length(x$extremes) ## && ! 
print.freq if(print.ext) { val <- format(x$extremes) w <- nchar(paste(val, collapse=' ')) R <- c(R, bv()); verb <- 1 if(condense %in% c('extremes', 'both')) { if(lang == 'html') { fsize <- specs$size mnb <- function(x) specs$color(x, col='MidnightBlue') spc <- specs$space blo <- paste0(mnb('lowest'), spc, ':') bhi <- paste0(mnb('highest'), ':') if(w + 2 <= wide) { low <- paste(blo, paste(val[1: 5], collapse=' ')) hi <- paste(bhi, paste(val[6:10], collapse=' ')) R <- c(R, fsize(paste(low, ', ', hi), pct=size)) } else { low <- data.frame(name=blo, e1=val[1], e2=val[2], e3=val[3], e4=val[4], e5=val[5]) hi <- data.frame(name=bhi, e1=val[6], e2=val[7], e3=val[8], e4=val[9], e5=val[10]) tab <- html(rbind(low, hi, make.row.names=FALSE), align='r', header=NULL, border=0, size=size, file=FALSE) R <- c(R, tab) } } # end lang='html' else { # lang='plain' or 'latex' low <- paste('lowest :', paste(val[1: 5], collapse=' ')) hi <- paste('highest:', paste(val[6:10], collapse=' ')) R <- c(R, if(w + 2 <= wide) c('', paste0(low, ', ', hi)) else c('', low, hi) ) } } # end condense applicable to extremes else R <- c(R, if(lang != 'html') '', vbtm(val)) } if(print.freq) { R <- c(R, bv()); verb <- 1 val <- v$value freq <- v$frequency prop <- round(freq / sum(freq), 3) ## First try table output, if will fit in no more than 2 sets of 4 lines condensed <- TRUE if(condense %nin% c('frequencies', 'both')) { fval <- if(is.numeric(val)) format(val) else format(val, justify='right') ffreq <- format(freq) fprop <- format(prop) lval <- nchar(fval[1]) lfreq <- nchar(ffreq[1]) lprop <- nchar(fprop[1]) m <- max(lval, lfreq, lprop) ## Right justify entries in each row bl <- ' ' fval <- paste0(substring(bl, 1, m - lval ), fval) ffreq <- paste0(substring(bl, 1, m - lfreq), ffreq) fprop <- paste0(substring(bl, 1, m - lprop), fprop) w <- rbind(Value=fval, Frequency=ffreq, Proportion=fprop) colnames(w) <- rep('', ncol(w)) out <- capture.output(print(w, quote=FALSE)) if(length(out) <= 8) { R <- c(R, vbtm(w, omit1b=TRUE)) condensed <- FALSE } } # end condense frequencies (or both) if(condensed) { fval <- as.character(val) ffreq <- as.character(freq) fprop <- format(prop) lval <- nchar(fval[1]) lfreq <- nchar(ffreq[1]) lprop <- nchar(fprop[1]) w <- paste0(fval, ' (', ffreq, ', ', fprop, ')') w <- strwrap(paste(w, collapse=', '), width=wide) R <- c(R, '', w) } if(length(x$roundedTo)) R <- c(R, '', paste('For the frequency table, variable is rounded to the nearest', format(x$roundedTo, scientific=3))) } else if(length(v) && ! is.standard) R <- c(R, '', vbtm(v)) if(length(x$mChoice)) { R <- c(R, bv()); verb <- 1 R <- c(R, '', vbtm(x$mChoice, prlabel=FALSE)) } if(lang == 'latex' && verb) R <- c(R, '\\end{verbatim}') R } print.describe.single <- function(x, ...) { wide <- .Options$width des <- x$descript if(length(x$units)) des <- paste0(des, ' [', x$units, ']') if(length(x$format)) des <- paste0(des, ' Format:', x$format) cat(des,'\n') print(x$counts, quote=FALSE) R <- formatdescribeSingle(x, lang='plain', ...) cat(R, sep='\n') invisible() } '[.describe' <- function(object, i, ...) { at <- attributes(object) object <- '['(unclass(object),i) structure(object, descript=at$descript, dimensions=c(at$dimensions[1], length(object)), class='describe') } latex.describe <- function(object, title=NULL, file=paste('describe', first.word(expr=attr(object, 'descript')), 'tex', sep='.'), append=FALSE, size='small', tabular=TRUE, greek=TRUE, spacing=0.7, lspace=c(0,0), ...) 
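## Typical use (not run; d is a hypothetical data frame):
##   latex(describe(d), file='')   # latex() dispatches here for 'describe' objects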
{ at <- attributes(object) ct <- function(..., file, append=FALSE) { if(file=='') cat(...) else cat(..., file=file, append=append) invisible() } spc <- if(spacing == 0) '' else paste0('\\begin{spacing}{', spacing, '}\n') ct(spc, file=file, append=append) if(length(at$dimensions)) { ct('\\begin{center}\\textbf{', latexTranslate(at$descript), '\\\\', at$dimensions[2],'Variables~~~~~',at$dimensions[1], '~Observations}\\end{center}\n', file=file, append=TRUE) if(length(at$naprint)) ct(at$naprint,'\\\\\n', file=file, append=TRUE) ct('\\smallskip\\hrule\\smallskip{\\',size,'\n', sep='', file=file, append=TRUE) vnames <- at$names i <- 0 for(z in object) { i <- i + 1 if(length(z)==0) next val <- z$values potentiallyLong <- length(val) && ! is.matrix(val) && length(val) != 10 || ! all(names(val)== c("L1","L2","L3","L4","L5","H5","H4","H3","H2","H1")) dovbox <- TRUE # was ! potentiallyLong if(dovbox) cat('\\vbox{', file=file, append=TRUE) latex.describe.single(z, vname=vnames[i], file=file, append=TRUE, tabular=tabular, greek=greek, lspace=lspace, ...) ct('\\smallskip\\hrule\\smallskip\n', file=file, append=TRUE) if(dovbox) cat('}\n', file=file, append=TRUE) } if(length(mv <- at$missing.vars)) { ct('\\smallskip\\noindent Variables with all observations missing:\\ \\smallskip\n', file=file, append=TRUE) mv <- latexTranslate(mv) mv <- paste0('\\texttt{',mv,'}') mv <- paste(mv, collapse=', ') ct(mv, file=file, append=TRUE) } spc <- if(spacing == 0) '}\n' else '}\\end{spacing}\n' ct(spc, file=file, append=TRUE) } else { val <- object$values potentiallyLong <- length(val) && ! is.matrix(val) && length(val) != 10 || ! all(names(val)== c("L1","L2","L3","L4","L5","H5","H4","H3","H2","H1")) dovbox <- TRUE # was ! potentiallyLong if(dovbox) cat('\\vbox{', file=file, append=TRUE) latex.describe.single(object, vname=first.word(expr=at$descript), file=file, append=TRUE, size=size, tabular=tabular, lspace=lspace, ...) if(dovbox) cat('}\n', file=file, append=TRUE) spc <- if(spacing == 0) '\n' else '\\end{spacing}\n' ct(spc, file=file, append=TRUE) } structure(list(file=file, style=c('setspace','relsize')), class='latex') } latex.describe.single <- function(object, title=NULL, vname, file, append=FALSE, size='small', tabular=TRUE, greek=TRUE, lspace=c(0,0), ...) { ct <- function(..., file, append=FALSE) { if(file=='') cat(...) else cat(..., file=file, append=append) invisible() } oldw <- options('width') options(width=if(size == 'small') 95 else 85) on.exit(options(oldw)) wide <- switch(size, normalsize = 73, # was 66 small = 95, # was 73 scriptsize =110, # was 93 73) Values <- object$values ## Put graph on its own line if length of label > 3.5 inches ## For normalsize there are 66 characters per 4.8 in. standard width z <- latexTranslate(object$descript, '&', '\\&', greek=greek) ## If any math mode ($ not preceeded by \) don't put label part in bold des <- if(! 
length(grep('[^\\]\\$', z))) paste0('\\textbf{', z, '}') else { ## Get text before : (variable name) sp <- strsplit(z, ' : ')[[1]] vnm <- sp[1] rem <- paste(sp[-1], collapse=':') paste0('\\textbf{', vnm, '}: ', rem) } if(length(object$units)) des <- paste0(des, '{\\smaller[1] [', latexTranslate(object$units),']}') if(length(object$format)) des <- paste0(des, '{\\smaller~~Format:', latexTranslate(object$format), '}') desbas <- paste(object$descript, if(length(object$units)) paste0(' [', object$units, ']'), if(length(object$format)) paste0(' Format:', object$format)) ct('\\noindent', des, sep='', file=file, append=append) lco <- if(length(Values)) length(Values$frequency) else 0 if(lco > 2) { counts <- Values$frequency maxcounts <- max(counts) ## Scale distinct values to range from 1 : lco va <- Values$value if(! is.numeric(va)) va <- 1 : lco else { rang <- range(va) va <- 1 + (lco - 1) * (va - rang[1]) / diff(rang) } ## \mbox{~~~} makes \hfill work ct(if(nchar(desbas)/(wide / 4.8) > (4.8 - 1.5))' \\\\ \\mbox{~~~} \n', '\\setlength{\\unitlength}{0.001in}\\hfill', '\\begin{picture}(1.5,.1)(1500,0)', '\\linethickness{0.6pt}\n', sep='', file=file, append=TRUE) ## Todo: may need to label limits used since are pretty()'d versions for(i in 1 : lco) { ct('\\put(', round(1000 * (va[i] - 1) * 1.5 / lco),',0){\\line(0,1){', max(1, round(1000 * counts[i] / maxcounts * .1)), '}}\n', sep='', file=file, append=TRUE) } ct('\\end{picture}\n', file=file, append=TRUE) } else ct('\n', file=file, append=TRUE) sz <- '' if(tabular) { ml <- nchar(paste(object$counts, collapse=' ')) if(ml > 90) tabular <- FALSE else if(ml > 80) sz <- '[2]' } ct('\n{\\smaller', sz, '\n', sep='', file=file, append=TRUE) if(tabular) { if(lspace[1] != 0) ct('\\vspace{', -lspace[1], 'ex}\n', sep='', file=file, append=TRUE) ct('\\begin{tabular}{', paste(rep('r',length(object$counts)),collapse=''),'}\n', file=file, append=TRUE) ct(paste(latexTranslate(names(object$counts)), collapse='&'), '\\\\\n', file=file, append=TRUE) ct(paste(latexTranslate(object$counts), collapse='&'), '\\end{tabular}\n', file=file, append=TRUE) } vs <- if(lspace[2] != 0) function() ct('\\vspace{', -lspace[2], 'ex}\n', sep='', file=file, append=TRUE) else function() {} if(file != '') sink(file, append=TRUE) verb <- 0 if(! tabular) { vs() cat('\\begin{verbatim}\n'); verb <- 1 print(object$counts, quote=FALSE) } R <- formatdescribeSingle(object, lang='latex', verb=verb, lspace=lspace, ...) cat(R, sep='\n') cat('}\n') ## ends \smaller if(file != '') sink() invisible() } html.describe <- function(object, size=85, tabular=TRUE, greek=TRUE, scroll=FALSE, rows=25, cols=100, ...) { at <- attributes(object) m <- markupSpecs$html center <- m$center bold <- m$bold code <- m$code br <- m$br lspace <- m$lspace sskip <- m$smallskip hrule <- m$hrulethin fsize <- m$size mnb <- function(x) m$color(x, col='MidnightBlue') R <- c(m$unicode, m$style()) ## define thinhr (and others not needed here) R <- c(R, paste0('<title>', at$descript, ' Descriptives')) if(length(at$dimensions)) { R <- c(R, mnb(center(bold(paste(htmlTranslate(at$descript), sskip, at$dimensions[2], ' Variables', lspace, at$dimensions[1],' Observations'))))) if(length(at$naprint)) R <- c(R, '', at$naprint) R <- c(R, hrule) vnames <- at$names i <- 0 for(z in object) { i <- i + 1 if(! length(z)) next r <- html.describe.single(z, ## vname=vnames[i], tabular=tabular, greek=greek, size=size, ...) 
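## html.describe.single returns a character vector of HTML fragments;
## variables are separated by thin horizontal rules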
R <- c(R, r, hrule) } if(length(mv <- at$missing.vars)) { R <- c(R, sskip, 'Variables with all observations missing:', br, sskip) mv <- paste(code(htmlTranslate(mv)), collapse=', ') R <- c(R, mv) } if(scroll) R <- m$scroll(R, size=size, rows=rows, cols=cols, name=at$descript) } else R <- c(R, html.describe.single(object, tabular=tabular, greek=greek, size=size, ...)) rendHTML(R) } html.describe.single <- function(object, size=85, tabular=TRUE, greek=TRUE, ...) { m <- markupSpecs$html center <- m$center bold <- m$bold code <- m$code br <- m$br lspace <- m$lspace sskip <- m$smallskip fsize <- m$size smaller<- m$smaller pngfile <- paste(tempdir(), 'needle1234567890a.png', sep='/') oldw <- options('width') options(width=if(size < 90) 95 else 85) on.exit(options(oldw)) wide <- if(size >= 90) 73 else if(size >= 75) 95 else 110 z <- htmlTranslate(object$descript, greek=greek) des <- if(! length(grep(':', z))) bold(z) else { ## Get text before : (variable name) sp <- strsplit(z, ' : ')[[1]] vnm <- sp[1] rem <- paste(sp[-1], collapse=':') paste0(bold(vnm), ': ', rem) } if(length(object$units)) des <- m$varlabel(des, htmlTranslate(object$units)) if(length(object$format)) des <- paste0(des, lspace, smaller(paste0('Format:', htmlTranslate(object$format)))) Values <- object$values lco <- if(length(Values)) length(Values$frequency) else 0 if(lco > 2) { counts <- Values$frequency maxcounts <- max(counts) counts <- counts / maxcounts ## Scale distinct values to range from 1 : lco va <- Values$value if(! is.numeric(va)) va <- 1 : lco else { rang <- range(va) va <- 1 + (lco - 1) * (va - rang[1]) / diff(rang) } w <- if(lco >= 50) 150 / lco else 3 des <- paste0(des, m$rightAlign(tobase64image(pngNeedle(counts, x=va, w=w, h=13, lwd=2, file=pngfile)))) } R <- des sz <- size if(tabular) { ml <- nchar(paste(object$counts, collapse=' ')) if(ml > 90) tabular <- FALSE else if(ml > 80) sz <- round(0.875 * size) } if(tabular) { d <- as.data.frame(as.list(object$counts)) colnames(d) <- names(object$counts) tab <- html(d, file=FALSE, align='c', align.header='c', bold.header=FALSE, col.header='MidnightBlue', border=0, translate=TRUE, size=sz) R <- c(R, tab) } else R <- c(R, htmlVerbatim(object$counts, size=sz)) R <- c(R, formatdescribeSingle(object, lang='html', ...)) R } dataDensityString <- function(x, nint=30) { x <- as.numeric(x) x <- x[! is.na(x)] if(length(x) < 2) return('') r <- range(x) x <- floor(nint * (x-r[1])/(r[2]-r[1])) x <- pmin(tabulate(x), 37) paste0(format(r[1]),' <', paste(substring(' 1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', x+1,x+1), collapse=''), '> ',format(r[2])) } contents <- function(object, ...) UseMethod('contents') contents.data.frame <- function(object, sortlevels=FALSE, id=NULL, range=NULL, values=NULL, ...) { dfname <- deparse(substitute(object)) nam <- names(object) d <- dim(object) n <- length(nam) fl <- nas <- integer(n) cl <- sm <- lab <- un <- longlab <- character(n) Lev <- list() for(i in 1:n) { x <- object[[i]] at <- attributes(x) if(length(at$label)) lab[i] <- at$label if(length(at$longlabel)) longlab[i] <- at$longlabel if(length(at$units)) un[i] <- at$units atl <- at$levels fl[i] <- length(atl) cli <- at$class[at$class %nin% c('labelled', 'factor')] if(length(cli)) cl[i] <- cli[1] sm[i] <- storage.mode(x) nas[i] <- sum(is.na(x)) if(length(atl)) { if(sortlevels) atl <- sort(atl) if(length(Lev)) for(j in 1 : length(Lev)) { w <- Lev[[j]] if(! 
is.name(w) && is.logical(all.equal(w, atl))) { atl <- as.name(names(Lev)[j]) break } } Lev[[nam[i]]] <- atl } } w <- list(Labels = if(any(lab != '')) lab, Units = if(any(un != '')) un, Levels = if(any(fl > 0)) fl, Class = if(any(cl != '')) cl, Storage= sm, NAs = if(any(nas > 0)) nas ) w <- w[sapply(w, function(x)length(x) > 0)] ## R does not remove NULL elements from a list structure(list(contents=data.frame(w, row.names=nam), dim=d, maxnas=max(nas), id=id, rangevar=range, valuesvar=values, unique.ids = if(length(id) && id %in% nam) length(unique(object[[id]])), range = if(length(range) && range %in% nam) paste(as.character(range(object[[range]], na.rm=TRUE)), collapse='-'), values = if(length(values) && values %in% nam) paste(if(is.factor(object[[values]])) levels(object[[values]]) else sort(unique(object[[values]])), collapse=' '), dfname=dfname, Levels=Lev, longLabels=if(any(longlab != '')) structure(longlab, names=nam)), class='contents.data.frame') } print.contents.data.frame <- function(x, sort=c('none','names','labels','NAs'), prlevels=TRUE, maxlevels=Inf, number=FALSE, ...) { sort <- match.arg(sort) if(prType() == 'html') return(html.contents.data.frame(x, sort=sort, prlevels=prlevels, maxlevels=maxlevels, number=number, ...) ) d <- x$dim maxnas <- x$maxnas cat('\nData frame:', x$dfname, '\t', d[1],' observations and ', d[2], ' variables Maximum # NAs:', maxnas, '\n', sep='') if(length(x$id)) cat('Distinct ', x$id, ':', x$unique.ids, '\t', sep='') if(length(x$rangevar)) cat(x$rangevar, ' range:', x$range, '\t', sep='') if(length(x$valuesvar))cat(x$valuesvar, ':', x$values, sep='') cat('\n\n') cont <- x$contents nam <- row.names(cont) if(number) row.names(cont) <- paste(format(1:d[2]), row.names(cont)) switch(sort, names={ cont <- cont[order(nam),,drop=FALSE] }, labels={ if(length(cont$Labels)) cont <- cont[order(cont$Labels, nam),, drop=FALSE] }, NAs={ if(maxnas > 0) cont <- cont[order(cont$NAs, nam),, drop=FALSE] }) if(length(cont$Levels)) cont$Levels <- ifelse(cont$Levels == 0, '', format(cont$Levels)) print(cont) if(prlevels && length(L <- x$Levels)) { cat('\n') nam <- names(L) w <- .Options$width - max(nchar(nam)) - 5 reusingLevels <- sapply(L, is.name) fullLevels <- which(! reusingLevels) namf <- lin <- names(L[fullLevels]) ## separate multiple lines per var with \n for print.char.matrix j <- 0 for(i in fullLevels) { j <- j + 1 varsUsingSame <- NULL if(sum(reusingLevels)) { for(k in which(reusingLevels)) if(L[[k]] == namf[j]) varsUsingSame <- c(varsUsingSame, nam[k]) if(length(varsUsingSame)) namf[j] <- paste(c(namf[j], varsUsingSame), collapse='\n') } Li <- L[[i]] if(length(Li) > maxlevels) Li <- c(Li[1 : maxlevels], '...') lin[j] <- paste(pasteFit(Li, width=w), collapse='\n') } z <- cbind(Variable=namf, Levels=lin) print.char.matrix(z, col.txt.align='left', col.name.align='left', row.names=TRUE, col.names=TRUE) } longlab <- x$longLabels if(length(longlab)) { if(existsFunction('strwrap')) for(i in 1:length(longlab)) { if(longlab[i] != '') longlab[i] <- paste(strwrap(longlab[i],width=.85*.Options$width ), collapse='\n') } i <- longlab != '' nam <- names(longlab) z <- cbind(Variable=nam[i], 'Long Label'=longlab[i]) print.char.matrix(z, col.names=TRUE, row.names=FALSE, cell.align='left') } invisible() } html.contents.data.frame <- function(object, sort=c('none', 'names', 'labels', 'NAs'), prlevels=TRUE, maxlevels=Inf, levelType=c('list', 'table'), number=FALSE, nshow=TRUE, ...) 
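## Typical use (not run; d is a hypothetical data frame):
##   options(prType='html')
##   contents(d)                 # print method dispatches to this renderer
##   html(contents(d), levelType='table', maxlevels=10)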
{
  sort <- match.arg(sort)
  levelType <- match.arg(levelType)
  mu <- markupSpecs$html
  lspace <- mu$lspace
  lspace <- ' '   # override - browsers were not rendering correctly
  hrule  <- mu$hrule
  d      <- object$dim
  maxnas <- object$maxnas
  R <- paste0('<title>', object$dfname, ' Contents</title>')
  if(nshow) {
    R <- c(R, paste0(hrule, '<h4>Data frame:', object$dfname, '</h4>',
                     d[1], ' observations and ', d[2],
                     ' variables, maximum # NAs:', maxnas, lspace, lspace) )
    if(length(object$id))
      R <- paste0(R, 'Distinct ', object$id, ':', object$unique.ids,
                  lspace, lspace)
    if(length(object$rangevar))
      R <- paste0(R, object$rangevar, ' range:', object$range, lspace, lspace)
    if(length(object$valuesvar))
      R <- paste0(R, object$valuesvar, ':', object$values, lspace, lspace)
    R <- c(R, hrule)
  } else R <- paste0(hrule, '<h4>Data frame:', object$dfname, '</h4>',
                     ' Variables:', d[2], hrule)
  cont <- object$contents
  nam <- row.names(cont)
  if(number) {
    rn <- paste(format(1:d[2]), row.names(cont))
    nbsp <- htmlSpecial('nbsp')
    rn <- sedit(rn, ' ', paste0(nbsp, nbsp))
    row.names(cont) <- rn
  }
  switch(sort,
         names={cont <- cont[order(nam),,drop=FALSE]},
         labels={ if(length(cont$Labels))
                    cont <- cont[order(cont$Labels, nam),,drop=FALSE] },
         NAs={ if(maxnas>0) cont <- cont[order(cont$NAs,nam),,drop=FALSE] })
  link <- matrix('', nrow=nrow(cont), ncol=1+ncol(cont),
                 dimnames=list(dimnames(cont)[[1]],
                               c('Name', dimnames(cont)[[2]])))
  longlab <- object$longLabels
  if(length(longlab)) {
    longlab <- longlab[longlab != '']
    link[names(longlab),'Name'] <- paste('#longlab', names(longlab), sep='.')
  }
  L <- object$Levels
  Lnames <- names(L)
  if(length(cont$Levels)) {
    cont$Levels <- ifelse(cont$Levels==0, '', format(cont$Levels))
    namUsed <- sapply(L, function(z) if(is.name(z)) as.character(z) else '')
    reusingLevels <- namUsed != ''
    fullLevels <- which(! reusingLevels)
    namUsed <- ifelse(reusingLevels, namUsed, Lnames)
    names(namUsed) <- Lnames
    link[,'Levels'] <- ifelse(cont$Levels=='', '',
                              paste('#levels', namUsed[nam], sep='.'))
  }
  adj <- rep('l', length(cont))
  adj[names(cont) %in% c('NAs','Levels')] <- 'r'
  if(! nshow) {
    cont$NAs <- NULL
    link <- link[, colnames(link) != 'NAs', drop=FALSE]
    adj  <- adj[names(adj) != 'NAs']
  }
  out <- html(cont, file=FALSE, rownames=TRUE, link=link, border=2,
              col.just=adj, ...)
  R <- c(R, as.character(out), hrule)
  if(prlevels && length(L) > 0) {
    if(levelType=='list') {
      R <- c(R, '<h4>Category Levels</h4>')
      for(i in fullLevels) {
        l    <- L[[i]]
        nami <- Lnames[i]
        w    <- nami
        if(sum(reusingLevels))
          for(k in which(reusingLevels))
            if(L[[k]] == nami) w <- c(w, Lnames[k])
        R <- c(R, paste0('<a name="levels.', nami, '"><h5>',
                         paste(w, collapse=', '), '</h5></a>'))
        if(length(l) > maxlevels) l <- c(l[1 : maxlevels], '...')
        for(k in l) R <- c(R, paste0('<li>', k, '</li>\n'))
      }
    } else {
      ## Function to split a character vector x as evenly as
      ## possible into n elements, pasting multiple elements
      ## together when needed
      evenSplit <- function(x, n) {
        indent <- function(z)
          if(length(z) == 1) z else c(z[1], paste0(' ', z[-1]))
        m <- length(x)
        if(m <= n) return(c(indent(x), rep('', n-m)))
        totalLength <- sum(nchar(x)) + (m-1)*3.5  ## add indent, comma, space
        lineLength  <- ceiling(totalLength/n)
        y <- pasteFit(x, sep=', ', width=lineLength)
        m <- length(y)
        if(m > n) for(j in 1:10) {
          lineLength <- round(lineLength*1.1)
          y <- pasteFit(x, sep=', ', width=lineLength)
          m <- length(y)
          if(m <= n) break
        }
        ## Take evasive action if needed
        if(m == n) indent(y) else if(m < n) c(indent(y), rep('', n - m)) else c(paste(x, collapse=', '), rep('', n - 1))
      }
      nam <- names(L)
      v <- lab <- lev <- character(0)
      j <- 0
      for(i in fullLevels) {
        j <- j + 1
        l <- L[[i]]
        if(length(l) > maxlevels) l <- c(l[1 : maxlevels], '...')
        nami <- nam[i]
        v <- c(v, nami)
        w <- nami
        if(sum(reusingLevels))
          for(k in which(reusingLevels))
            if(L[[k]] == nam[i]) w <- c(w, nam[k])
        lab <- c(lab, evenSplit(w, length(l)))
        lev <- c(lev, l)
      }
      z <- cbind(Variable=lab, Levels=lev)
      out <- html(z, file=FALSE,
                  link=ifelse(lab=='', '', paste('levels', v, sep='.')),
                  linkCol='Variable', linkType='name', border=2, ...)
      R <- c(R, as.character(out), hrule)
    }
  }
  i <- longlab != ''
  if(any(i)) {
    nam <- names(longlab)[i]
    names(longlab) <- NULL
    lab <- paste('longlab', nam, sep='.')
    z <- cbind(Variable=nam, 'Long Label'=longlab[i])
    out <- html(z, file=FALSE, link=lab, linkCol='Variable',
                linkType='name', ...)
    R <- c(R, as.character(out), hrule)
  }
  rendHTML(R)
}

contents.list <- function(object, dslabels=NULL, ...)
{
  nam <- names(object)
  if(length(dslabels)) {
    dslabels <- dslabels[nam]
    names(dslabels) <- NULL
  }
  g <- function(w) {
    if(length(w)==0 || is.null(w))
      c(Obs=0, Var=if(is.null(w)) NA else length(w), Var.NA=NA)
    else
      c(Obs=length(w[[1]]), Var=length(w),
        Var.NA=sum(sapply(w, function(x) sum(is.present(x))==0)))
  }
  v <- t(sapply(object, g))
  structure(list(contents=if(length(dslabels))
                   data.frame(Label=dslabels, Obs=v[,'Obs'],
                              Var=v[,'Var'], Var.NA=v[,'Var.NA'],
                              row.names=nam)
                 else
                   data.frame(Obs=v[,'Obs'], Var=v[,'Var'],
                              Var.NA=v[,'Var.NA'], row.names=nam)),
            class='contents.list')
}

print.contents.list <-
  function(x, sort=c('none','names','labels','NAs','vars'), ...)
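## Typical use (not run): x is a contents() result for a list of data
## frames, e.g. as returned by sasxport.get(); lst is hypothetical:
##   print(contents(lst), sort='vars')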
{ sort <- match.arg(sort) cont <- x$contents nam <- row.names(cont) cont <- cont[ switch(sort, none=1:length(nam), names=order(nam), vars=order(cont$Var), labels=order(cont$Label, nam), NAs=order(cont$Var.NA,nam)),] print(cont) invisible() } Hmisc/R/rcspline.restate.s0000644000176200001440000001416114112731553015174 0ustar liggesusersrcspline.restate <- function(knots, coef, type=c("ordinary","integral"), x="X", lx=nchar(x),norm=2, columns=65, before="& &", after="\\", begin="", nbegin=0, digits=max(8,.Options$digits)) { type <- match.arg(type) k <- length(knots) if(k<3) stop("must have >=3 knots in a restricted cubic spline") p <- length(coef) if(p == k) { Intc <- coef[1] coef <- coef[-1] p <- p-1 } else Intc <- 0 if(k-1 != p) stop("coef must be of length # knots - 1") knotnk <- knots[k] knotnk1 <- knots[k-1] knot1 <- knots[1] kd <- if(norm==0) 1 else if(norm==1)(knotnk-knotnk1)^3 else (knotnk-knot1)^2 coef[-1] <- coef[-1]/kd d <- c(0, knots-knotnk)[1:p] coefk <- sum(coef*d)/(knotnk-knotnk1) d <- c(0, knots-knotnk1)[1:p] coefk1 <- sum(coef*d)/(knotnk1-knotnk) if(!length(names(coef))) names(coef) <- paste(x,1:length(coef),sep="") coef <- c(coef, coefk, coefk1) names(coef)[k] <- "1st restricted coef" names(coef)[k+1] <- "2nd restricted coef" if(type=="integral") coef <- c(.5*coef[1],.25*coef[-1]) cof <- formatSep(coef, digits) kn <- formatSep(-knots, digits) if(Intc!=0) { txt <- txt2 <- formatSep(Intc, digits) if(type=="integral") { txt <- paste(txt, "* x") txt2 <- paste(txt2, '*', x) } if(coef[1]>=0) { txt <- paste(txt, "+"); txt2 <- paste(txt2, '+') } } else txt <- txt2 <- "" if(cof[1]!=0) { txt <- paste(txt, cof[1], if(type=="ordinary")"* x" else "* x^2", sep="") txt2 <- paste(txt2, cof[1], if(type=="ordinary") paste("*",x) else paste("*",x,"^2"), sep="") } for(i in 2:(p+2)) { nam <- paste("pmax(x", if(knots[i-1]<0) "+" else NULL, if(knots[i-1]!=0) kn[i-1] else NULL, ",0)^", if(type=="ordinary")"3" else "4", sep="") nam2 <- paste("pmax(",x, if(knots[i-1]<0) "+" else NULL, if(knots[i-1]!=0) kn[i-1] else NULL, ",0)^", if(type=="ordinary")"3" else "4", sep="") z <- paste(if(coef[i]>=0 & (i>2 | coef[1]!=0 | Intc!=0)) "+" else NULL, cof[i], "*", nam, sep="") z2 <- paste(if(coef[i]>=0 & (i>2 | coef[1]!=0 | Intc!=0)) "+" else NULL, cof[i], "*", nam2, sep="") txt <- paste(txt , z, sep="") txt2<- paste(txt2, z2, sep="") } func <- parse(text=paste('function(x)', txt)) cof <- formatSep(coef, digits) kn <- formatSep(-knots, digits) lcof <- nchar(cof) cof <- latexSN(cof) cur <- begin; colcnt <- nbegin; tex <- NULL if(Intc!=0) { fint <- formatSep(Intc, digits) if(type=="integral") { fint <- paste(fint, x) colcnt <- colcnt+2 } cur <- paste(cur, fint, sep="") colcnt <- colcnt + nchar(fint) if(coef[1]>0) { cur <- paste(cur, " + ", sep=""); colcnt <- colcnt+3 } } if(coef[1]!=0) { sp <- if(substring.location(cof[1],"times")$first > 0) "\\:" else NULL cur <- paste(cur, cof[1], sp, x, if(type=="integral") "^2", sep="") ##\:=medium space in LaTeX colcnt <- colcnt+lcof[1]+lx+(type=="integral") } tex.names <- character(p+2) size <- lx+lcof[-1]+nchar(kn)+3 for(i in 2:(p+2)) { nam <- paste("(", x, if(knots[i-1]<0) "+" else NULL, if(knots[i-1]!=0) kn[i-1] else NULL, ")_{+}^{", if(type=="ordinary")"3}" else "4}", sep="") q <- paste(if(coef[i]>=0 & (i>2 | coef[1]!=0 | Intc!=0)) "+" else NULL, cof[i], nam, sep="") n <- size[i-1] if(colcnt+n > columns) { tex <- c(tex, cur) cur <- "" colcnt <- 0 } cur <- paste(cur, q, sep="") colcnt <- colcnt+n } tex <- c(tex, cur) tex <- paste(before, tex, after) if(Intc!=0) coef <- 
c(Intercept=Intc, coef)
  attr(coef, "knots") <- knots
  attr(coef, "function") <- func
  attr(coef, "function.text") <- txt2
  attr(coef, "latex") <- tex
  names(colcnt) <- NULL
  attr(coef, "columns.used") <- colcnt
  coef
}

rcsplineFunction <- function(knots, coef=numeric(0), norm=2,
                             type=c('ordinary', 'integral'))
{
  type <- match.arg(type)
  k  <- length(knots)
  kd <- if(norm==0) 1 else if(norm==1) knots[k]-knots[k-1] else
        (knots[k]-knots[1])^.66666666666666666666666
  f <- function(x, knots, coef, kd, type) {
    k       <- length(knots)
    knotnk  <- knots[k]
    knotnk1 <- knots[k - 1]
    knot1   <- knots[1]
    if(length(coef) < k) coef <- c(0, coef)
    if(type == 'ordinary') {
      y <- coef[1] + coef[2] * x
      for(j in 1 : (k - 2))
        y <- y + coef[j + 2] *
          (pmax((x - knots[j]) / kd, 0) ^ 3 +
           ((knotnk1 - knots[j]) * pmax((x - knotnk) / kd, 0) ^ 3 -
            (knotnk - knots[j]) * (pmax((x - knotnk1) / kd, 0) ^ 3)) /
           (knotnk - knotnk1))
      return(y)
    }
    y <- coef[1] * x + 0.5 * coef[2] * x * x
    for(j in 1 : (k - 2))
      y <- y + 0.25 * coef[j + 2] * kd *
        (pmax((x - knots[j]) / kd, 0) ^ 4 +
         ((knotnk1 - knots[j]) * pmax((x - knotnk) / kd, 0) ^ 4 -
          (knotnk - knots[j]) * (pmax((x - knotnk1) / kd, 0) ^ 4)) /
         (knotnk - knotnk1))
    y
  }
  formals(f) <- list(x=numeric(0), knots=knots, coef=coef, kd=kd, type=type)
  f
}
Hmisc/R/hidingTOC.r0000644000176200001440000001710413417776562013526 0ustar liggesusers#' Moving and Hiding Table of Contents
#'
#' Moving and hiding table of contents for Rmd HTML documents
#'
#' \code{hidingTOC} creates a table of contents in an Rmd document that
#' can be hidden at the press of a button. It also generates buttons that
#' allow the different level depths of the table of contents to be shown
#' or hidden.
#'
#' @param buttonLabel the text on the button that hides and unhides the
#'  table of contents. Defaults to \code{Contents}.
#' @param levels the maximum depth of the table of contents for which
#'  display-toggling buttons are generated (defaults to 3).
#' @param posCollapse if \code{'margin'} then display the depth select buttons
#'  vertically along the side of the page chosen by \code{buttonSide}. If
#'  \code{'top'} then display the depth select buttons horizontally under the
#'  button that hides the TOC. Defaults to \code{'margin'}. \code{'bottom'} is
#'  currently unimplemented.
#' @param tocSide the side of the page on which the table of contents is
#'  placed. Can be either \code{'right'} or \code{'left'}. Defaults to
#'  \code{'right'}.
#' @param buttonSide the side of the page on which the button that hides the
#'  TOC is placed. Can be either \code{'right'} or \code{'left'}. Defaults to
#'  \code{'right'}.
#' @param hidden logical: should the table of contents be hidden at page load?
#'  Defaults to \code{FALSE}.
#'
#' @return an HTML-formatted text string to be inserted into a markdown document
#' @author Thomas Dupont
#' @examples
#' \dontrun{
#' hidingTOC()
#' }
#' @export
hidingTOC <- function(buttonLabel="Contents", levels=3,
                      tocSide=c('right','left'), buttonSide=c('right','left'),
                      posCollapse=c('margin','top','bottom'), hidden=FALSE)
{
    ## Make javascript functions that control the hiding and unhiding of
    ## different levels of the TOC
    ## This is done by switching on elements with id equal to TOC or class equal
    ## tocify-subheader and with attribute data-tag values less than or equal to
    ## the requested level and by switching off elements with attribute data-tag
    ## values greater than the requested level.
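    ## For example, with levels=3 the three generated functions are:
    ##   function expandLevel1(){$("#TOC").toggle(true);$(".tocify-subheader").toggle(false)}
    ##   function expandLevel2(){$("#TOC,.tocify-subheader[data-tag=2]").toggle(true);$(".tocify-subheader[data-tag=3]").toggle(false)}
    ##   function expandLevel3(){$("#TOC,.tocify-subheader").toggle(true)}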
    makeLevelExpandFun <- function(level, maxLevels) {
        ## Sanity check to make sure that level is never greater than maxLevels
        if (level > maxLevels)
            stop("level value ", level, " is greater than maxLevels value ",
                 maxLevels)

        ## There are 2 special cases.
        return(if (level == 1L) {
            ## Where the requested level equals 1. Unhide the element with id
            ## equal to TOC and hide the elements with class equal to
            ## tocify-subheader.
            'function expandLevel1(){$("#TOC").toggle(true);$(".tocify-subheader").toggle(false)}'
        } else if (level == maxLevels) {
            ## Where the requested level is equal to maxLevels, just unhide
            ## all elements with id equal to TOC or class equal to
            ## tocify-subheader.
            paste0('function expandLevel', level,
                   '(){$("#TOC,.tocify-subheader").toggle(true)}')
        } else {
            ## General case: level greater than 1 and less than maxLevels. Unhide
            ## the elements with id equal to TOC or class equal to
            ## tocify-subheader with attribute data-tag values less than or
            ## equal to the requested level. Then hide elements with class
            ## tocify-subheader with attribute data-tag values greater than the
            ## requested level but less than or equal to maxLevels.
            paste0("function expandLevel", level, '(){$("#TOC,',
                   paste0('.tocify-subheader[data-tag=',seq.int(2L, level),']',
                          collapse=','),
                   '").toggle(true);$("',
                   paste0('.tocify-subheader[data-tag=',
                          seq.int(level+1L, maxLevels),']', collapse=','),
                   '").toggle(false)}')
        })
    }

    ## basic HTML skeleton in which to place various values
    skeleton <- '
<style type="text/css">
%s
</style>
<script>
%s
</script>
<div id="toc-controls">
%s
%s
</div>
'

    buttonSide <- match.arg(buttonSide)
    tocSide <- match.arg(tocSide)
    posCollapse <- match.arg(posCollapse)

    if(posCollapse == 'bottom') {
        stop("argument posCollapse = bottom is not supported yet")
    }

    if(tocSide != buttonSide)
        stop("non-symmetric values for tocSide and buttonSide are not supported")

    if(!missing(hidden) &&
       (length(hidden) == 0L || (!is.logical(hidden) && !is.numeric(hidden))))
        stop("hidden must be of logical type")

    levelSequence <- seq_len(levels)

    ## CSS text
    cssText <- paste0(".toc-level-select-group{clear:both}#TOC{position:fixed;top:0;",
                      tocSide, ':0;margin:',
                      switch(posCollapse, margin = '23px ', top = '44px '),
                      switch(posCollapse, margin = '20px ', top = '0px '),
                      '20px ',
                      switch(posCollapse, margin = '20px', top = '0px'),
                      ';z-index:9', if(hidden) ";display:none",
                      '}#toc-controls{position:fixed;top:0;',
                      buttonSide, ':0;margin:0px}.col-md-3{width: 0%}.col-md-9{width: 100%}',
                      'div.container-fluid.main-container{max-width:none;margin-left:0px;margin-right:none}')

    ## Generate the javascript text needed for the TOC level display buttons'
    ## functions.
    scriptText <- paste0(vapply(levelSequence, makeLevelExpandFun, "",
                                maxLevels=levels),
                         collapse="")

    ## Which side the buttons should be pulled to.
    pullClass <- if(buttonSide == "right") {
        "pull-right"
    } else {
        "pull-left"
    }

    ## Generate the hiding button HTML text.
    buttonText <- paste0("<button type=\"button\" id=\"toc-toggle\" class=\"btn btn-default btn-xs ",
                         pullClass,
                         "\" onclick=\"$('#TOC').toggle()\">",
                         buttonLabel, "</button>")

    ## Generate the level buttons' HTML text.
    levelButtonText <-
        paste0('<div class="toc-level-select-group ', pullClass, '">',
               paste0("<button type=\"button\" class=\"btn btn-default btn-xs\" onclick=\"expandLevel",
                      levelSequence, "()\">", levelSequence, "</button>",
                      collapse=switch(posCollapse,
                                      margin = "<br/>",
                                      top = "",
                                      stop("Unknown value for posCollapse ",
                                           posCollapse))),
               '</div>')
    ") return(sprintf(skeleton, cssText, scriptText, buttonText, levelButtonText)) } Hmisc/R/list.tree.s0000644000176200001440000000772613067145734013641 0ustar liggesuserslist.tree <- function(struct,depth=-1, numbers=FALSE, maxlen=22, maxcomp=12, attr.print=TRUE, front="", fill=". ", name.of, size=TRUE) { if(depth==0) return() opts <- options('digits') options(digits=5) on.exit(options(opts)) if (missing(name.of)) name.of <- deparse(substitute(struct)) len <- length(struct) cat(front,name.of,"=",storage.mode(struct),len) if(size) cat(" (",object.size(struct)," bytes)",sep="") if(is.array(struct)) cat("=", if(length(dimnames(struct))) "named", "array",paste(dim(struct),collapse=" X ")) if(is.ts(struct)) cat("= time series",tsp(struct)) if(is.factor(struct)) cat("= factor (",length(levels(struct))," levels)",sep="") if(length(attr(struct,'class'))>0) cat("(",attr(struct,'class'),")") if(is.atomic(struct) && !is.character(struct)&& len>0 && maxlen>0) { field <- "=" for(i in 1:length(struct)) { field <- paste(field,format(as.vector(struct[i]))) if(nchar(field)>maxlen-6) { field <- paste(field,"..."); break } } cat(field,"\n",sep="") } else if(is.character(struct) && len>0 && maxlen>0) cat("=",substring(struct[1:(last <- max(1,(1:len) [cumsum(nchar(struct)+1)0) { structnames <- names(struct) if(!length(structnames)) structnames <- rep("",len) noname <- structnames=="" structnames[noname] <- paste("[[",(1:length(structnames))[noname],"]]",sep="") for (i in 1:min(length(structnames),maxcomp)) if (mode(struct[[i]])=="argument" | mode(struct[[i]])=="unknown") cat(front,fill," ",structnames[i]," = ", as.character(struct[[i]])[1],"\n",sep="") else list.tree(struct[[i]],depth=depth-1,numbers,maxlen,maxcomp, attr.print, if(numbers) paste(front,i,sep=".") else paste(front,fill,sep=""), fill,structnames[i],size=FALSE) if(length(structnames)>maxcomp) cat(front,fill," ... and ",length(structnames)-maxcomp, " more\n",sep="") } attribs <- attributes(struct) attribnames <- names(attribs) if(length(attribnames)>0 && attr.print) for (i in (1:length(attribnames)) [attribnames!="dim" & attribnames!="dimnames" & attribnames!="levels" & attribnames!="class" & attribnames!="tsp" & (attribnames!="names" | mode(struct)!="list")]) list.tree(attribs[[i]],depth-1,numbers,maxlen,maxcomp,attr.print, if(numbers) paste(front,i,sep="A") else paste(front,"A ",sep=""), fill,attribnames[i],size=FALSE) invisible() } ############################################################################## expr.tree <- function(struct,front="",fill=". 
",name.of,numbers=FALSE,depth=-1, show.comment=FALSE) { if (missing(name.of)) name.of <- deparse(substitute(struct)) else if(is.atomic(struct) | is.name(struct)) name.of <- paste(name.of,deparse(struct)) cat(front,"",name.of,"=",mode(struct),length(struct),"\n") if(depth!=0 && is.recursive(struct) ) { structlength <- length(struct) structnames <- names(struct) if(length(structnames)==0) structnames <- rep("",structlength) if(structlength>0) for (i in 1:length(structnames)) { if((mode(struct[[i]])!="missing" || is.function(struct)) && (mode(struct[[i]])!="comment" || show.comment)) expr.tree(struct[[i]], if(numbers) paste(front,i,sep=".") else paste(front,fill,sep=""), fill,structnames[i],numbers,"depth"=depth-1) } } invisible(character(0)) } Hmisc/R/gbayes.s0000644000176200001440000001365413006204222013154 0ustar liggesusersgbayes <- function(mean.prior, var.prior, m1, m2, stat, var.stat, n1, n2, cut.prior, cut.prob.prior=.025) { if(!missing(cut.prior)) var.prior <- ((cut.prior - mean.prior)/qnorm(1 - cut.prob.prior))^2 if(!is.function(var.stat)) { vs <- var.stat if(!missing(n1)) stop('may not specify n1,n2 when var.stat is not a function') } else vs <- var.stat(m1,m2) var.post <- 1/(1/var.prior + 1/vs) mean.post <- (mean.prior/var.prior + stat/vs)*var.post result <- list(mean.prior=mean.prior, var.prior=var.prior, mean.post=mean.post, var.post=var.post) if(!missing(n1)) { mean.pred <- mean.post var.pred <- var.post + var.stat(n1,n2) result$mean.pred <- mean.pred result$var.pred <- var.pred } structure(result, class='gbayes') } plot.gbayes <- function(x, xlim, ylim, name.stat='z', ...) { obj <- x pred <- length(obj$mean.pred)>0 if(missing(xlim)) xlim <- obj$mean.post + c(-6,6)*sqrt(obj$var.post) x <- seq(xlim[1], xlim[2], length=200) y1 <- dnorm(x,obj$mean.prior,sqrt(obj$var.prior)) y2 <- dnorm(x,obj$mean.post, sqrt(obj$var.post)) plot(x, y1, xlab=name.stat, ylab='Density',type='l',lty=1, ylim=if(missing(ylim)) range(c(y1,y2)) else ylim) curves <- vector('list',2+pred) names(curves) <- c('Prior','Posterior', if(pred)' Predictive') curves[[1]] <- list(x=x,y=y1) lines(x, y2, lty=2) curves[[2]] <- list(x=x,y=y2) if(pred) { y <- dnorm(x,obj$mean.pred,sqrt(obj$var.pred)) lines(x, y, lty=3) curves[[3]] <- list(x=x,y=y) } labcurve(curves, ...) invisible() } gbayes2 <- function(sd, prior, delta.w=0, alpha=0.05, upper=Inf, prior.aux=NULL) { if(!is.function(prior)) stop('prior must be a function') z <- qnorm(1-alpha/2) prod <- function(delta, prior, delta.w, sd, z, prior.aux) { (1 - pnorm((delta.w - delta)/sd + z)) * if(length(prior.aux)) prior(delta, prior.aux) else prior(delta) } ww <- 'value' ip <- if(length(prior.aux)) integrate(prior, -Inf, upper, prior.aux=prior.aux)[[ww]] else integrate(prior, -Inf, upper)[[ww]] if(abs(ip - 1) > .01) warning(paste('integrate failed to obtain 1.0 for integral of prior.\nDivided posterior probability by the integral it did obtain (', format(ip),').\nTry specifying upper=.',sep='')) integrate(prod, delta.w, upper, prior=prior, delta.w=delta.w, sd=sd, z=z, prior.aux=prior.aux)[[ww]] } ## v = variance of Xn after future obs. 
gbayesMixPredNoData <- function(mix=NA, d0=NA, v0=NA, d1=NA, v1=NA, what=c('density','cdf')) { what <- match.arg(what) g <- function(delta, v, mix, d0, v0, d1, v1, dist) { if(mix==1) { pv <- 1/(1 / v0 + 1 / v) dist(delta, d0, sqrt(pv)) } else if(mix == 0) { pv <- 1/(1 / v1 + 1 / v) dist(delta, d1, sqrt(pv)) } else { pv0 <- 1/(1 / v0 + 1 / v) pv1 <- 1/(1 / v1 + 1 / v) mix * dist(delta, d0, sqrt(pv0)) + (1-mix) * dist(delta, d1, sqrt(pv1)) } } formals(g) <- list(delta=numeric(0), v=NA, mix=mix, d0=d0, v0=v0, d1=d1, v1=v1, dist=NA) g } gbayesMixPost <- function(x=NA, v=NA, mix=1, d0=NA, v0=NA, d1=NA, v1=NA, what=c('density','cdf','postmean')) { what <- match.arg(what) g <- function(delta, x, v, mix=1, d0, v0, d1, v1, dist) { if(mix == 1) { pv <- 1 / (1 / v0 + 1 / v) if(what == 'postmean') (d0 / v0 + x / v) * pv else dist(delta, (d0 / v0 + x / v) * pv, sqrt(pv)) } else if(mix == 0) { pv <- 1 / (1 / v1 + 1 / v) if(what == 'postmean') (d1 / v1 + x / v) * pv else dist(delta, (d1 / v1 + x / v) * pv, sqrt(pv)) } else { prior.odds <- mix / (1 - mix) pv0 <- 1 / (1 / v0 + 1 / v) pv1 <- 1 / (1 / v1 + 1 / v) # Until 2016-10-17 had omitted v+ in two sqrt below likelihood.ratio <- dnorm(x, d0, sqrt(v + v0)) / dnorm(x, d1, sqrt(v + v1)) post.odds <- prior.odds * likelihood.ratio mixp <- post.odds / (1 + post.odds) if(what == 'postmean') mixp * (d0 / v0 + x / v) * pv0 + (1-mixp) * (d1 / v1 + x / v) * pv1 else mixp * dist(delta, (d0 / v0 + x / v) * pv0, sqrt(pv0)) + (1-mixp)* dist(delta, (d1 / v1 + x / v) * pv1, sqrt(pv1)) } } formals(g) <- list(delta=numeric(0), x=x, v=v, mix=mix, d0=d0, v0=v0, d1=d1, v1=v1, dist=switch(what, density = dnorm, cdf = pnorm, postmean= NULL)) g } gbayesMixPowerNP <- function(pcdf, delta, v, delta.w=0, mix, interval, nsim=0, alpha=0.05) { if(nsim==0) { ## Solve for statistic x such that the posterior cdf at ## (delta.w,x)=alpha/2 g <- function(x, delta.w, v, alpha, pcdf, mix) { pcdf(delta.w, x, v, mix) - alpha/2 } formals(g) <- list(x=numeric(0), delta.w=delta.w, v=v, alpha=alpha, pcdf=pcdf, mix=if(missing(mix)) as.list(pcdf)$mix else mix) x <- uniroot(g, interval=interval)$root c('Critical value'=x, Power=1 - pnorm(x, delta, sqrt(v))) } else { x <- rnorm(nsim, delta, sqrt(v)) probs <- if(missing(mix)) pcdf(delta.w, x, v) else pcdf(delta.w, x, v, mix=mix) pow <- mean(probs <= alpha/2) se <- sqrt(pow*(1-pow)/nsim) c(Power=pow, 'Lower 0.95'=pow-1.96*se, 'Upper 0.95'=pow+1.96*se) } } gbayes1PowerNP <- function(d0, v0, delta, v, delta.w=0, alpha=0.05) { pv <- 1/(1/v0 + 1/v) z <- qnorm(alpha/2) 1 - pnorm(v*( (delta.w - sqrt(pv)*z)/pv - d0/v0 ), delta, sqrt(v)) } Hmisc/R/latexTabular.s0000644000176200001440000000355512766302172014350 0ustar liggesuserslatexTabular <- function(x, headings=colnames(x), align =paste(rep('c',ncol(x)),collapse=''), halign=paste(rep('c',ncol(x)),collapse=''), helvetica=TRUE, translate=TRUE, hline=0, center=FALSE, ...) { if(! (is.matrix(x) || is.data.frame(x))) x <- as.matrix(x) nc <- ncol(x) if(translate) for(i in 1 : nc) if(is.factor(x[, i]) || is.character(x[, i])) x[, i] <- latexTranslate(x[, i]) if(length(list(...))) x <- format.df(x, ...) xhalign <- substring(halign, 1:nchar(halign), 1:nchar(halign)) w <- paste0(if(center) '\\begin{center}', '\\begin{tabular}{', align, '}') if(hline == 2) w <- paste0(w, '\\hline') if(helvetica) w <- paste('{\\fontfamily{phv}\\selectfont', w) if(length(headings)) { h <- strsplit(headings, split='\n') ## strsplit returns character(0) for "" for(i in 1 : length(h)) if(! 
length(h[[i]])) h[[i]] <- '' maxl <- max(sapply(h, length)) H <- character(maxl) for(i in 1 : maxl) { lab <- sapply(h, function(x) if(length(x) < i) '' else x[i]) if(translate) lab <- latexTranslate(lab) H[i] <- if(halign != align) paste0(paste(paste0('\\multicolumn{1}{', xhalign, '}{', lab, '}'), collapse='&'), '\\\\') else paste0(paste(lab, collapse='&'), '\\\\') } H <- paste(H, collapse='\n') if(hline > 0) H <- paste0(H, '\\hline') } v <- paste0(apply(x, 1, paste, collapse='&'), '\\\\') if(hline == 2) v <- c(v, '\\hline') v <- paste(v, collapse='\n') # v <- paste(paste0(v, '\\\\'), if(hline == 2) '\\hline'), collapse='\n') if(length(headings)) v <- paste(H, v, sep='\n') paste0(w, '\n', v, '\n\\end{tabular}', if(center) '\n\\end{center}', if(helvetica) '}') } Hmisc/R/epi.s0000644000176200001440000000651412243661443012472 0ustar liggesusers## $Id$ ## Relative risk estimation from binary responses ## See http://www.csm.ornl.gov/~frome/ES/RRMHex/MHanalysis.txt and ## http://www.csm.ornl.gov/~frome/ES/RRMHex for related code mhgr <- function(y, group, strata, conf.int=.95) { group <- as.factor(group) i <- is.na(y) | is.na(group) | is.na(strata) if(any(i)) { i <- !i y <- y[i] group <- group[i] strata <- strata[i] } N <- tapply(y, list(group,strata), length) if(nrow(N) != 2) stop('only works for 2 groups') N[is.na(N)] <- 0 s <- tapply(y, list(group,strata), sum) s[is.na(s)] <- 0 n <- N[1,] m <- N[2,] x <- s[1,] y <- s[2,] N <- m + n tk<- x + y R <- x*m/N S <- y*n/N D <- (m*n*tk - x*y*N)/N/N rr <- sum(R)/sum(S) varlog <- sum(D)/(sum(R)*sum(S)) sigma <- sqrt(varlog) z <- -qnorm((1-conf.int)/2) ci <- rr*c(exp(-z*sigma), exp(z*sigma)) structure(list(rr=rr, ci=ci, conf.int=conf.int, N=table(group)), class='mhgr') } print.mhgr <- function(x, ...) { cat('Mantel-Haenszel Risk Ratio and', x$conf.int, 'Greenland-Robins Confidence Interval\n\n') cat('Common Relative Risk:', x$rr, 'CI:', x$ci, '\n\n') cat('N in Each Group\n\n') print(x$N) invisible() } lrcum <- function(a, b, c, d, conf.int=0.95) { if(any(is.na(a+b+c+d))) stop('NAs not allowed') if(min(a,b,c,d)==0) { warning('A frequency of zero exists. Adding 0.5 to all frequencies.') a <- a + .5 b <- b + .5 c <- c + .5 d <- d + .5 } lrpos <- a/(a+c) / (b/(b+d)) lrneg <- c/(a+c) / (d/(b+d)) zcrit <- qnorm((1+conf.int)/2) varloglrpos <- 1/a - 1/(a+c) + 1/b - 1/(b+d) varloglrneg <- 1/d - 1/(b+d) + 1/c - 1/(a+c) upperlrpos <- exp(log(lrpos) + zcrit*sqrt(varloglrpos)) lowerlrpos <- exp(log(lrpos) - zcrit*sqrt(varloglrpos)) upperlrneg <- exp(log(lrneg) + zcrit*sqrt(varloglrneg)) lowerlrneg <- exp(log(lrneg) - zcrit*sqrt(varloglrneg)) lrposcum <- cumprod(lrpos) lrnegcum <- cumprod(lrneg) varloglrposcum <- cumsum(varloglrpos) varloglrnegcum <- cumsum(varloglrneg) upperlrposcum <- exp(log(lrposcum) + zcrit*sqrt(varloglrposcum)) lowerlrposcum <- exp(log(lrposcum) - zcrit*sqrt(varloglrposcum)) upperlrnegcum <- exp(log(lrnegcum) + zcrit*sqrt(varloglrnegcum)) lowerlrnegcum <- exp(log(lrnegcum) - zcrit*sqrt(varloglrnegcum)) structure(llist(lrpos, upperlrpos, lowerlrpos, lrneg, upperlrneg, lowerlrneg, lrposcum, upperlrposcum, lowerlrposcum, lrnegcum, upperlrnegcum, lowerlrnegcum, conf.int), class='lrcum') } print.lrcum <- function(x, dec=3, ...) { ci <- x$conf.int l <- paste('Lower', ci) u <- paste('Upper', ci) a <- with(x, cbind(lrpos, lowerlrpos, upperlrpos, lrposcum, lowerlrposcum, upperlrposcum)) b <- with(x, cbind(lrneg, lowerlrneg, upperlrneg, lrnegcum, lowerlrnegcum, upperlrnegcum)) a <- round(a, dec) b <- round(b, dec) colnames(a) <- c('LR+', l, u, 'Cum. 
LR+', l, u) colnames(b) <- c('LR-', l, u, 'Cum. LR-', l, u) rownames(a) <- rownames(b) <- rep('', nrow(a)) print(a) cat('\n') print(b) invisible() } Hmisc/R/dataRep.s0000644000176200001440000001256512250442016013267 0ustar liggesusersdataRep <- function(formula, data, subset, na.action) { call <- match.call() nact <- NULL y <- match.call(expand.dots=FALSE) if(missing(na.action)) y$na.action <- na.delete y[[1]] <- as.name("model.frame") X <- eval(y, sys.parent()) nact <- attr(X,"na.action") n <- nrow(X) nam <- names(X) p <- length(nam) types <- character(p) parms <- character(p) pctl <- vector('list',p) margfreq <- vector('list',p) Xu <- vector('list',p) for(j in 1:p) { namj <- nam[j] xj <- X[[j]] if(is.character(xj)) xj <- as.factor(xj) if(is.factor(xj)) { parms[[j]] <- paste(levels(xj),collapse=' ') types[j] <- 'exact categorical' } else if(inherits(xj,'roundN')) { atr <- attributes(xj) nam[j] <- atr$name types[j] <- 'round' parms[j] <- paste('to nearest',format(atr$tolerance)) if(length(w <- atr$clip)) parms[j] <- paste(parms[j],', clipped to [', paste(format(w),collapse=','),']',sep='') pctl[[j]] <- atr$percentiles } else { types[j] <- 'exact numeric' parms[j] <- '' pctl[[j]] <- quantile(xj, seq(0,1,by=.01)) } margfreq[[j]] <- table(xj) Xu[[j]] <- sort(unique(xj)) X[[j]] <- xj } names(types) <- names(parms) <- names(pctl) <- names(margfreq) <- names(Xu) <- nam Xu <- expand.grid(Xu) m <- nrow(Xu) count <- integer(m) for(i in 1:m) { matches <- rep(TRUE,n) for(j in 1:p) matches <- matches & (as.character(X[[j]]) == as.character(Xu[[j]][i])) count[i] <- sum(matches) } if(any(count==0)) { s <- count > 0 Xu <- Xu[s,] count <- count[s] m <- sum(s) } structure(list(call=call, formula=formula, n=n, names=nam, types=types, parms=parms, margfreq=margfreq, percentiles=pctl, X=Xu, count=count, na.action=nact), class='dataRep') } roundN <- function(x, tol=1, clip=NULL) { pct <- quantile(x, seq(0,1,by=.01), na.rm=TRUE) name <- deparse(substitute(x)) lab <- attr(x, 'label') if(!length(lab)) lab <- name if(!missing(clip)) x <- pmin(pmax(x,clip[1]),clip[2]) structure(as.single(tol*round(x/tol)), tolerance=tol, clip=clip, percentiles=pct, name=name, label=lab, class='roundN') } as.data.frame.roundN <- as.data.frame.vector '[.roundN' <- function(x, i, ...) { atr <- attributes(x) x <- unclass(x)[i] attributes(x) <- atr x } print.dataRep <- function(x, long=FALSE, ...) { cat("\n") cat("Data Representativeness n=",x$n,"\n\n", sep='') dput(x$call) cat("\n") if(length(z <- x$na.action)) naprint(z) specs <- data.frame(Type=x$types, Parameters=x$parms, row.names=x$names) cat('Specifications for Matching\n\n') print.data.frame(specs) X <- x$X if(long) { X$Frequency <- x$count cat('\nUnique Combinations of Descriptor Variables\n\n') print.data.frame(X) } else cat('\n',nrow(X), 'unique combinations of variable values were found.\n\n') invisible() } predict.dataRep <- function(object, newdata, ...) 
{ n <- object$n count <- object$count if(missing(newdata)) return(count) pctl <- object$percentiles margfreq <- object$margfreq p <- length(margfreq) m <- nrow(newdata) nam <- object$names types <- object$types X <- object$X ##Xn <- if(length(model.frame.default$Des)) 3Aug02 ## model.frame(object$formula, newdata, na.action=na.keep, Des=FALSE) else Xn <- model.frame(object$formula, newdata, na.action=na.keep) names(Xn) <- nam worst.margfreq <- rep(1e8, m) pct <- matrix(NA, m, p, dimnames=list(row.names(Xn),nam)) for(j in 1:p) { xj <- Xn[[j]] freq <- margfreq[[nam[j]]][as.character(xj)] freq[is.na(freq)] <- 0 pct[,j] <- if(types[j]=='exact categorical') 100*freq/n else approx(pctl[[nam[j]]], seq(0,100,by=1), xout=newdata[[nam[j]]], rule=2)$y worst.margfreq <- pmin(worst.margfreq, freq) } cnt <- integer(m) for(i in 1:m) { matches <- rep(TRUE,nrow(X)) for(j in 1:p) { matches <- matches & (as.character(X[[j]]) == as.character(Xn[[j]][i])) } s <- sum(matches) if(s > 1) warning('more than one match to original data combinations') cnt[i] <- if(s) count[matches] else 0 } if(any(cnt > worst.margfreq)) warning('program logic error') structure(list(count=cnt, percentiles=pct, worst.margfreq=worst.margfreq, newdata=newdata), class='predict.dataRep') } print.predict.dataRep <- function(x, prdata=TRUE, prpct=TRUE, ...) { if(prdata) { dat <- x$newdata dat$Frequency <- x$count dat$Marginal.Freq <- x$worst.margfreq cat('\nDescriptor Variable Values, Estimated Frequency in Original Dataset,\nand Minimum Marginal Frequency for any Variable\n\n') print.data.frame(dat) } else { cat('\nFrequency in Original Dataset\n\n') print(x$count) cat('\nMinimum Marginal Frequency for any Variable\n\n') print(x$worst.margfreq) } if(prpct) { cat('\n\nPercentiles for Continuous Descriptor Variables,\nPercentage in Category for Categorical Variables\n\n') print(round(x$percentiles)) } invisible() } Hmisc/R/misc.get.s0000644000176200001440000001764613023765441013436 0ustar liggesusersspss.get <- function(file, lowernames=FALSE, datevars=NULL, use.value.labels=TRUE, to.data.frame=TRUE, max.value.labels=Inf, force.single=TRUE, allow=NULL, charfactor=FALSE, reencode=NA) { w <- read.spss(file, use.value.labels=use.value.labels, to.data.frame=to.data.frame, max.value.labels=max.value.labels, reencode=reencode) a <- attributes(w) vl <- a$variable.labels nam <- a$names nam <- makeNames(a$names, unique=TRUE, allow=allow) if(lowernames) nam <- casefold(nam) names(w) <- nam lnam <- names(vl) if(length(vl)) for(i in 1:length(vl)) { n <- lnam[i] lab <- vl[i] if(lab != '' && lab != n) label(w[[i]]) <- lab } attr(w, 'variable.labels') <- NULL if(force.single || length(datevars) || charfactor) for(v in nam) { x <- w[[v]] changed <- FALSE if(v %in% datevars) { x <- importConvertDateTime(x, 'date', 'spss') changed <- TRUE } else if(all(is.na(x))) { storage.mode(x) <- 'integer' changed <- TRUE } else if(!(is.factor(x) || is.character(x))) { if(all(is.na(x))) { storage.mode(x) <- 'integer' changed <- TRUE } else if(max(abs(x),na.rm=TRUE) <= (2^31-1) && all(floor(x) == x, na.rm=TRUE)) { storage.mode(x) <- 'integer' changed <- TRUE } } else if(charfactor && is.character(x)) { if(length(unique(x)) < .5*length(x)) { x <- sub(' +$', '', x) # remove trailing blanks x <- factor(x, exclude='') changed <- TRUE } } if(changed) w[[v]] <- x } w } csv.get <- function(file, lowernames=FALSE, datevars=NULL, datetimevars=NULL, dateformat='%F', fixdates=c('none','year'), comment.char = "", autodates=TRUE, allow=NULL, charfactor=FALSE, sep=',', skip=0, 
vnames=NULL, labels=NULL, ...)
{
  fixdates <- match.arg(fixdates)
  if(length(vnames))
    vnames <- scan(file, what=character(0), skip=vnames-1, nlines=1,
                   sep=sep, quiet=TRUE)
  if(length(labels))
    labels <- scan(file, what=character(0), skip=labels-1, nlines=1,
                   sep=sep, quiet=TRUE)

  w <- if(length(vnames))
    read.csv(file, check.names=FALSE, comment.char=comment.char,
             header=FALSE, col.names=vnames, skip=skip, sep=sep, ...)
  else read.csv(file, check.names=FALSE, comment.char=comment.char,
                sep=sep, skip=skip, ...)

  n <- nam <- names(w)
  m <- makeNames(n, unique=TRUE, allow=allow)
  if(length(labels)) n <- labels
  if(lowernames) m <- casefold(m)

  changed <- any(m != nam)
  if(changed) names(w) <- m

  if(autodates) {
    tmp <- w
    names(tmp) <- NULL

    for(i in 1:length(tmp)) {
      if(! is.character(tmp[[1]]))
        next
    }
  }
  cleanup.import(w,
                 labels=if(length(labels))labels else if(changed)n else NULL,
                 datevars=datevars, datetimevars=datetimevars,
                 dateformat=dateformat,
                 fixdates=fixdates, charfactor=charfactor)
}

stata.get <- function(file, lowernames=FALSE,
                      convert.dates=TRUE, convert.factors=TRUE,
                      missing.type=FALSE, convert.underscore=TRUE,
                      warn.missing.labels=TRUE, force.single=TRUE,
                      allow=NULL, charfactor=FALSE, ...)
{
  ## depends on the read.dta function from foreign

  ## Function to convert the elements of w into more compact
  ## data storage types.
  convertObjs <- function(x, charfactor, force.single) {
    ## A Date is not necessarily an integer, but any fraction it
    ## might have is ignored
    if((inherits(x, 'Date') || is.factor(x))
       && storage.mode(x) != 'integer') {
      storage.mode(x) <- 'integer'
    } else if(charfactor && is.character(x)) {
      ## If x is a character vector and arg charfactor is TRUE then
      ## convert x to a factor if the number of unique values of x is less
      ## than half the total number of values in x
      if(length(unique(x)) < length(x) / 2) {
        x <- sub(' +$', '', x)  # remove trailing blanks
        x <- factor(x, exclude='')
      }
    } else if(is.numeric(x)) {
      if(all(is.na(x))) {
        ## if all values are NA then convert to integer because
        ## it is 4 bytes instead of 8
        storage.mode(x) <- 'integer'
      }
      else if(force.single && max(abs(x), na.rm=TRUE) <= (2^31-1) &&
              all(floor(x) == x, na.rm=TRUE)) {
        ## convert x to integer if arg force.single is TRUE and the maximum
        ## absolute value of x is less than the maximum value that an integer
        ## can store.
        storage.mode(x) <- 'integer'
      }
    }

    return(x)
  }

  ## A function to create additional attributes to add to the elements of
  ## w
  create.attribs <- function(var.label, val.label, format, label.table) {
    attribs <- list()

    if(format != '') {
      attribs$format <- format
    }

    ## Translate Stata variable labels into Hmisc variable labels
    if(var.label != '') {
      attribs$label <- var.label
    }

    ## The label.table values are found by checking whether there is a
    ## non-empty value in val.labels.  That value corresponds to a named
    ## element in label.table.

    ## Check to see if val.label is not empty and it is one of the
    ## names in label.table and that its value is not NULL
    if(val.label != '' && val.label %in% names(label.table) &&
       !is.null(label.table[[val.label]])) {
      attribs$value.label.table <- label.table[[val.label]]
    }

    return(attribs)
  }

  ## Read the stata file into w
  w <- read.dta(file, convert.dates=convert.dates,
                convert.factors=convert.factors,
                missing.type=missing.type,
                convert.underscore=convert.underscore,
                warn.missing.labels=warn.missing.labels, ...)
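  ## Note: read.dta() attaches the Stata metadata used below as attributes
  ## of w: var.labels (variable labels), val.labels (the value-label set
  ## name for each variable), formats, and label.table (the value-label
  ## sets themselves), plus datalabel, version, and time.stamp.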
  ## extract attributes from w
  a <- attributes(w)
  num.vars <- length(w)

  ## Translate attribute names into R names
  nam <- makeNames(a$names, unique=TRUE, allow=allow)
  if(lowernames) nam <- casefold(nam, upper=FALSE)
  a$names <- nam

  ## If var.labels is empty then create an empty char vector.
  if(!length(a$var.labels)) {
    a$var.labels <- character(num.vars)
  }

  ## If val.labels is empty then create an empty char vector.
  if(length(a$val.labels)) {
    val.labels <- a$val.labels
  } else {
    val.labels <- character(num.vars)
  }

  ## create list of attributes for the elements in w.  mapply is faster
  ## than a for loop in large data sets.
  attribs <- mapply(FUN=create.attribs, var.label=a$var.labels,
                    val.label=val.labels, format=a$formats,
                    MoreArgs=list(label.table=a$label.table),
                    SIMPLIFY=FALSE)

  ## clear var.labels attribute
  attr(w, 'var.labels') <- NULL

  ## Convert the elements of w as needed
  w <- lapply(w, FUN=convertObjs, force.single=force.single,
              charfactor=charfactor)

  ## strip off the naming info for w
  w <- unname(w)

  ## add the new attributes to the current attributes of
  ## the elements of w
  for(i in seq(along.with=w)) {
    ## Set the label for the element
    if('label' %in% names(attribs[[i]])) {
      label(w[[i]]) <- attribs[[i]]$label

      ## clear the label value from attribs[[i]]
      attribs[[i]]$label <- NULL
    }

    ## combine the new attribs with the current attributes
    consolidate(attributes(w[[i]])) <- attribs[[i]]
  }

  ## add the names, rownames, class variables, and some extra stata
  ## info back to w
  stata.info <- a[c('datalabel','version','time.stamp','val.labels','label.table')]
  attributes(w) <- c(a[c('names','row.names','class')],
                     stata.info=list(stata.info))
  return(w)
}
Hmisc/R/latexTherm.s0000644000176200001440000000665714112731327014032 0ustar liggesuserslatexTherm <- function(y, name, w=.075, h=.15, spacefactor=1/2, extra=.07,
                       file='', append=TRUE) {
  ct <- function(..., append=TRUE) cat(..., file=file, append=append, sep='')
  ct('\\def\\', name, '{\n', append=append)
  tab <- attr(y, 'table')
  if(length(tab)) {
    ct('\\protect\\tooltipn{\n')
  }
  ct('\\setlength{\\unitlength}{.001in}\n')
  k <- length(y)
  W <- k * w + (k-1) * spacefactor * w
  z <- function(a) round(a * 1000)
  ct('\\begin{picture}(', z(W + extra), ',', z(h + extra), ')\n')
  x <- 0
  for(i in 1 : k) {
    b <- y[i]
    if(!
is.na(b)) { if(b < 1) { # Draw frame if not completely filled ct('\\linethickness{.05pt}\n') ct('\\put(', z(x), ', 0){\\line(1, 0){', z(w), '}}\n') ct('\\put(', z(x + w), ', 0){\\line(0, 1){', z(h), '}}\n') ct('\\put(', z(x + w), ',', z(h), '){\\line(-1, 0){', z(w), '}}\n') ct('\\put(', z(x), ',', z(h), '){\\line(0, -1){', z(h), '}}\n') } if(b > 0) { ct('\\linethickness{', w, 'in}\n') ct('\\put(', z(x + w / 2), ', 0){\\line(0,1){', z(h * b), '}}\n') } } x <- x + w + spacefactor * w } ct('\\end{picture}', if(length(tab)) '}{\n', tab, if(length(tab)) '}', '}\n') } latexNeedle <- function(y, x=NULL, col='black', href=0.5, name, w=.05, h=.15, extra=0, file='', append=TRUE) { ct <- function(..., append=TRUE) cat(..., file=file, append=append, sep='') ct('\\def\\', name, '{%\n', append=append) tab <- attr(y, 'table') if(length(tab)) { ct('\\protect\\tooltipn{%\n') } ct('\\setlength{\\unitlength}{.001in}%\n') k <- length(y) col <- rep(col, length.out=k) W <- max(k, 2) * w z <- function(a) round(a * 1000) ct('\\begin{picture}(', z(W + extra), ',', z(h), ')%\n') ## Draw grayscale frame ct('\\linethickness{.05pt}\\color[gray]{0.85}%\n') ct('\\put(0,0){\\line(1,0){', z(W), '}}%\n') # ct('\\put(', z(W), ',0){\\line(0,1){', z(h), '}}%\n') ct('\\put(', z(W), ',', z(h), '){\\line(-1,0){', z(W), '}}%\n') # ct('\\put(0,', z(h), '){\\line(0,-1){', z(h), '}}%\n') ## Draw horizontal reference lines if(length(href)) for(hr in href) ct('\\put(0,', z(h * hr), '){\\line(1,0){', z(W), '}}%\n') ## Draw vertical needles ## If x is given, scale to w / 2 to k * w / 2 x <- if(length(x)) { r <- range(x) w / 2 + (k - 1) * w / 2 * (x - r[1]) / diff(r) } else seq(w / 2, k * w / 2, length.out=k) ct('\\linethickness{1.55pt}%\n') for(i in 1 : k) { b <- y[i] if(! is.na(b)) { co <- paste(round(col2rgb(col[i]) / 255, 3), collapse=',') ct('\\color[rgb]{', co, '}') ct('\\put(', z(x[i]), ',0){\\line(0,1){', z(h * b), '}}%\n') } } ct('\\end{picture}', if(length(tab)) '}{%\n', tab, if(length(tab)) '}', '}%\n') } pngNeedle <- function(y, x=NULL, col='black', href=0.5, lwd=3.5, w=6, h=18, file=tempfile(fileext='.png')) { k <- length(y) col <- rep(col, length.out=k) png(file, width=1 + k * w, height=h) par(mar=rep(0,4)) plot.new() par(usr=c(0, 1, 0, 1)) if(length(href)) { href <- c(0, href, 1) abline(h=href, col=gray(0.8)) } ## If x is given, scale to [0.025, 0.975] x <- if(length(x)) { r <- range(x) 0.025 + 0.95 * (x - r[1]) / diff(r) } else seq(0.025, 0.975, length.out=k) for(i in 1 : k) lines(c(x[i], x[i]), c(0, y[i]), col=col[i], lwd=lwd) dev.off() invisible(file) } Hmisc/R/makeNstr.s0000644000176200001440000000035712243661443013500 0ustar liggesusersmakeNstr <- function(char, len) { mapply(function(char, len) { if(is.na(len)) { '\n' } else if(len == 0) { '' } else { paste(rep.int(x=char, times=len), collapse='') } }, char, len, USE.NAMES=FALSE) } Hmisc/R/num.intercepts.s0000644000176200001440000000063712243661443014673 0ustar liggesusersnum.intercepts <- function(fit, type=c('fit', 'var', 'coef')) { type <- match.arg(type) nrp <- fit$non.slopes if(!length(nrp)) { nm1 <- names(fit$coef)[1] nrp <- 1*(nm1=="Intercept" | nm1=="(Intercept)") } if(type == 'fit') return(nrp) w <- if(type == 'var') fit$var else fit$coefficients i <- attr(w, 'intercepts') li <- length(i) if(!li) return(nrp) if(li == 1 && i == 0) 0 else li } Hmisc/R/find.matches.s0000644000176200001440000001165112250355511014250 0ustar liggesusersfind.matches <- function(x, y, tol=rep(0,ncol(y)), scale=tol, maxmatch=10) { ##if(length(dim(x))==0) x <- matrix(x, nrow=1) 
## 10may02
  if(!is.matrix(x)) x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  if(!is.matrix(y)) y <- as.matrix(y)  ## 10may02
  if(p != ncol(y))
    stop("number of columns of x and y must match")

  ny <- nrow(y)
  rown <- dimnames(x)[[1]]
  ry <- dimnames(y)[[1]]
  matches <- matrix(if(length(ry)) "" else 0, n, maxmatch,
                    dimnames=list(rown, paste("Match #",1:maxmatch,sep="")))
  distance <- matrix(NA, n, maxmatch,
                     dimnames=list(rown, paste("Distance #",1:maxmatch,sep="")))
  if(length(ry)==0) ry <- 1:ny

  scale <- ifelse(scale==0,1,tol)
  ones <- rep(1,p)
  mx <- 0
  for(i in 1:n) {
    dif <- abs(y - rep(x[i,], rep.int(ny,p)))
    toll <- rep(tol, rep.int(nrow(dif),p))
    which <- (1:ny)[((dif > toll) %*% ones)==0]
    lw <- length(which)
    if(lw) {
      scaled <- dif[which,,drop=FALSE]/rep(scale, rep.int(lw,p))
      dist <- (scaled^2) %*% ones
      lw <- min(lw,maxmatch)
      mx <- max(mx,lw)
      d <- order(dist)[1:lw]
      matches[i,1:lw] <- ry[which[d]]
      distance[i,1:lw] <- dist[d]
    }
  }

  structure(list(matches=matches[,1:mx], distance=distance[,1:mx]),
            class="find.matches")
}

print.find.matches <- function(x, digits=.Options$digits, ...)
{
  cat("\nMatches:\n\n")
  print(x$matches, quote=FALSE)
  cat("\nDistances:\n\n")
  print(x$distance, digits=digits)
  invisible()
}

summary.find.matches <- function(object, ...)
{
  mat <- object$matches
  dist <- object$distance
  cat("Frequency table of number of matches found per observation\n\n")
  m <- (!is.na(dist)) %*% rep(1,ncol(mat))
  print(table(m))
  cat("\nMedian minimum distance by number of matches\n\n")
  print(tapply(dist[m>0,1], m[m>0], median))
  ta <- table(mat[m>0,1])
  ta <- ta[ta>1]
  if(length(ta)) {
    cat("\nObservations selected first more than once (with frequencies)\n\n")
    print(ta)
  }
  else cat("\nNo observations selected first more than once\n\n")

  invisible()
}

matchCases <- function(xcase,    ycase,    idcase=names(ycase),
                       xcontrol, ycontrol, idcontrol=names(ycontrol),
                       tol=NULL,
                       maxobs=max(length(ycase),length(ycontrol))*10,
                       maxmatch=20, which=c('closest','random'))
{
  if(!length(tol)) stop('must specify tol')
  if((length(xcase)!=length(ycase)) || (length(xcontrol)!=length(ycontrol)))
    stop('lengths of xcase, ycase and of xcontrol, ycontrol must be same')

  which <- match.arg(which)
  ycase    <- as.matrix(ycase)
  ycontrol <- as.matrix(ycontrol)
  if(!length(idcase))    idcase    <- 1:length(ycase)
  if(!length(idcontrol)) idcontrol <- 1:length(ycontrol)

  idcase    <- as.character(idcase)
  idcontrol <- as.character(idcontrol)

  j <- is.na(ycase %*% rep(1,ncol(ycase))) | is.na(xcase)
  if(any(j)) {
    warning(paste(sum(j),'cases removed due to NAs'))
    ycase <- ycase[!j,,drop=FALSE]
    xcase <- xcase[!j]
    idcase <- idcase[!j]
  }

  j <- is.na(ycontrol %*% rep(1,ncol(ycontrol))) | is.na(xcontrol)
  if(any(j)) {
    warning(paste(sum(j),'controls removed due to NAs'))
    ycontrol <- ycontrol[!j,,drop=FALSE]
    xcontrol <- xcontrol[!j]
    idcontrol <- idcontrol[!j]
  }

  idCase <- id <- character(maxobs)
  type <- factor(rep(NA,maxobs), c('case','control'))
  x <- numeric(maxobs)
  y <- matrix(NA, ncol=ncol(ycase), nrow=maxobs)
  last <- 0
  ncase <- length(ycase)
  ncontrol <- length(ycontrol)
  matches <- integer(ncase)
  for(i in 1:ncase) {
    s <- abs(xcontrol-xcase[i]) <= tol
    nmatch <- sum(s)
    if(nmatch > maxmatch) {
      s <- (1:ncontrol)[s]  ## next line was sample(j,...) 4jun02
4jun02 if(which=="random") s <- sample(s, maxmatch, replace=FALSE) else { errors <- abs(xcontrol[s]-xcase[i]) serrors <- order(errors) s <- (s[serrors])[1:maxmatch] } nmatch <- maxmatch } matches[i] <- nmatch if(!nmatch) next end <- last + nmatch + 1 if(end > maxobs) stop(paste('needed maxobs >',maxobs)) start <- last+1 last <- end idCase[start:end] <- rep(idcase[i], nmatch+1) type[start:end] <- c('case',rep('control',nmatch)) id[start:end] <- c(idcase[i], idcontrol[s]) x[start:end] <- c(xcase[i], xcontrol[s]) y[start:end,] <- rbind(ycase[i,,drop=FALSE], ycontrol[s,,drop=FALSE]) } cat('\nFrequencies of Number of Matched Controls per Case:\n\n') print(table(matches)) cat('\n') structure(list(idcase=idCase[1:end], type=type[1:end], id=id[1:end], x=x[1:end], y=drop(y[1:end,])), row.names=as.character(1:end), class='data.frame') } Hmisc/R/responseSummary.s0000644000176200001440000002133612250441455015125 0ustar liggesusers## $Id$ responseSummary <- function(formula, data, na.action=na.pass, FUN=function(y) sapply(y, mean), fun, overall=TRUE, continuous=10, na.rm=TRUE, na.include=TRUE, g, quantile.groups=4, groups=quantile.groups, nmin=0, ...) { func.call <- match.call() ## Print warnings for obsolete function arguments if(!missing(g)) { warning("argument g is depricated; use quantile.groups instead", immediate. = TRUE) quantile.groups <- g } if(!missing(fun)) { warning("argument fun is depreicated; use FUN instead", immediate. = TRUE) FUN <- fun } ## create model.frame call to create data.frame needed to use formula. m <- GetModelFrame(formula=formula,specials="stratify", default.na.action=na.action) Terms <- attr(m, "terms") ## Extract response and remove from model Y <- model.extract(m, "response") if(is.null(Y)) stop("must have a variable on the left hand side of the formula") yname <- names(m)[1] m <- m[-1] ylabel <- valueLabel(Y) yunit <- valueUnit(Y) ## extract stratified variables from m or create a blank ## strat if non exists. if(!is.null(attr(Terms, 'specials')$stratify)) { temp <- untangle.specials(Terms, 'stratify') if(length(temp$vars) == 1) stratified <- m[[temp$vars]] else { stratified <- stratify(m[,temp$vars]) } ## Get labels and names of stratified variables stratified.Tags <- valueTags(stratified) newTerms <- drop.terms(Terms, dropx=temp$terms) } else { stratified <- factor(rep('',nrow(m))) stratified.Tags <- NULL newTerms <- delete.response(Terms) } ## Number of stratified terms nstratified <- length(levels(stratified)) ## Create X from m using newTerms. 
  X <- GetModelFrame(formula=newTerms, default.na.action=na.action)

  ## Throw warning if name Overall exists in X
  if("Overall" %in% names(X) && overall)
    stop("Data Frame contains a column named 'Overall'; Name conflicts with 'overall=TRUE' argument in function")

  funlab <- NULL
  ## Check to see if FUN = "%"
  if(!is.function(FUN)) {
    if (FUN == '%') {
      FUN <- function(y) {
        stats <- 100 * apply(y, 2, mean)
        names(stats) <- paste(dimnames(y)[[2]], "%")
        stats
      }

      funlab <- paste("% of", yname)
    } else FUN <- match.fun(FUN)
  }

  ## Compute number of descriptive statistics per cell

  ## find vector of rows that are NA
  s <- is.na(Y)
  if(is.matrix(s))
    s <- as.vector(s %*% rep(1, ncol(s)), mode="logical")

  ## Run FUN on non-NA elements of Y
  if(is.matrix(Y))
    stats <- FUN(Y[!s,, drop=FALSE])
  else
    stats <- FUN(Y[!s, drop=FALSE])

  nstats <- length(stats)

  ## Create the names of the columns of summary output
  dn <- dimnames(stats)
  if(length(dn) == 2)
    name.stats <- as.vector(outer(dn[[1]], dn[[2]],
                                  FUN=function(a,b) paste(b,a)))
  else
    name.stats <- names(stats)

  if(is.null(name.stats)) {
    if(nstats == 1)
      name.stats <- yname
    else
      name.stats <- paste(yname, 1:nstats, sep="")
  }

  ## Figure out the funlab name
  if(is.null(funlab))
    funlab <- yname

  ## find number of missing observations
  numberMissing <- sum(s)
  if(numberMissing) {
    if(is.matrix(Y))
      Y <- Y[!s,, drop=FALSE]
    else
      Y <- Y[!s, drop=FALSE]

    X <- X[!s,, drop=FALSE]
    stratified <- stratified[!s]
  }

  ## Compute total number of columns
  ncolumns <- nstratified * (1 + nstats)

  colNames <- rep(c('N', name.stats), nstratified)

  ## Initialize default values
  n <- NROW(X)

  subsetX <- function(x, ...) {
    tags <- valueTags(x)
    if(length(x) == 0) {
      return(x)
    }

    if(!is.matrix(x)) {
      ## Find all na's in x
      s <- is.na(x)

      ## If x is not a category make it into one
      if(! is.factor(x)) {
        ## Find all the unique non-null values of x
        xUnique <- unique(x[!is.na(x)])

        ## If the number of unique values is less than the number deemed
        ## to be continuous, treat as a factor
        if(length(xUnique) < continuous)
          x <- factor(x)
        else
          x <- cut2(x, g=quantile.groups, ...)
      }

      if(is.function(na.include) && any(s))
        x <- na.include(x)

      if(nmin > 0) {
        nn <- table(x)
        levels(x) <- ifelse(nn >= nmin, names(nn), NA)
      }
    } else {
      cnames <- colnames(x)
      if(!length(cnames))
        stop("matrix variable must have column dimnames")

      if(! is.logical(x)){
        ## Coerce x to logical
        if(is.numeric(x))
          x <- x==1
        else {
          x <- structure(casefold(x), dim=dim(x))
          x <- x=='present' | x=='yes' | x=='true'
        }
      }

      colnames(x) <- cnames
      attr(x, "levels") <- cnames

      ## see if there are any stragglers
      if(nmin > 0) {
        nn <- apply(x, 2, sum, na.rm=TRUE)
        x <- x[,nn >= nmin]
      }

      ## Convert the TRUE/FALSE values to column name or NA
      x <- ifelse(x, rep(cnames, each=n), NA)
    }

    valueTags(x) <- tags
    return(x)
  }

  ## Subset X
  X <- lapply(X, FUN=subsetX, ...)
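  ## X now holds a categorized version of each predictor: continuous
  ## variables have been binned with cut2() into quantile.groups groups,
  ## factors are left as-is, and logical/character matrices are mapped to
  ## column-name categories, so that tapply() below can group Y by them.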
  ## if(is.matrix(Y)) {
  ##   Y <- split(Y, row(Y))
  ##
  ##   procY <- function(y) do.call(rbind, y)
  ## } else {
  ##   procY <- function(y) y
  ## }

  comp.stats <- function(grouped.y) {
    ans <- c(length(grouped.y), FUN(grouped.y))
    names(ans) <- c('N', name.stats)
    ans
  }

  ## create stats for each element of X
  processX <- function(x) {
    if(is.mChoice(x)) {
    } else {
      xstats <- tapply(Y, list(x, stratified), FUN=comp.stats)
    }

    valueTags(xstats) <- valueTags(x)
    xstats
  }

  Xstats <- lapply(X, FUN=processX)

  ## if overall selected add Overall column
  if(overall) {
    overall <- tapply(Y, stratified, FUN=comp.stats)
    overall <- matrix(overall, ncol=dim(overall),
                      dimnames=list(NULL, dimnames(overall)[[1]]))
    Xstats$Overall <- overall
  }

  ## str(Xstats)
  newAttr <- list(terms=Terms, call=match.call(), n=n,
                  nmissing=numberMissing, yname=yname, ylabel=ylabel,
                  ycolnames=colnames(Y), funlab=funlab,
                  stratified.Tags=stratified.Tags,
                  stratified.levels=levels(stratified))

  attributes(Xstats) <- c(attributes(Xstats), newAttr)
  class(Xstats) <- 'responseSummary'
  return(Xstats)
}

print.responseSummary <- function(x,
                                  valueNames = c('labels','names'),
                                  vnames, printUnits = TRUE, prUnits,
                                  abbreviate.dimnames=FALSE,
                                  prefix.width, min.colwidth,
                                  formatArgs=NULL, ...)
{
  if(missing(valueNames) && !missing(vnames)){
    warning("argument vnames is deprecated; use valueNames instead",
            immediate. = TRUE)
    valueNames <- vnames
  }

  if(missing(printUnits) && !missing(prUnits)){
    warning("argument prUnits is deprecated; use printUnits instead",
            immediate. = TRUE)
    printUnits <- prUnits
  }

  x.orig <- x

  ## fuzzy match value of valueNames to default options
  valueNames <- match.arg(valueNames)

  ## Get attributes of x for further use
  xattribs <- attributes(x)
  attributes(x) <- NULL

  ## Set useLabel flag to TRUE if user wants to use labels
  ## instead of names
  useLabel <- valueNames == 'labels'

  if(useLabel && !is.null(xattribs$ylabel)) {
    yname <- xattribs$ylabel
  } else {
    yname <- xattribs$yname
  }

  cat(yname)

  ## If more than one stratified level exists, make a 'by' line
  if(length(xattribs$stratified.levels) > 1) {
    if(useLabel && !is.null(xattribs$stratified.Tags$label)) {
      strat.name <- xattribs$stratified.Tags$label
    } else {
      strat.name <- xattribs$stratified.Tags$label
    }

    cat(' by', strat.name)
  }

  cat(' N=', xattribs$n, sep='')
  if(xattribs$nmissing) {
    cat(' ,', xattribs$nmissing, 'Missing')
  }

  cat('\n\n')

  if(useLabel) {
    labels <- unlist(lapply(x, function(x)
      if(is.null(lab <- valueLabel(x))) NA else lab))

    names(x) <- ifelse(is.na(labels), xattribs$names, labels)
  }

  print.char.list(x, abbreviate.dimnames=abbreviate.dimnames,
                  print.it=TRUE, ...)
  invisible(x.orig)
}

latex.responseSummary <- function(object,
                                  title=first.word(deparse(substitute(object))),
                                  caption, trios,
                                  vnames=c('labels', 'names'),
                                  prn=TRUE, prUnits=TRUE,
                                  rowlabel='', cdec=2,
                                  ncaption=TRUE, ...)
{ ## Fix lazy evaluation title <- title } Hmisc/R/strwrap.s0000644000176200001440000000506712243661443013421 0ustar liggesusersif(!exists('strwrap')) { strwrap <- function (x, width = 0.9 * getOption("width"), indent = 0, exdent = 0, prefix = "", simplify = TRUE) { indentString <- paste(rep.int(" ", indent), collapse = "") exdentString <- paste(rep.int(" ", exdent), collapse = "") y <- list() z <- lapply(strsplit(x, "\n[ \t\n]*\n"), strsplit, "[ \t\n]") for (i in seq(along = z)) { yi <- character(0) for (j in seq(along = z[[i]])) { words <- z[[i]][[j]] nc <- nchar(words, type = "w") if (any(is.na(nc))) { nc0 <- nchar(words) nc[is.na(nc)] <- nc0[is.na(nc)] } if (any(nc == 0)) { zLenInd <- which(nc == 0) zLenInd <- zLenInd[!(zLenInd %in% (grep("\\.$", words) + 1))] if (length(zLenInd) > 0) { words <- words[-zLenInd] nc <- nc[-zLenInd] } } if (length(words) == 0) { yi <- c(yi, "", prefix) next } currentIndex <- 0 lowerBlockIndex <- 1 upperBlockIndex <- integer(0) lens <- cumsum(nc + 1) first <- TRUE maxLength <- width - nchar(prefix, type = "w") - indent while (length(lens) > 0) { k <- max(sum(lens <= maxLength), 1) if (first) { first <- FALSE maxLength <- maxLength + indent - exdent } currentIndex <- currentIndex + k if (nc[currentIndex] == 0) upperBlockIndex <- c(upperBlockIndex, currentIndex - 1) else upperBlockIndex <- c(upperBlockIndex, currentIndex) if (length(lens) > k) { if (nc[currentIndex + 1] == 0) { currentIndex <- currentIndex + 1 k <- k + 1 } lowerBlockIndex <- c(lowerBlockIndex, currentIndex + 1) } if (length(lens) > k) lens <- lens[-(1:k)] - lens[k] else lens <- NULL } nBlocks <- length(upperBlockIndex) s <- paste(prefix, c(indentString, rep.int(exdentString, nBlocks - 1)), sep = "") for (k in (1:nBlocks)) s[k] <- paste(s[k], paste(words[lowerBlockIndex[k]:upperBlockIndex[k]], collapse = " "), sep = "") yi <- c(yi, s, prefix) } y <- c(y, list(yi[-length(yi)])) } if (simplify) y <- unlist(y) y } } Hmisc/R/bpower.s0000644000176200001440000000461312243661443013211 0ustar liggesusersbpower <- function(p1, p2, odds.ratio, percent.reduction, n, n1, n2, alpha=.05) { if(!missing(odds.ratio)) p2 <- p1*odds.ratio/(1-p1+p1*odds.ratio) else if(!missing(percent.reduction)) p2 <- p1*(1-percent.reduction/100) if(!missing(n)) { n1 <- n2 <- n/2 } z <- qnorm(1-alpha/2) q1 <- 1-p1 q2 <- 1-p2 pm <- (n1*p1+n2*p2)/(n1+n2) ds <- z*sqrt((1/n1 + 1/n2)*pm*(1-pm)) ex <- abs(p1-p2) sd <- sqrt(p1*q1/n1+p2*q2/n2) c(Power = 1-pnorm((ds-ex)/sd)+pnorm((-ds-ex)/sd) ) } bsamsize <- function(p1, p2, fraction=.5, alpha=.05, power=.8) { z.alpha <- qnorm(1-alpha/2) z.beta <- qnorm(power) ratio <- (1-fraction)/fraction p <- fraction*p1+(1-fraction)*p2 n1 <- (z.alpha * sqrt((ratio+1) * p * (1-p)) + z.beta * sqrt(ratio * p1 * (1-p1) + p2 * (1 - p2)) )^2/ratio/((p1-p2)^2) n2 <- ratio*n1 c(n1=n1, n2=n2) } ballocation <- function(p1, p2, n, alpha=.05) { q1 <- 1-p1 q2 <- 1-p2 f.minvar.diff <- 1/(1+sqrt(p2*q2/(p1*q1))) f.minvar.ratio <- 1/(1+sqrt(p1*q2/p2/q1)) z <- c(fraction.group1.min.var.diff=f.minvar.diff, fraction.group1.min.var.ratio=f.minvar.ratio, fraction.group1.min.var.logodds=1-f.minvar.diff) if(!missing(n)) { possf <- seq(.001,.999,length=1000) pow <- bpower(p1, p2, n1=n*possf, n2=n*(1-possf), alpha=alpha) ## fun <- function(f, n, p1, p2, alpha) bpower(p1, p2, n1=f*n, n2=(1-f)*n, alpha=alpha) ## f.maxpow <- optimize(fun, lower=.01, upper=.99, maximum=T, ## n=n, p1=p1, p2=p2, alpha=alpha)$maximum f <- possf[pow==max(pow)] f <- f[abs(f-.5)==min(abs(f-.5))] z <- c(z, fraction.group1.max.power=f[1]) } z } 
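## Illustrative usage sketch (not part of the Hmisc sources; the numbers
## are made up).  Power of a two-sample binomial test for p1=0.1 versus
## p2=0.05 with 500 subjects per group, and the per-group sample sizes
## giving 90% power under equal allocation:
##   bpower(p1=0.1, p2=0.05, n1=500, n2=500)   # approximate power
##   bsamsize(p1=0.1, p2=0.05, power=0.9)      # required n1 and n2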
bpower.sim <- function(p1, p2, odds.ratio, percent.reduction, n, n1, n2, alpha=.05, nsim=10000) { if(!missing(odds.ratio)) p2 <- p1*odds.ratio/(1-p1+p1*odds.ratio) else if(!missing(percent.reduction)) p2 <- p1*(1-percent.reduction/100) if(!missing(n)) { n1 <- n2 <- round(n/2) } n <- n1+n2 if(length(p1)+length(p2)+length(n1)+length(n2)+length(alpha)+length(nsim)!=6) stop('all arguments must have length 1') chi2 <- qchisq(1-alpha, 1) d1 <- rbinom(nsim, n1, p1) d2 <- rbinom(nsim, n2, p2) chisq <- n*(d1*(n2-d2)-(n1-d1)*d2)^2/(d1+d2)/(n-d1-d2)/n1/n2 power <- mean(chisq>chi2) se <- sqrt(power*(1-power)/nsim) c(Power=power,Lower=power-1.96*se,Upper=power+1.96*se) } Hmisc/R/samplesize.bin.s0000644000176200001440000000155112243661443014634 0ustar liggesusers## Rick Chappell <> Asst. Professor, Depts. of Statistics and Human Oncology ## <> University of Wisconsin at Madison <> chappell@stat.wisc.edu ## (608) 263-5572 / 262-2733 <> take logs samplesize.bin <- function(alpha, beta, pit, pic, rho=.5) { ## alpha is the scalar ONE-SIDED test size, or two-sided size/2 ## beta is a scalar or vector of powers ## pit is the hypothesized treatment probability of success ## pic is the hypothesized control probability of success ## returns required TOTAL sample size, using arcsin transformation ## rho is the proportion of the sample devoted to treated group (0 0 nmiss[j] <- sum(isna) omit <- omit | isna } } if(any(omit)) { rn <- row.names(frame) frame <- frame[!omit,,drop=FALSE] names(nmiss) <- names(frame) ## a %ia% b terms are included - delete them since main effects ## already counted (next 2 stmts reinstated 27Oct93) i <- grep("%ia%", names(nmiss)) if(length(i)>0) nmiss <- nmiss[-i] attr(frame,"nmiss") <- nmiss # for backward compatibility temp <- seq(omit)[omit] names(temp) <- rn[omit] na.info <- list(nmiss=nmiss, omit=temp, na.detail.response=y.detail) class(na.info) <- "delete" attr(frame, "na.action") <- na.info } frame } naprint.delete <- function(x, ...) { if(length(g <- x$nmiss)) { cat("Frequencies of Missing Values Due to Each Variable\n") print(g) cat("\n") } if(length(g <- x$na.detail.response)) { cat("\nStatistics on Response by Missing/Non-Missing Status of Predictors\n\n") print(unclass(g)) cat("\n") } invisible() } globalVariables("naresid.omit") naresid.delete <- napredict.delete <- function(omit, x, ...) { omit <- omit$omit if(exists('naresid.omit')) naresid.omit(omit, x) else { if(!existsFunction('naresid.exclude')) naresid.exclude <- getFromNamespace('naresid.exclude','stats') naresid.exclude(omit, x) } } nafitted.delete <- function(obj, x) { omit <- obj$omit if(exists('naresid.omit')) naresid.omit(omit, x) else getFromNamespace('naresid.exclude','stats')(omit, x) } Hmisc/R/histbackback.s0000644000176200001440000000365213361120173014316 0ustar liggesusershistbackback <- function(x, y, brks = NULL, xlab = NULL, axes = TRUE, probability = FALSE, xlim = NULL, ylab='', ...) 
{ if(length(xlab)) xlab <- rep(xlab, length = 2) if(is.list(x)) { namx <- names(x) y <- x[[2]] if(!length(xlab)) { if(length(namx)) xlab <- namx[1:2] else { xlab <- deparse(substitute(x)) xlab <- paste(xlab, c("x", "y"), sep = "$") } } x <- x[[1]] } else if(!length(xlab)) xlab <- c(deparse(substitute(x)), deparse(substitute(y))) if(!length(brks)) brks <- hist(c(x, y), plot = FALSE)$breaks ll <- hist(x, breaks = brks, plot = FALSE) rr <- hist(y, breaks = brks, plot = FALSE) if(probability) { ll$counts <- ll$density rr$counts <- rr$density } if(length(xlim) == 2) xl <- xlim else { xl <- pretty(range(c( - ll$counts, rr$counts))) xl <- c(xl[1], xl[length(xl)]) } if(length(ll$counts) > 0) { barplot(-ll$counts, xlim=xl, space=0, horiz=TRUE, axes=FALSE, col=0, ...) par(new = TRUE) } if(length(rr$counts) > 0) barplot(rr$counts, xlim=xl, space=0, horiz=TRUE, axes=FALSE, col=0, ...) if(axes) { mgp.axis(1, at=pretty(xl), labels=format(abs(pretty(xl)))) del <- (brks[2]-brks[1] - (brks[3]-brks[2]))/2 brks[1] <- brks[1] + del brks[-1] <- brks[-1] - del at <- 0 : (length(brks) - 1) pb <- pretty(brks) atpb <- approxExtrap(brks, at, xout=pb)$y mgp.axis(2, at=atpb, labels=format(pb)) title(xlab = xlab[1], adj = (-0.5 * xl[1])/( - xl[1] + xl[2])) title(xlab = xlab[2], adj = (-xl[1] + 0.5 * xl[2])/(-xl[1] + xl[2])) if(ylab!='') title(ylab=ylab) } abline(v = 0) box() invisible(list(left = ll$counts, right = rr$counts, breaks = brks)) } Hmisc/R/popower.s0000644000176200001440000003202514244662625013412 0ustar liggesuserspopower <- function(p, odds.ratio, n, n1, n2, alpha=.05) { if(missing(n)) n <- n1 + n2 else { n1 <- n2 <- n / 2 } p <- p[! is.na(p)] if(abs(sum(p) - 1) > .00001) stop('probabilities in p do not add up to 1') z <- qnorm(1 - alpha / 2) A <- n2 / n1 ps <- 1 - sum(p ^ 3) V <- n1 * n2 * n / 3 / ((n + 1) ^ 2) * ps power <- pnorm(abs(logb(odds.ratio)) * sqrt(V) - z) eff <- ps / (1 - 1 / n / n) structure(list(power=power, eff=eff, approx.se=1./sqrt(V)), class='popower') } print.popower <- function(x, ...) { cat('Power:',round(x$power,3), '\nEfficiency of design compared with continuous response:', round(x$eff, 3), '\nApproximate standard error of log odds ratio:', round(x$approx.se, 4), '\n\n') invisible() } posamsize <- function(p, odds.ratio, fraction=.5, alpha=.05, power=.8) { p <- p[!is.na(p)] if(abs(sum(p) - 1) > .00001) stop('probabilities in p do not add up to 1') A <- (1 - fraction) / fraction log.or <- logb(odds.ratio) z.alpha <- qnorm(1 - alpha / 2) z.beta <- qnorm(power) ps <- 1 - sum(p ^ 3) n <- 3 * ((A + 1) ^ 2) * (z.alpha + z.beta) ^ 2 / A / (log.or ^ 2) / ps eff <- ps / (1 - 1 / n / n) structure(list(n=n, eff=eff), class='posamsize') } print.posamsize <- function(x, ...) { cat('Total sample size:',round(x$n, 1), '\nEfficiency of design compared with continuous response:', round(x$eff, 3),'\n\n') invisible() } pomodm <- function(x=NULL, p, odds.ratio=1) { if(length(x) && (length(x) != length(p))) stop('p and x must have same length') if(length(x) && any(diff(x) <= 0)) stop('x is not sorted or has duplicates') if(abs(sum(p) - 1) > .00001) stop('probabilities do not sum to 1') ## Compute cumulative probabilities (exceedances) cp <- function(p) c(1, 1 - cumsum(p)[- length(p)]) ## Compute individual probabilities given exceedance probabilities ip <- function(ep) { p <- c(-diff(ep), ep[length(ep)]) if(abs(sum(p) - 1.) 
> 1e-7) stop('logic error') p } ## Function to alter a vector of individual probabilities by a given ## odds ratio on the exceedance probabilities pmod <- function(p, or) { ## Compute exceedence probabilities ep <- cp(p) ## Apply odds ratio ep <- plogis(qlogis(ep) + log(or)) ## Get back to individual probabilities ip(ep) } ## Convert individual probabilities under the odds ratio p <- pmod(p, odds.ratio) if(! length(x)) return(p) ## Compute mean and weighted median xbar <- sum(p * x) xmed1 <- approx(cumsum(p), x, xout=0.5)$y xmed2 <- approx(rev(cumsum(rev(p))), x, xout=0.5)$y xmed <- (xmed1 + xmed2) / 2 c(mean=xbar, median=xmed) } simPOcuts <- function(n, nsim=10, odds.ratio=1, p) { if(abs(sum(p) - 1.) > 1e-5) stop('probabilities in p must sum to 1') p0 <- p p1 <- pomodm(p=p0, odds.ratio=odds.ratio) lp <- length(p) yval <- if(length(names(p))) names(p) else as.character(1 : lp) or <- matrix(NA, nrow=nsim, ncol=lp - 1, dimnames=list(paste('Simulation', 1 : nsim), paste0('y>=', yval[-1]))) for(i in 1 : nsim) { y0 <- sample(1 : lp, n / 2, prob=p0, replace=TRUE) y1 <- sample(1 : lp, n / 2, prob=p1, replace=TRUE) for(ycut in 2 : lp){ prop0 <- mean(y0 >= ycut) prop1 <- mean(y1 >= ycut) or[i, ycut - 1] <- (prop1 / (1. - prop1)) / (prop0 / (1. - prop0)) } } or } simRegOrd <- function(n, nsim=1000, delta=0, odds.ratio=1, sigma, p=NULL, x=NULL, X=x, Eyx, alpha=0.05, pr=FALSE) { if (!requireNamespace("rms", quietly = TRUE)) stop("This function requires the 'rms' package.") if(length(x) && (length(x) != n)) stop('x must be omitted or have length n') if((n %% 2) != 0) stop('n must be an even integer') treat <- c(rep(0, n / 2), rep(1, n / 2)) X <- cbind(X, treat) betas <- se <- rep(NA, nsim) if(length(p)) { p1 <- pomodm(p=p, odds.ratio=odds.ratio) yordinal <- 0 : (length(p) - 1) } xb <- delta * treat + (if(length(x)) Eyx(x) else 0) for(i in 1 : nsim) { if(pr) cat('Iteration', i, '\r') y <- xb + rnorm(n, mean=0, sd=sigma) if(length(p)) { ## Override y with length(p) - 1 ordinal categories yo <- ifelse(treat == 0, sample(yordinal, n, prob=p, replace=TRUE), sample(yordinal, n, prob=p1, replace=TRUE)) ymax <- max(y) y <- ifelse(yo == 0, y, ceiling(ymax) + yo) } f <- rms::orm.fit(X, y, maxit=20) if(! 
f$fail) { betas[i] <- coef(f)[length(coef(f))] ## coef of treatment k <- nrow(f$var) se[i] <- sqrt(f$var[k, k]) } } if(pr) cat('\n') pvals <- 1 - pchisq((betas / se) ^ 2, 1) power <- mean(pvals < alpha, na.rm=TRUE) list(n=n, delta=delta, sigma=sigma, power=power, betas=betas, se=se, pvals=pvals) } propsPO <- function(formula, odds.ratio=NULL, ref=NULL, data=NULL, ncol=NULL, nrow=NULL) { v <- all.vars(formula) d <- model.frame(formula, data=data) y <- d[[v[1]]] yl <- label(y, default=v[1]) y <- as.factor(y) d[[v[1]]] <- y x <- d[[v[2]]] xl <- label(x, default=v[2]) s <- sn <- NULL sl <- NULL if(length(v) > 2) { if(length(odds.ratio)) stop('odds ratio may not be specified when a stratification variable is included') s <- d[[v[3]]] sl <- label(s, default=v[3]) s <- as.factor(s) sn <- 's' } names(d) <- c('y', 'x', sn) ## ggplot2 bar chart puts first category at the top ## Let's put it at the bottom revo <- function(z) { z <- as.factor(z) factor(z, levels=rev(levels(as.factor(z)))) } # For each x compute the vector of proportions of y categories # Assume levels are in order # Put numerators and denominators into a character string so that # data.table [ operator can operate on one variable # The delimiter is a single space g <- function(y) { tab <- table(y) structure(paste(tab, rep(sum(tab), length(tab))), names=names(tab)) } atxt <- function(d, strat=NULL, or=FALSE) { if(! or) { z <- d$prop num <- as.numeric(sub(' .*', '', z)) # get first number denom <- as.numeric(sub('.* ', '', z)) # get second number d$prop <- num / denom } d$txt <- paste0(sl, if(length(strat)) ': ', as.character(strat), if(length(strat)) '
<br>',
                    xl, ': ', as.character(d$x), '<br>',
                    yl, ': ', as.character(d$y), '<br>',
                    'Proportion: ', round(d$prop, 3),
                    if(! or) '\u2003',
                    if(! or) markupSpecs$html$frac(num, denom, size=90))
    d
  }

  d <- data.table(d)
  if(! length(s)) {   # stratification variable not present
    p  <- d[, as.list(g(y)), by=x]
    pm <- melt(p, id=1, variable.name='y', value.name='prop')
    pm <- atxt(pm)
    plegend <- guides(fill=guide_legend(title=yl))
    if(! length(odds.ratio)) {
      gg <- ggplot(pm, aes(x=as.factor(x), y=prop, fill=revo(y), label=txt)) +
              geom_col() + plegend + xlab(xl) + ylab('Proportion')
      return(gg)
    }
  } else {   # stratification variable present; odds ratio must be absent
    p  <- d[, as.list(g(y)), by=.(x, s)]
    pm <- melt(p, id=c('x', 's'), variable.name='y', value.name='prop')
    pm <- atxt(pm, pm$s)
    plegend <- guides(fill=guide_legend(title=yl))
    gg <- ggplot(pm, aes(x=as.factor(x), y=prop, fill=revo(y), label=txt)) +
            facet_wrap(~ s, ncol=ncol, nrow=nrow) +
            geom_col() + plegend + xlab(xl) + ylab('Proportion')
    return(gg)
  }

  ## odds.ratio is present
  if(! length(ref)) ref <- p$x[1]
  pfx <- as.matrix(p[x == ref, -1])
  propfirstx <- as.numeric(sub(' .*', '', pfx)) /
                as.numeric(sub('.* ', '', pfx))

  .g. <- function(x) {
    w <- pomodm(p=propfirstx, odds.ratio=odds.ratio(x))
    names(w) <- levels(y)
    w
  }

  pa  <- d[, as.list(.g.(x)), by=x]
  pma <- melt(pa, id=1, variable.name='y', value.name='prop')
  pma <- atxt(pma, or=TRUE)
  w <- rbind(cbind(type=1, pm), cbind(type=2, pma))
  w$type <- factor(w$type, 1 : 2, c('Observed', 'Assuming Proportional Odds'))
  ggplot(w, aes(x=as.factor(x), y=prop, fill=revo(y), label=txt)) +
    geom_col() + facet_wrap(~ type, nrow=2) +
    plegend + xlab(xl) + ylab('Proportion')
}
utils::globalVariables(c('.', 'prop'))

propsTrans <- function(formula, data=NULL, labels=NULL, arrow='\u2794',
                       maxsize=12, ncol=NULL, nrow=NULL) {
  v  <- all.vars(formula)
  d  <- model.frame(formula, data=data)
  y  <- as.factor(d[[v[1]]])
  x  <- d[[v[2]]]
  xlab <- label(x, default=v[2])
  id <- as.character(d[[v[3]]])
  uid <- sort(unique(id))
  nid <- length(uid)
  times <- if(is.factor(x)) levels(x) else sort(unique(x))
  nt  <- length(times)
  itrans <- integer(0)
  Prev <- Cur <- Frac <- character(0)
  prop <- frq <- numeric(0)
  mu <- markupSpecs$html
  arrowbr <- paste0(' ', arrow, '<br>
    ') arrow <- paste0(' ', arrow, ' ') for(it in 2 : nt) { prev <- cur <- rep(NA, nid) names(prev) <- names(cur) <- uid i <- x == times[it - 1] j <- x == times[it] prev[id[i]] <- as.character(y[i]) cur [id[j]] <- as.character(y[j]) tab <- table(prev, cur) rowf <- rowSums(tab) tab <- as.data.frame(tab) # tab <- subset(tab, Freq > 0) tab$denom <- rowf[tab$prev] tab$prop <- tab$Freq / tab$denom frq <- c(frq, tab$Freq) Prev <- c(Prev, as.character(tab$prev)) Cur <- c(Cur, as.character(tab$cur)) prop <- c(prop, tab$Freq / tab$denom) Frac <- c(Frac, paste0(round(tab$Freq / tab$denom, 3), mu$lspace, mu$frac(tab$Freq, tab$denom, size=90))) k <- length(tab$Freq) itrans <- c(itrans, rep(it, k)) } Prev <- factor(Prev, levels(y)) Cur <- factor(Cur, levels(y)) trans <- factor(itrans, 2 : nt, labels=paste0(xlab, ' ', times[1 : (nt - 1)], arrow, times[2 : nt])) transp <- factor(itrans, 2 : nt, labels=paste0(xlab, ' ', times[1 : (nt - 1)], arrow, times[2 : nt])) w <- data.frame(trans, transp, Prev, Cur, prop, Frac, frq, txt=if(! length(labels)) ifelse(Prev == Cur, paste0('Stay at:', as.character(Prev)), paste0(as.character(Prev), arrowbr, as.character(Cur))) else ifelse(Prev == Cur, paste0('Stay at:', labels[as.integer(Prev)]), paste0(labels[as.integer(Prev)], arrowbr, labels[as.integer(Cur)]))) w$txt <- paste0(w$transp, '
    ', w$txt, '
    ', w$Frac) w <- subset(w, frq > 0) ggplot(w, aes(x=Prev, y=Cur, size=prop, label=txt)) + facet_wrap(~ trans, ncol=ncol, nrow=nrow) + geom_point() + scale_size(range = c(0, maxsize)) + xlab('Previous State') + ylab('Current State') + guides(size = guide_legend(title='Proportion')) # ggplot(w, aes(x=Prev, y=prop, fill=Cur)) + # facet_wrap(~ trans, ncol=ncol, nrow=nrow) + # geom_col() + #scale_size(range = c(0, 12)) + # xlab('Previous State') + ylab('Proportion') + # guides(fill = guide_legend(title='Current State')) } multEventChart <- function(formula, data=NULL, absorb=NULL, sortbylast=FALSE, colorTitle=label(y), eventTitle='Event', palette='OrRd', eventSymbols=c(15, 5, 1:4, 6:10), timeInc=min(diff(unique(x))/2)) { v <- all.vars(formula) d <- model.frame(formula, data=data) y <- as.factor(d[[v[1]]]) y <- factor(y, levels=rev(levels(y))) x <- d[[v[2]]] xlab <- label(x, default=v[2]) id <- as.factor(d[[v[3]]]) # Optionally sort subjects by last status, assuming levels of y # are in ascending order of badness if(sortbylast) { last <- tapply(1 : length(y), id, function(i) { times <- x[i] status <- y[i] as.integer(status[which.max(times)]) }) i <- order(last, levels(id), decreasing=TRUE) id <- factor(id, levels=levels(id)[i]) } else id <- factor(id, levels=rev(levels(id))) ab <- as.character(y) %in% absorb nay <- setdiff(levels(y), absorb) event <- y event[! ab] <- NA y[ab] <- NA de <- subset(data.frame(id, x, y, event), ! is.na(event)) ggplot(mapping=aes(x = id, y = x, fill = y)) + scale_fill_brewer(colorTitle, palette=palette, direction=-1, breaks=nay) + geom_segment(aes(x = id, xend = id, y = 0, yend = x - timeInc), lty = 3) + geom_tile(width = timeInc) + scale_y_continuous(breaks=min(x) : max(x)) + geom_point(aes(x = id, y = x - timeInc, shape = event), data=de) + scale_shape_manual(eventTitle, values=eventSymbols[1 : length(absorb)], labels=c(absorb)) + guides(fill=guide_legend(override.aes=list(shape=NA), order=1)) + coord_flip() + labs(y=xlab, x='') } utils::globalVariables('txt') Hmisc/R/cnvrt.coords.s0000644000176200001440000000704112243661443014335 0ustar liggesuserscnvrt.coords <- function (x, y = NULL, input = c("usr", "plt", "fig", "dev", "tdev")) { input <- match.arg(input) xy <- xy.coords(x, y, recycle = TRUE) cusr <- par("usr") cplt <- par("plt") cfig <- par("fig") cdin <- par("din") comi <- par("omi") cdev <- c(comi[2]/cdin[1], (cdin[1] - comi[4])/cdin[1], comi[1]/cdin[2], (cdin[2] - comi[3])/cdin[2]) if (input == "usr") { usr <- xy plt <- list() plt$x <- (xy$x - cusr[1])/(cusr[2] - cusr[1]) plt$y <- (xy$y - cusr[3])/(cusr[4] - cusr[3]) fig <- list() fig$x <- plt$x * (cplt[2] - cplt[1]) + cplt[1] fig$y <- plt$y * (cplt[4] - cplt[3]) + cplt[3] dev <- list() dev$x <- fig$x * (cfig[2] - cfig[1]) + cfig[1] dev$y <- fig$y * (cfig[4] - cfig[3]) + cfig[3] tdev <- list() tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] return(list(usr = usr, plt = plt, fig = fig, dev = dev, tdev = tdev)) } if (input == "plt") { plt <- xy usr <- list() usr$x <- plt$x * (cusr[2] - cusr[1]) + cusr[1] usr$y <- plt$y * (cusr[4] - cusr[3]) + cusr[3] fig <- list() fig$x <- plt$x * (cplt[2] - cplt[1]) + cplt[1] fig$y <- plt$y * (cplt[4] - cplt[3]) + cplt[3] dev <- list() dev$x <- fig$x * (cfig[2] - cfig[1]) + cfig[1] dev$y <- fig$y * (cfig[4] - cfig[3]) + cfig[3] tdev <- list() tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] return(list(usr = usr, plt = plt, fig = fig, dev = dev, tdev = tdev)) } if (input == 
"fig") { fig <- xy plt <- list() plt$x <- (fig$x - cplt[1])/(cplt[2] - cplt[1]) plt$y <- (fig$y - cplt[3])/(cplt[4] - cplt[3]) usr <- list() usr$x <- plt$x * (cusr[2] - cusr[1]) + cusr[1] usr$y <- plt$y * (cusr[4] - cusr[3]) + cusr[3] dev <- list() dev$x <- fig$x * (cfig[2] - cfig[1]) + cfig[1] dev$y <- fig$y * (cfig[4] - cfig[3]) + cfig[3] tdev <- list() tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] return(list(usr = usr, plt = plt, fig = fig, dev = dev, tdev = tdev)) } if (input == "dev") { dev <- xy fig <- list() fig$x <- (dev$x - cfig[1])/(cfig[2] - cfig[1]) fig$y <- (dev$y - cfig[3])/(cfig[4] - cfig[3]) plt <- list() plt$x <- (fig$x - cplt[1])/(cplt[2] - cplt[1]) plt$y <- (fig$y - cplt[3])/(cplt[4] - cplt[3]) usr <- list() usr$x <- plt$x * (cusr[2] - cusr[1]) + cusr[1] usr$y <- plt$y * (cusr[4] - cusr[3]) + cusr[3] tdev <- list() tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] return(list(usr = usr, plt = plt, fig = fig, dev = dev, tdev = tdev)) } if (input == "tdev") { tdev <- xy dev <- list() dev$x <- (tdev$x - cdev[1])/(cdev[2] - cdev[1]) dev$y <- (tdev$y - cdev[3])/(cdev[4] - cdev[3]) fig <- list() fig$x <- (dev$x - cfig[1])/(cfig[2] - cfig[1]) fig$y <- (dev$y - cfig[3])/(cfig[4] - cfig[3]) plt <- list() plt$x <- (fig$x - cplt[1])/(cplt[2] - cplt[1]) plt$y <- (fig$y - cplt[3])/(cplt[4] - cplt[3]) usr <- list() usr$x <- plt$x * (cusr[2] - cusr[1]) + cusr[1] usr$y <- plt$y * (cusr[4] - cusr[3]) + cusr[3] tdev <- list() tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] return(list(usr = usr, plt = plt, fig = fig, dev = dev, tdev = tdev)) } } Hmisc/R/summaryM.s0000644000176200001440000011271314370730127013525 0ustar liggesuserssummaryM <- function(formula, groups=NULL, data=NULL, subset, na.action=na.retain, overall=FALSE, continuous=10, na.include=FALSE, quant=c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 0.975), nmin=100, test=FALSE, conTest=conTestkw, catTest=catTestchisq, ordTest=ordTestpo) { marg <- length(data) && '.marginal.' %in% names(data) if(marg) formula <- update(formula, .~. + .marginal.) formula <- Formula(formula) Y <- if(!missing(subset) && length(subset)) model.frame(formula, data=data, subset=subset, na.action=na.action) else model.frame(formula, data=data, na.action=na.action) # mf <- match.call(expand.dots=FALSE) # m <- match(c('formula', 'data', 'subset', 'na.action'), names(mf), 0) # mf <- mf[c(1, m)] # if(missing(na.action)) mf$na.action <- na.retain # formula <- Formula(formula) # mf[[1]] <- as.name('model.frame') # mf$formula <- formula # Y <- eval(mf, parent.frame()) X <- model.part(formula, data=Y, rhs=1) Y <- model.part(formula, data=Y, lhs=1) getlab <- function(x, default) { lab <- attr(x, 'label') if(!length(lab) || lab=='') default else lab } if(marg) { xm <- X$.marginal. X$.marginal. <- NULL } else xm <- rep('', nrow(X)) if(length(X)) { xname <- names(X) if(length(xname) == 1 && ! length(groups)) groups <- xname if(! 
length(groups) && length(xname) > 1) { warnings('Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.') groups <- xname[1] } svar <- if(length(xname) == 1) factor(rep('.ALL.', nrow(X))) else do.call('interaction', list(X[setdiff(xname, groups)], sep=' ')) group <- X[[groups]] glabel <- getlab(group, groups) } else { svar <- factor(rep('.ALL.', nrow(Y))) group <- rep('', nrow(Y)) # y1 + y2 ~ 1, no grouping groups <- group.freq <- NULL glabel <- '' } quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 0.975 )) nv <- ncol(Y) nameY <- names(Y) R <- list() for(strat in levels(svar)) { instrat <- svar == strat n <- integer(nv) type <- n comp <- dat <- vector("list", nv) names(comp) <- names(dat) <- nameY labels <- Units <- vector("character", nv) if(test) { testresults <- vector('list', nv) names(testresults) <- names(comp) } gr <- group[instrat] xms <- xm[instrat] ## Need to ignore marginal summaries in N unless stratifying by ## the variable that is marginalized over if(all(xms != '')) xms <- rep('', length(xms)) group.freq <- table(gr) group.freq <- group.freq[group.freq > 0] if(overall) group.freq <- c(group.freq, Combined=sum(group.freq)) for(i in 1 : nv) { w <- Y[instrat, i] if(length(attr(w, "label"))) labels[i] <- attr(w, "label") if(length(attr(w, 'units'))) Units[i] <- attr(w, 'units') if(is.character(w)) w <- as.factor(w) if(!inherits(w, 'mChoice')) { if(!is.factor(w) && !is.logical(w) && length(unique(w[! is.na(w)])) < continuous) w <- as.factor(w) s <- !is.na(w) if(na.include && !all(s) && length(levels(w))) { w <- na.include(w) levels(w)[is.na(levels(w))] <- 'NA' s <- rep(TRUE, length(s)) } n[i] <- sum(s & xms == '') w <- w[s] g <- gr[s, drop=TRUE] if(is.factor(w) || is.logical(w)) { tab <- table(w, g) if(test) { if(is.ordered(w)) testresults[[i]] <- ordTest(g, w) else testresults[[i]] <- catTest(tab) } if(nrow(tab) == 1) { b <- casefold(dimnames(tab)[[1]], upper=TRUE) pres <- c('1', 'Y', 'YES', 'PRESENT') abse <- c('0', 'N', 'NO', 'ABSENT') jj <- match(b, pres, nomatch=0) if(jj > 0) bc <- abse[jj] else { jj <- match(b, abse, nomatch=0) if(jj > 0) bc <- pres[jj] } if(jj) { tab <- rbind(tab, rep(0, ncol(tab))) dimnames(tab)[[1]][2] <- bc } } if(overall) tab <- cbind(tab, Combined=apply(tab, 1, sum)) comp[[i]] <- tab type[i] <- 1 } else { sfn <- function(x, quant) { ## Won't lose precision in quantile names with digits=15 y <- c(quantile(x,quant), Mean=mean(x), SD=sqrt(var(x)), N=sum(!is.na(x))) names(y) <- c(paste0(formatC(100 * quant, format='fg', width=1, digits=15), '%'), 'Mean', 'SD', 'N') y } qu <- tapply(w, g, sfn, simplify=TRUE, quants) if(test) testresults[[i]] <- conTest(g, w) if(overall) qu$Combined <- sfn(w, quants) comp[[i]] <- matrix(unlist(qu), ncol=length(quants) + 3, byrow=TRUE, dimnames=list(names(qu), c(format(quants), 'Mean', 'SD', 'N'))) if(any(group.freq <= nmin)) dat[[i]] <- lapply(split(w, g), nmin=nmin, function(x, nmin) if(length(x) <= nmin) x else NULL) type[i] <- 2 } } else { w <- as.numeric(w) == 1 ## multiple choice variables ## n[i] <- nrow(w) n[i] <- sum(! is.na(apply(w, 1, sum)) & xms == '') g <- as.factor(gr) ncat <- ncol(w) tab <- matrix(NA, nrow=ncat, ncol=length(levels(g)), dimnames=list(dimnames(w)[[2]], levels(g))) if(test) { pval <- numeric(ncat) names(pval) <- dimnames(w)[[2]] d.f. 
<- stat <- pval } for(j in 1 : ncat) { tab[j,] <- tapply(w[,j], g, sum, simplify=TRUE, na.rm=TRUE) if(test) { tabj <- rbind(table(g)-tab[j,],tab[j,]) st <- catTest(tabj) pval[j] <- st$P stat[j] <- st$stat d.f.[j] <- st$df } } if(test) testresults[[i]] <- list(P=pval, stat = stat, df = d.f., testname = st$testname, namefun = st$namefun, statname = st$statname, latexstat = st$latexstat, plotmathstat= st$plotmathstat) if(overall) tab <- cbind(tab, Combined=apply(tab,1,sum)) comp[[i]] <- tab type[i] <- 3 } } labels <- ifelse(nchar(labels), labels, names(comp)) R[[strat]] <- list(stats=comp, type=type, group.freq=group.freq, labels=labels, units=Units, quant=quant, data=dat, N=sum(!is.na(gr) & xms ==''), n=n, testresults=if(test)testresults) } structure(list(results=R, group.name=groups, group.label=glabel, call=call, formula=formula), class="summaryM") } plot.summaryM <- function(x, vnames = c('labels', 'names'), which = c('both', 'categorical', 'continuous'), vars = NULL, xlim = c(0,1), xlab = 'Proportion', pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), exclude1 = TRUE, main, ncols=2, prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001, conType = c('bp', 'dot', 'raw'), cex.means = 0.5, cex=par('cex'), height='auto', width=700, ...) { obj <- x vnames <- match.arg(vnames) which <- match.arg(which) conType <- match.arg(conType) if(grType() == 'plotly') return(plotpsummaryM(x, vnames=vnames, which=which, vars=vars, xlim=xlim, xlab=xlab, exclude1=exclude1, ncols=ncols, prtest=prtest, pdig=3, eps=0.001, height=height, width=width)) html <- FALSE ul <- vnames=='labels' if(is.logical(prtest) && !prtest) prtest <- 'none' for(strat in names(x$results)) { obj <- x$results[[strat]] test <- obj$testresults if(!length(test)) prtest <- 'none' varNames <- names(obj$stats) vn <- if(ul) obj$labels else varNames Units <- obj$units nw <- if(lg <- length(obj$group.freq)) lg else 1 gnames <- names(obj$group.freq) if(missing(main)) main <- if(strat != '.ALL.') strat else if(nw == 1) '' else paste('Proportions', 'Stratified by', x$group.label) pch <- rep(pch, length.out=nw) lab <- vnd <- z <- nmiss <- vnamd <- NULL type <- obj$type; n <- obj$n opar <- par() on.exit(setParNro(opar)) npages <- 0 if(which != 'continuous' && any(type %in% c(1, 3))) { ftstats <- NULL for(i in (1:length(type))[type %in% c(1, 3)]) { nam <- vn[i] tab <- obj$stats[[i]] if(nw == 1) tab <- as.matrix(tab) nr <- nrow(tab) denom <- if(type[i] == 1) apply(tab, 2, sum) else obj$group.freq y <- sweep(tab, 2, denom, FUN='/') lev <- dimnames(y)[[1]] exc <- exclude1 && (nr == 2) jstart <- if(exc) 2 else 1 rl <- casefold(lev) binary <- type[i] == 1 && exc && (all(rl %in% c("0", "1")) | all(rl %in% c("false", "true"))| all(rl %in% c("absent", "present"))) for(j in jstart : nrow(y)) { if(nw==1) z <- rbind(z, y[j,]) else { yj <- rep(NA, nw) names(yj) <- gnames yj[names(y[j,])] <- y[j,] z <- rbind(z, yj) } lab <- c(lab, if(binary) '' else lev[j]) vnd <- c(vnd, nam) vnamd <- c(vnamd, varNames[i]) } if(any(prtest != 'none')) { fts <- formatTestStats(test[[varNames[i]]], type[i] == 3, if(type[i] == 1) 1 else 1 : nr, prtest = prtest, plotmath= TRUE, pdig=pdig, eps=eps) ftstats <- c(ftstats, fts, if(type[i] == 1 && nr - exc - 1 > 0) rep(expression(''), nr - exc - 1)) } } dimnames(z) <- list(lab, dimnames(z)[[2]]) dotchart3(z, groups=factor(vnd, levels=unique(vnd)), xlab=xlab, xlim=xlim, auxdata=if(!any(prtest == 'none')) ftstats, pch=pch, ...) 
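      ## A minimal usage sketch for the categorical dot chart drawn just
      ## above (hypothetical data; the variables `treat' and `sex' are
      ## illustrative only, not part of this file):
      ##   set.seed(1)
      ##   d <- data.frame(treat=sample(c('A','B'), 100, TRUE),
      ##                   sex=sample(c('female','male'), 100, TRUE))
      ##   s <- summaryM(sex ~ treat, data=d, test=TRUE)
      ##   plot(s, which='categorical')   # renders this chart of proportions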
if(main == '' && strat != '.ALL.') title(strat) else if(main != '') title(main) npages <- npages + 1 setParNro(opar) ## Dummy key if only one column, so won't use another Key from an ## earlier run if(nw < 2) { Key1 <- function(...)invisible(NULL) .setKey(Key1) } else { ##set up for key() if > 1 column Key3 <- function(x=NULL, y=NULL, lev, pch) { oldpar <- par('usr', 'xpd') par(usr=c(0, 1, 0, 1), xpd=NA) on.exit(par(oldpar)) if(is.list(x)) { y <- x$y x <- x$x } ## Even though par('usr') shows 0,1,0,1 after lattice draws ## its plot, it still needs resetting if(!length(x)) x <- 0 if(!length(y)) y <- 1 ## because of formals() rlegend(x, y, legend=lev, pch=pch, ...) invisible() } formals(Key3) <- list(x=NULL,y=NULL,lev=names(obj$group.freq), pch=pch) .setKey(Key3) } } ncont <- sum(type==2) if(which != 'categorical' && ncont) { mf <- par('mfrow') if(length(mf) == 0) mf <- c(1, 1) if(ncont > 1 & max(mf) == 1) { mf <- if(ncont <= 2) c(2,1) else if(ncont <= 4) c(2,2) else if(ncont <= 6) c(2,3) else if(ncont <= 9) c(3,3) else c(4,3) ## if(ncont <= 12)c(4,3) else if(ncont <= 16) c(4,4) else c(5,4) nr <- mf[1] m <- par('mar') par(mfrow=mf) } npages <- npages + ceiling(sum(type == 2) / prod(mf)) for(i in (1 : length(type))[type == 2]) { nam <- labelPlotmath(vn[i], Units[i]) st <- obj$stats[[i]] if(nw==1) st <- as.matrix(st) N <- st[, 'N'] if(conType == 'dot') { quantile.columns <- dimnames(st)[[2]] %nin% c('Mean', 'SD', 'N') st <- st[,quantile.columns, drop=FALSE] xlim <- range(st) ns <- as.numeric(dimnames(st)[[2]]) l <- 1 : length(ns) q1 <- l[abs(ns - .25) < .001] med <- l[abs(ns - .5) < .001] q3 <- l[abs(ns - .75) < .001] st <- st[,c(q1, med, q3), drop=FALSE] dotchart3(st, xlim=xlim, xlab=nam, pch=c(91, 16, 93), auxtitle='N', auxdata=N) Key2 <- function(x=NULL, y=NULL, quant, ...) { quant <- format(quant) txt <- paste0('(0.25, 0.5, 0.75) quantiles shown\n', 'x-axes scaled to (',min(quant),',', max(quant), ') quantiles') if(length(x)) { if(is.list(x)) { y <- x$y; x <- x$x } text(x, y, txt, cex=.8, adj=0, ...) } else mtitle(lr=txt, cex.l=.8, line=1, ...) invisible() } formals(Key2) <- list(x=NULL, y=NULL, quant=obj$quant) .setKey2(Key2) } else if(conType == 'bp') { st <- st[, colnames(st) != 'N', drop=FALSE] mw <- max(strwidth(N, 'inches', cex=cex)) omai <- par('mai') mai <- omai mai[4] <- .3 + 1.1 * mw par(mai=mai) bpplt(st, xlab=nam, cex.points=cex.means) upedge <- par('usr')[4] outerText('N', upedge + strheight('N', cex=cex) / 2, cex=cex) outerText(N, length(N) : 1, cex=cex) par(mai=omai) } else stripChart(obj$data[[i]], xlab=nam) if(all(prtest != 'none')) { fts <- formatTestStats(test[[varNames[i]]], prtest=prtest, plotmath=TRUE, pdig=pdig, eps=eps) title(fts, line=.5) } } } } invisible(npages) } print.summaryM <- function(...) { lang <- prType() switch(lang, plain = printsummaryM(...), latex = latex.summaryM(...), html = latex.summaryM(..., html=TRUE) ) } printsummaryM <- function(x, digits, prn=any(n != N), what=c('proportion', '%'), pctdig=if(what == '%') 0 else 2, npct=c('numerator', 'both', 'denominator', 'none'), exclude1=TRUE, vnames=c("labels", "names"), prUnits=TRUE, sep="/", abbreviate.dimnames=FALSE, prefix.width=max(nchar(lab)), min.colwidth, formatArgs=NULL, round=NULL, prtest=c('P', 'stat', 'df', 'name'), prmsd=FALSE, long=FALSE, pdig=3, eps=0.001, prob=c(0.25, 0.5, 0.75), prN=FALSE, ...) { npct <- match.arg(npct) vnames <- match.arg(vnames) what <- match.arg(what) if(is.logical(prtest) && !prtest) prtest <- 'none' obj <- x if(! 
length(obj$results)) return() for(strat in names(obj$results)) { x <- obj$results[[strat]] stats <- x$stats nv <- length(stats) cstats <- lab <- character(0) nn <- integer(0) type <- x$type n <- x$n N <- x$N nams <- names(stats) labels <- x$labels Units <- x$units test <- x$testresults if(!length(test)) prtest <- 'none' nw <- if(lg <- length(x$group.freq)) lg else 1 gnames <- names(x$group.freq) if(!missing(digits)) { oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) } cstats <- NULL for(i in 1 : nv) { nn <- c(nn, n[i]) nam <- if(vnames == "names") nams[i] else labels[i] if(prUnits && nchar(Units[i])) nam <- paste0(nam,' [', gsub('\\*', ' ', Units[i]),']') tr <- if(length(test) && all(prtest != 'none')) test[[nams[i]]] else NULL if(type[i] %in% c(1, 3)) { cs <- formatCats(stats[[i]], nam, tr, type[i], if(length(x$group.freq)) x$group.freq else x$n[i], what, npct, pctdig, exclude1, long, prtest, pdig=pdig, eps=eps) nn <- c(nn, rep(NA, nrow(cs) - 1)) } else cs <- formatCons(stats[[i]], nam, tr, x$group.freq, prmsd, sep, formatArgs, round, prtest, pdig=pdig, eps=eps, prob=prob, prN=prN) cstats <- rbind(cstats, cs) } lab <- dimnames(cstats)[[1]] gl <- names(x$group.freq) gl <- if(length(gl)) paste0(gl," \n(N=", x$group.freq, ")") else "" if(length(test) && !all(prtest == 'none')) gl <- c(gl, if(length(prtest) == 1 && prtest != 'stat') if(prtest == 'P')'P-value' else prtest else ' Test\nStatistic') nc <- nchar(cstats) spaces <- substring(" ", 1, (max(nc) - nc + 1) / 2) ## center strings dc <- dim(cstats) cstats <- paste0(spaces, cstats) dim(cstats) <- dc if(prn) { cnn <- format(nn) cnn[is.na(nn)] <- '' cstats <- cbind(cnn, cstats) gl <- c('N', gl) } cstats <- rbind(gl, cstats) dimnames(cstats) <- list(c('',lab), NULL) if(strat != '.ALL.') cat('\n', strat, '\n', sep='') cat("\n\nDescriptive Statistics", if(length(x$group.label)) paste(" by", x$group.label) else paste0(" (N=", x$N,")"), "\n\n", sep="") if(missing(min.colwidth)) min.colwidth <- max(min(nchar(gl)), min(nc[nc > 0])) print.char.matrix(cstats, col.names=FALSE, col.txt.align='left', ...) } invisible(cstats) } latex.summaryM <- function(object, title=first.word(deparse(substitute(object))), file=paste(title, 'tex', sep='.'), append=FALSE, digits, prn = any(n!=N), what=c('proportion', '%'), pctdig=if(what == '%') 0 else 2, npct=c('numerator', 'both', 'denominator', 'slash', 'none'), npct.size=if(html) mspecs$html$smaller else 'scriptsize', Nsize =if(html) mspecs$html$smaller else 'scriptsize', exclude1=TRUE, vnames=c("labels","names"), prUnits=TRUE, middle.bold=FALSE, outer.size=if(html) mspecs$html$smaller else 'scriptsize', caption, rowlabel="", rowsep=html, insert.bottom=TRUE, dcolumn=FALSE, formatArgs=NULL, round=NULL, prtest=c('P', 'stat', 'df', 'name'), prmsd=FALSE, msdsize=if(html) function(x) x else NULL, brmsd=FALSE, long=FALSE, pdig=3, eps=.001, auxCol=NULL, table.env=TRUE, tabenv1=FALSE, prob=c(0.25, 0.5, 0.75), prN=FALSE, legend.bottom=FALSE, html=FALSE, mspecs=markupSpecs, ...) { if(! length(object$results)) return() if(! append) cat('', file=file) append <- TRUE what <- match.arg(what) npct <- match.arg(npct) vnames <- match.arg(vnames) if(is.logical(prtest) && ! 
prtest) prtest <- 'none' strats <- names(object$results) probdef <- c(0.25, 0.5, 0.75) if(length(prob) != 3) { prob <- probdef } lang <- if(html) 'html' else 'latex' specs <- mspecs[[lang]] math <- specs$math spc <- specs$lspace bold <- specs$bold sup <- specs$sup br <- specs$br plminus <- specs$plminus Npct.size <- npct.size; NNsize <- Nsize; Outer.size <- outer.size if(! is.function(npct.size)) npct.size <- function(x) paste0('{\\', Npct.size, ' ', x, '}') if(! is.function(Nsize)) Nsize <- function(x) paste0('{\\', NNsize, ' ', x, '}') if(! is.function(outer.size)) outer.size <- function(x) paste0('{\\', Outer.size,' ', x, '}') ## Add this back if revert to previous chisq function in markupSpecs ## if(html) cat(specs$styles) ## define html styles such as xscript for chisq maxlablen <- 0 istr <- 0 Lab <- character(0) Cstats <- NULL n.tspanner <- integer(0) for(strat in strats) { istr <- istr + 1 x <- object$results[[strat]] stats <- x$stats nv <- length(stats) cstats <- lab <- character(0) nn <- integer(0) type <- x$type n <- x$n N <- x$N nams <- names(stats) labels <- x$labels maxlablen <- max(maxlablen, nchar(labels)) Units <- x$units nw <- if(lg <- length(x$group.freq)) lg else 1 gnames <- names(x$group.freq) test <- x$testresults if(!length(test)) prtest <- 'none' gt1.test <- if(all(prtest == 'none')) FALSE else length(unique(sapply(test,function(a)a$testname))) > 1 if(!missing(digits)) { oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) } if(missing(caption)) caption <- paste0("Descriptive Statistics", if(length(x$group.label)) paste(" by", x$group.label) else math(paste0(" (N=", x$N, ")"))) if(! middle.bold) bold <- function(x) x cstats <- NULL testUsed <- auxc <- character(0) rows.per.var <- integer(0) for(i in 1:nv) { if(length(auxCol)) auxc <- c(auxc, auxCol[[1]][i]) nn <- c(nn, n[i]) nam <- if(vnames == "names") nams[i] else labels[i] if(prUnits && nchar(Units[i]) > 0) nam <- specs$varlabel(nam, Units[i], hfill=TRUE) tr <- if(length(test) && all(prtest != 'none')) test[[nams[i]]] else NULL if(length(test) && all(prtest != 'none')) testUsed <- unique(c(testUsed, tr$testname)) if(type[i] %in% c(1, 3)) { cs <- formatCats(stats[[i]], nam, tr, type[i], if(length(x$group.freq)) x$group.freq else x$n[i], what, npct, pctdig, exclude1, long, prtest, lang=lang, testUsed=testUsed, npct.size=npct.size, pdig=pdig, eps=eps, footnoteTest=gt1.test) nn <- c(nn, rep(NA, nrow(cs) - 1)) rows.per.var <- c(rows.per.var, nrow(cs)) } else { cs <- formatCons(stats[[i]], nam, tr, x$group.freq, prmsd, prtest=prtest, formatArgs=formatArgs, round=round, lang=lang, testUsed=testUsed, middle.bold=middle.bold, outer.size=outer.size, msdsize=msdsize, brmsd=brmsd, pdig=pdig, eps=eps, footnoteTest=gt1.test, prob=prob, prN=prN) rows.per.var <- c(rows.per.var, 1) } cstats <- rbind(cstats, cs) if(length(auxc) && nrow(cstats) > 1) auxc <- c(auxc, rep(NA, nrow(cs) - 1)) } lab <- dimnames(cstats)[[1]] gl <- names(x$group.freq) if(!length(gl)) gl <- " " if(! 
html) { lab <- latexTranslate(lab, c(" "), c("~"), greek=TRUE) gl <- latexTranslate(gl, greek=TRUE) } extracolheads <- if(any(gl != " ")) c(if(prn)'', math(paste0('N=', x$group.freq))) else NULL if(length(test) && !all(prtest == 'none')) { gl <- c(gl, if(length(prtest) == 1 && prtest != 'stat') if(prtest == 'P') 'P-value' else prtest else 'Test Statistic') if(length(extracolheads)) extracolheads <- c(extracolheads, '') } dimnames(cstats) <- list(NULL, gl) cstats <- data.frame(cstats, check.names=FALSE, stringsAsFactors=FALSE) if(length(gl) == 1 && gl == '') colnames(cstats) <- ' ' ## override V1 col.just <- rep("c", length(gl)) if(dcolumn && all(prtest != 'none') && gl[length(gl)] %in% c('P-value', 'Test Statistic')) col.just[length(col.just)] <- '.' if(prn) { cstats <- data.frame(N=nn, cstats, check.names=FALSE, stringsAsFactors=FALSE) col.just <- c("r", col.just) } noib <- is.logical(insert.bottom) && ! insert.bottom defs <- NULL if(is.character(insert.bottom)) defs <- insert.bottom else { if(any(type == 2)) { if(identical(prob, probdef)) defs <- paste0(outer.size(math('a')), ' ', bold(math('b')), ' ', outer.size(math('c')), ' represent the lower quartile ', math('a'), ', the median ', math('b'), ', and the upper quartile ', math('c'), ' for continuous variables.') else { prob <- sprintf("%1.0f\\%%", 100 * prob) defs <- paste0(outer.size(math('a')), ' ', bold(math('b')), ' ', outer.size(math('c')), ' represent the ', prob[1], ' quantile ', math('a'), ' the ', prob[2], ' quantile ', math('b'), ' and the ', prob[3], ' quantile ', math('c'), ' for continuous variables.') } if(prmsd) defs <- paste0(defs, spc, if(html) paste0(math(paste0('x', specs$space, plminus, ' s')), ' represents ', specs$xbar, specs$space, plminus, ' 1 SD.') else '$x\\pm s$ represents $\\bar{X}\\pm 1$ SD.') } if(prn) defs <- c(defs, if(length(defs)) spc, paste0(math('N'), ' is the number of non-missing values.')) if(any(type == 1) && npct == 'numerator') defs <- c(defs, 'Numbers after proportions are frequencies.') if(length(testUsed)) { if(html) defs <- c(defs, br, if(length(testUsed) == 1)'Test used:' else 'Tests used:', if(length(testUsed) == 1) paste(testUsed, 'test') else paste(paste0(sup(1 : length(testUsed)), testUsed, ' test'), collapse='; '), '.') else defs <-c(defs, if(length(testUsed) == 1)'\\noindent Test used:' else '\\indent Tests used:', if(length(testUsed) == 1) paste(testUsed, 'test') else paste(paste0('$^{', 1 : length(testUsed),'}$', testUsed, ' test'), collapse='; '), '.') } } legend <- NULL if(! html) legend <- defs else if(! noib) insert.bottom <- paste(defs, collapse=' ') if(length(auxc)) { if(length(auxc) != nrow(cstats)) stop(paste0('length of auxCol (',length(auxCol[[1]]), ') is not equal to number or variables in table (', nv,').')) auxcc <- format(auxc) auxcc[is.na(auxc)] <- '' cstats <- cbind(auxcc, cstats) nax <- names(auxCol) heads <- get2rowHeads(nax) names(cstats)[1] <- heads[[1]] if(length(col.just)) col.just <- c('r', col.just) if(length(extracolheads)) extracolheads <- c(heads[2], extracolheads) } if(length(legend) && (html || ! table.env)) legend[1] <- paste0('\n', legend[1]) laststrat <- strat == strats[length(strats)] finalcaption <- NULL finallegend <- NULL if((! tabenv1 && table.env) || (tabenv1 && istr == 1)) { finalcaption <- caption if(((! tabenv1 && laststrat) || (tabenv1 && istr == 1)) && !legend.bottom) { finalcaption <- paste(finalcaption, paste(legend, collapse=' '), sep='. ') } } if(! noib && laststrat && ! 
table.env) { finallegend <- legend } else if(legend.bottom) { finallegend <- paste(legend, collapse=' ') } if(html) { heads <- colnames(cstats) if(length(extracolheads)) heads <- paste(heads, extracolheads, sep=br) Cstats <- rbind(Cstats, cstats) Lab <- c(Lab, lab) n.tspanner <- c(n.tspanner, length(lab)) } else { w <- latex(cstats, title=title, file=file, append=TRUE, caption=finalcaption, rowlabel=rowlabel, table.env=(! tabenv1 && table.env) || (tabenv1 && istr == 1), col.just=col.just, numeric.dollar=FALSE, insert.bottom=finallegend, rowname=lab, dcolumn=dcolumn, extracolheads=extracolheads, extracolsize=NNsize, insert.top=if(strat != '.ALL.') strat, ...) if(tabenv1 && istr == 1) cat('\\clearpage\n', file=file, append=TRUE) else if(istr < length(strats)) cat('\\Needspace{2.7in}\n', file=file, append=TRUE) ## trieds to avoid page break in middle of stratum attr(w, 'legend') <- legend } } if(! html) { attr(w, 'nstrata') <- istr return(w) } cs <- c(paste0('width:', round(0.85*maxlablen), 'ex;'), rep('padding: 0 7px 0 7px;', ncol(Cstats))) ## was rep('padding-left:3ex;'... if(length(strats) > 1) { tspanner <- ifelse(strats == '.ALL', bold('Overall'), strats) w <- htmlTable::htmlTable(Cstats, header=heads, caption = paste(finalcaption, finallegend), rowlabel = rowlabel, # n.rgroup = if(rowsep) rows.per.var, align = col.just, rnames=Lab, tspanner=tspanner, n.tspanner=n.tspanner, tfoot=insert.bottom, css.cell=cs, escape.html=FALSE) } else w <- htmlTable::htmlTable(Cstats, header=heads, caption = paste(finalcaption, finallegend), rowlabel = rowlabel, # n.rgroup = if(rowsep) rows.per.var, align = col.just, rnames=lab, tfoot = insert.bottom, css.cell = cs, escape.html=FALSE) rendHTML(w) } html.summaryM <- function(object, ...) latex.summaryM(object, file='', html=TRUE, ...) plotpsummaryM <- function(x, vnames = c('labels', 'names'), which = c('both', 'categorical', 'continuous'), vars=NULL, xlim = c(0,1), xlab = 'Proportion', exclude1 = TRUE, main=NULL, ncols=2, prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001, height=NULL, width=NULL) { if (!requireNamespace("plotly")) stop("This function requires the 'plotly' package.") obj <- x vnames <- match.arg(vnames) which <- match.arg(which) html <- TRUE ul <- vnames=='labels' if(is.logical(prtest) && !prtest) prtest <- 'none' stratnames <- names(x$results) nstrat <- length(stratnames) gcat <- vector('list', nstrat) names(gcat) <- stratnames ## Create annotations to simulate strata titles for dot charts # if(nstrat > 1) { # annotations <- list() # xx <- (1 : nstrat) / (nstrat + 1) # for(i in 1 : nstrat) # annotations[[i]] <- list(x=xx[i], y=1.05, text=stratnames[i], # xref='paper', yref='paper', showarrow=FALSE) # } for(strat in stratnames) { obj <- x$results[[strat]] test <- obj$testresults if(!length(test)) prtest <- 'none' varNames <- names(obj$stats) vn <- if(ul) obj$labels else varNames Units <- obj$units nw <- if(lg <- length(obj$group.freq)) lg else 1 gnames <- names(obj$group.freq) ggl <- obj$group.label if(! 
length(main)) main <- if(strat != '.ALL.') strat else if(nw == 1) '' else paste('Proportions', 'Stratified by', obj$group.label) lab <- vnd <- z <- Frac <- nmiss <- vnamd <- NULL type <- obj$type; n <- obj$n gcon <- NULL iv <- which(type %in% c(1, 3)) if(length(vars)) iv <- iv[intersect(vars, 1 : length(iv))] if(which != 'continuous' && length(iv)) { ftstats <- NULL for(i in iv) { nam <- vn[i] tab <- obj$stats[[i]] if(nw == 1) tab <- as.matrix(tab) nr <- nrow(tab) denom <- if(type[i] == 1) apply(tab, 2, sum) else obj$group.freq y <- sweep(tab, 2, denom, FUN='/') frac <- sweep(tab, 2, denom, FUN=markupSpecs$html$frac, size=95) dim(frac) <- dim(y) ## paste loses these dimnames(frac) <- dimnames(y) lev <- dimnames(y)[[1]] exc <- exclude1 && (nr == 2) jstart <- if(exc) 2 else 1 rl <- casefold(lev) binary <- type[i] == 1 && exc && (all(rl %in% c("0", "1")) | all(rl %in% c("false", "true"))| all(rl %in% c("absent", "present"))) for(j in jstart : nrow(y)) { if(nw==1) { z <- rbind(z, y[j,]) Frac <- rbind(Frac, frac[j,]) } else { yj <- rep(NA, nw) names(yj) <- gnames yj[names(y[j,])] <- y[j,] z <- rbind(z, yj) fj <- rep('', nw) names(fj) <- gnames fj[names(frac[j,])] <- frac[j,] Frac <- rbind(Frac, fj) } lab <- c(lab, if(binary) '' else lev[j]) vnd <- c(vnd, nam) vnamd <- c(vnamd, varNames[i]) } if(any(prtest != 'none')) { fts <- formatTestStats(test[[varNames[i]]], type[i] == 3, if(type[i] == 1) 1 else 1 : nr, prtest = prtest, lang = 'html', pdig = pdig, eps=eps) ftstats <- c(ftstats, fts, if(type[i] == 1 && nr - exc - 1 > 0) rep('', nr - exc - 1)) } } dimnames(z) <- dimnames(Frac) <- list(lab, dimnames(z)[[2]]) if(! any(prtest == 'none')) Frac[, 1] <- paste0(Frac[, 1], '
    ', ftstats) gcat[[strat]] <- dotchartp(z, groups=factor(vnd, levels=unique(vnd)), xlab=xlab, xlim=xlim, auxdata=Frac, auxwhere='hover', dec=3, height=if(length(height) && height == 'auto') plotlyParm$heightDotchart(nrow(z)) else height, width=width, layoutattr=FALSE && nstrat > 1) } iv <- which(type == 2) if(length(vars)) iv <- iv[intersect(vars, 1 : length(iv))] if(which != 'categorical' && length(iv)) { if(nstrat > 1) warning('only plots last stratum for continuous variables') icon <- iv ii <- 0 p <- list() for(i in icon) { ii <- ii + 1 nam <- markupSpecs$html$varlabel(vn[i], Units[i], size=73) st <- obj$stats[[i]] if(nw==1) st <- as.matrix(st) N <- st[, 'N'] teststat <- if(all(prtest != 'none')) formatTestStats(test[[varNames[i]]], prtest=prtest, lang='html', pdig=pdig, eps=eps) p[[ii]] <- bppltp(stats=st, xlab=nam, teststat=teststat, showlegend=FALSE) } nrows <- ceiling(length(p) / ncols) gcon <- plotly::subplot(p, shareY=TRUE, nrows=nrows, titleX=TRUE, margin=.1) if(FALSE) { if(! length(height)) height <- min(1000, 275 * nrows) if(! length(width)) width <- min(900, 400 * ncols) gcon <- plotly::layout(gcon, height=height, width=width) ## and note: height and width are now arguments to plot_ly } } } if(! is.null(gcat)) { # plotly objects have length 0 gcat <- if(nstrat == 1) gcat[[1]] else plotly::subplot(gcat, shareY=TRUE, titleX=TRUE, nrows=1, margin=.1) # else { # lo <- attr(gcat[[1]], 'layout') # gcat <- plotly::subplot(gcat, shareY=TRUE, # titleX=TRUE, nrows=1, margin=.1) # ann <- list() # for(i in 1 : nstrat) # ann[[i]] <- list(x= i / (nstrat + 1), y=1.05, # text=stratnames[i], showarrow=FALSE, # xref='paper', yref='paper') # lo$xaxis1 <- lo$xaxis # lo$xaxis1$title <- 'this is it' # lo$axis2 <- lo$axis1 # lo <- c(lo, ann) # do.call(plotly::layout, lo) # } } if(! is.null(gcat) && ! is.null(gcon)) # plotly objects have length 0 list(Categorical = gcat, Continuous = gcon) else if(! is.null(gcat)) gcat else gcon } Hmisc/R/ggfreqScatter.r0000644000176200001440000001110614235726331014507 0ustar liggesusersggfreqScatter <- function(x, y, by=NULL, bins=50, g=10, cuts=NULL, xtrans = function(x) x, ytrans = function(y) y, xbreaks = pretty(x, 10), ybreaks = pretty(y, 10), xminor = NULL, yminor = NULL, xlab=as.character(substitute(x)), ylab=as.character(substitute(y)), fcolors=viridis::viridis(10), nsize=FALSE, stick=FALSE, html=FALSE, prfreq=FALSE, ...) { xlab <- if(! missing(xlab)) xlab else if(label(x) != '') label(x, plot=TRUE, html=html) else xlab ylab <- if(! missing(ylab)) ylab else if(label(y) != '') label(y, plot=TRUE, html=html) else ylab nx <- is.numeric(x); ny <- is.numeric(y) xbreaks <- if(nx) xbreaks; ybreaks <- if(ny) ybreaks bins <- rep(bins, length.out=2) bypres <- length(by) > 0 if(! bypres) by <- rep(0, times=length(x)) i <- ! (is.na(x) | is.na(y)) x <- xtrans(x[i]); y <- ytrans(y[i]) by <- by[i] if(nx) { rx <- range(x) sx <- diff(rx) / bins[1] x <- rx[1] + sx * round((x - rx[1]) / sx) } if(ny) { ry <- range(y) sy <- diff(ry) / bins[2] y <- ry[1] + sy * round((y - ry[1]) / sy) } k <- subset(as.data.frame(table(by, x, y)), Freq > 0) if(nx) k$x <- as.numeric(as.character(k$x)) if(ny) k$y <- as.numeric(as.character(k$y)) if(prfreq) print(table(k$Freq)) if(stick) { if(! 
ny) stop('stick=TRUE only works with numeric y') Y <- k$y f <- k$Freq m <- max(f) z <- 1.15 * m / sy k$y1 <- Y - f / z / 2 k$y2 <- Y + f / z / 2 k$y3 <- ifelse(f == m, NA, Y - m / z / 2) k$y4 <- ifelse(f == m, NA, Y - f / z / 2) k$y5 <- ifelse(f == m, NA, Y + f / z / 2) k$y6 <- ifelse(f == m, NA, Y + m / z / 2) w <- ggplot(k, aes(x=x, y=y, label=Freq)) + geom_segment(aes(x=x, y=y1, xend=x, yend=y2, color=I('black')), data=k) + geom_segment(aes(x=x, y=y3, xend=x, yend=y4, color=I('lightgray')), data=k) + geom_segment(aes(x=x, y=y5, xend=x, yend=y6, color=I('lightgray')), data=k) + xlab(xlab) + ylab(ylab) + labs(caption=paste0('Maximum frequency:', m)) if(bypres) w <- w + facet_wrap(~ by) return(w) } if(g == 0) { w <- if(nsize) ggplot(k, aes(x=x, y=y, size=Freq ^ 0.25, label=Freq)) + geom_point(...) + scale_size_continuous() + xlab(xlab) + ylab(ylab) + guides(size = guide_legend(title='Frequency')) else ggplot(k, aes(x=x, y=y, label=Freq, color=Freq ^ 0.25)) + geom_point(...) + scale_color_gradientn(colors=fcolors) + guides(alpha = FALSE, color = guide_legend(title='Frequency')) + xlab(xlab) + ylab(ylab) if(bypres) w <- w + facet_wrap(~ by) return(w) } k$fg <- if(length(cuts)) cut2(k$Freq, cuts=cuts) else cut2(k$Freq, g=g) ufreq <- sort(unique(k$Freq)) few <- length(ufreq) <= 15 brn <- if(few) ufreq else unique(quantile(k$Freq, seq(0, g) / g)) w <- if(nsize) ggplot(k, aes(x=x, y=y, size=Freq ^ 0.25, label=Freq)) + geom_point(...) + scale_size_continuous(breaks=brn ^ 0.25, labels=round(brn)) + xlab(xlab) + ylab(ylab) + guides(size = guide_legend(title='Frequency')) else ggplot(k, aes(x=x, y=y, label=Freq, color=if(few) Freq else as.integer(fg))) + # k$Freq geom_point(...) + scale_color_gradientn(colors=fcolors, breaks=if(few) ufreq else 1 : length(levels(k$fg)), labels=if(few) ufreq else levels(k$fg)) + guides(alpha = FALSE, color = guide_legend(title='Frequency')) + xlab(xlab) + ylab(ylab) if(nx) w <- w + scale_x_continuous(breaks=xtrans(xbreaks), labels=format(xbreaks), minor_breaks=if(length(xminor)) xtrans(xminor)) if(ny) w <- w + scale_y_continuous(breaks=ytrans(ybreaks), labels=format(ybreaks), minor_breaks=if(length(yminor)) ytrans(yminor)) if(bypres) w <- w + facet_wrap(~ by) w } utils::globalVariables(c('fg','y1','y2','y3','y4','y5','y6')) Hmisc/R/rcorrp.cens.s0000644000176200001440000001366613101441065014147 0ustar liggesusers## Computes rank correlation measures between a variable X and a possibly ## censored Surv variable Y ## Rank correlation is extension of Somers' Dxy = 2(Concordance Prob-.5) ## See Harrell et al JAMA 1984(?) ## Set outx=T to exclude ties in X from computations (-> Goodman-Kruskal ## gamma-type rank correlation) ## No. 
## This is the version extended to paired predictions
## method=1: concordance=delta x1 < delta x2
## method=2: concordance=x1 concordant and x2 discordant

rcorrp.cens <- function(x1, x2, S, outx=FALSE, method=1)
{
  if(is.Surv(S)) {
    if(attr(S, 'type') != 'right')
      stop('only handles right censored times')
  } else S <- cbind(S, rep(1, length(S)))

  y     <- S[,1]
  event <- S[,2]
  if(length(x1)!=length(x2)) stop("x1 and x2 must have same length")
  if(length(y)!=length(x1))  stop("y must have same length as x")
  if(method!=1 & method!=2)  stop("method must be 1 or 2")

  miss  <- is.na(x1+x2+y+event)
  nmiss <- sum(miss)
  if(nmiss>0) {
    miss  <- !miss
    x1    <- x1[miss]
    x2    <- x2[miss]
    y     <- y[miss]
    event <- event[miss]
  }

  n <- length(x1)
  if(n<2) stop("<2 non-missing observations")
  ne <- sum(event)
  storage.mode(x1)     <- "double"
  storage.mode(x2)     <- "double"
  storage.mode(y)      <- "double"
  storage.mode(event)  <- "logical"
  storage.mode(method) <- "integer"
  storage.mode(outx)   <- "logical"

  z <- .Fortran(F_cidxcp,x1,x2,y,event,length(x1),method,outx,
                nrel=double(1),nuncert=double(1),
                c1=double(1),c2=double(1),gamma1=double(1),gamma2=double(1),
                gamma=double(1),sd=double(1),c12=double(1),c21=double(1))
  r <- c(z$gamma,z$sd,z$c12,z$c21,n,nmiss,ne,z$nrel,z$nuncert,z$c1,z$c2,
         z$gamma1,z$gamma2)
  names(r) <- c("Dxy","S.D.","x1 more concordant","x2 more concordant",
                "n","missing","uncensored",
                "Relevant Pairs","Uncertain","C X1","C X2","Dxy X1","Dxy X2")
  r
}

improveProb <- function(x1, x2, y)
{
  s <- is.na(x1+x2+y)
  if(any(s)) {
    s  <- !s
    x1 <- x1[s]
    x2 <- x2[s]
    y  <- y[s]
  }
  n <- length(y)
  y <- as.numeric(y)
  u <- sort(unique(y))
  if(length(u) != 2 || u[1] != 0 || u[2] != 1)
    stop('y must have two values: 0 and 1')
  r <- range(x1,x2)
  if(r[1] < 0 || r[2] > 1)
    stop('x1 and x2 must be in [0,1]')

  a  <- y==1
  b  <- y==0
  na <- sum(a)
  nb <- sum(b)
  d  <- x2 - x1

  nup.ev   <- sum(d[a] > 0); pup.ev   <- nup.ev/na
  nup.ne   <- sum(d[b] > 0); pup.ne   <- nup.ne/nb
  ndown.ev <- sum(d[a] < 0); pdown.ev <- ndown.ev/na
  ndown.ne <- sum(d[b] < 0); pdown.ne <- ndown.ne/nb

  nri.ev    <- pup.ev - pdown.ev
  # se.nri.ev <- sqrt((pup.ev + pdown.ev)/na)  # old est under H0
  v.nri.ev  <- (nup.ev + ndown.ev)/(na^2) - ((nup.ev - ndown.ev)^2)/(na^3)
  se.nri.ev <- sqrt(v.nri.ev)
  z.nri.ev  <- nri.ev/se.nri.ev

  nri.ne    <- pdown.ne - pup.ne
  # se.nri.ne <- sqrt((pdown.ne + pup.ne)/nb)  # old est under H0
  v.nri.ne  <- (ndown.ne + nup.ne)/(nb^2) - ((ndown.ne - nup.ne)^2)/(nb^3)
  se.nri.ne <- sqrt(v.nri.ne)
  z.nri.ne  <- nri.ne/se.nri.ne

  nri    <- pup.ev - pdown.ev - (pup.ne - pdown.ne)
  # old estimate under H0:
  # se.nri <- sqrt((pup.ev + pdown.ev)/na + (pup.ne + pdown.ne)/nb)
  se.nri <- sqrt(v.nri.ev + v.nri.ne)
  z.nri  <- nri/se.nri

  improveSens <- sum(d[a])/na
  improveSpec <- -sum(d[b])/nb
  idi    <- mean(d[a]) - mean(d[b])
  var.ev <- var(d[a])/na
  var.ne <- var(d[b])/nb
  se.idi <- sqrt(var.ev + var.ne)
  z.idi  <- idi/se.idi

  structure(llist(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne,
                  nri,    se.nri,    z.nri,
                  nri.ev, se.nri.ev, z.nri.ev,
                  nri.ne, se.nri.ne, z.nri.ne,
                  improveSens, improveSpec, idi, se.idi, z.idi,
                  labels=FALSE), class='improveProb')
}

print.improveProb <- function(x, digits=3, conf.int=.95, ...)
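## A hedged usage sketch for improveProb() above (simulated predicted
## probabilities; all object names here are illustrative only):
##   set.seed(2)
##   y  <- rbinom(200, 1, 0.3)                                 # 0/1 outcome
##   p1 <- plogis(qlogis(0.3) + 0.5 * y + rnorm(200, 0, 0.8))  # old model
##   p2 <- plogis(qlogis(0.3) + 1.0 * y + rnorm(200, 0, 0.8))  # new model
##   improveProb(x1=p1, x2=p2, y=y)   # NRI and IDI with SEs, printed below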
{ cat('\nAnalysis of Proportions of Subjects with Improvement in Predicted Probability\n\n') cat('Number of events:', x$na,'\tNumber of non-events:', x$nb, '\n\n') p <- matrix(c(x$pup.ev, x$pup.ne, x$pdown.ev, x$pdown.ne), dimnames=list(c( 'Increase for events (1)', 'Increase for non-events (2)', 'Decrease for events (3)', 'Decrease for non-events (4)'), 'Proportion')) cat('Proportions of Positive and Negative Changes in Probabilities\n\n') print(p, digits=digits) zpci <- function(m, se, conf.int) { z <- qnorm((1+conf.int)/2) cbind(m/se, 2 * pnorm(- abs(m/se)), m - z*se, m + z*se) } p <- cbind(c(x$nri, x$nri.ev, x$nri.ne), c(x$se.nri, x$se.nri.ev, x$se.nri.ne), zpci(c(x$nri, x$nri.ev, x$nri.ne), c(x$se.nri, x$se.nri.ev, x$se.nri.ne), conf.int=conf.int)) low <- paste('Lower', conf.int) hi <- paste('Upper', conf.int) dimnames(p) <- list(c('NRI (1-3+4-2)', 'NRI for events (1-3)', 'NRI for non-events (4-2)'), c('Index', 'SE', 'Z', '2P', low, hi)) cat('\n\nNet Reclassification Improvement\n\n') print(p, digits=digits) cat('\n\nAnalysis of Changes in Predicted Probabilities\n\n') p <- matrix(c(x$improveSens, x$improveSpec), dimnames=list(c('Increase for events (sensitivity)', 'Decrease for non-events (specificity)'), 'Mean Change in Probability')) print(p, digits=digits) cat("\n\nIntegrated Discrimination Improvement\n (average of sensitivity and 1-specificity over [0,1];\n also is difference in Yates' discrimination slope)\n\n") p <- c(x$idi, x$se.idi, zpci(x$idi, x$se.idi, conf.int=conf.int)) names(p) <- c('IDI', 'SE', 'Z', '2P', low, hi) print(p, digits=digits) invisible() } Hmisc/R/rcorr.s0000644000176200001440000000243313733176563013051 0ustar liggesusersrcorr <- function(x, y, type=c("pearson","spearman")) { type <- match.arg(type) if(!missing(y)) x <- cbind(x, y) x[is.na(x)] <- 1e50 storage.mode(x) <- "double" p <- as.integer(ncol(x)) if(p < 1) stop("must have >1 column") n <- as.integer(nrow(x)) if(n < 5) stop("must have >4 observations") h <- .Fortran(F_rcorr, x, n, p, itype=as.integer(1+(type=="spearman")), hmatrix=double(p*p), npair=integer(p*p), double(n), double(n), double(n), double(n), double(n), integer(n)) npair <- matrix(h$npair, ncol=p) h <- matrix(h$hmatrix, ncol=p) h[h > 1e49] <- NA nam <- dimnames(x)[[2]] dimnames(h) <- list(nam, nam) dimnames(npair) <- list(nam, nam) P <- matrix(2 * (1 - pt(abs(h) * sqrt(npair - 2) / sqrt(pmax(1 - h * h, 0.)), npair - 2)), ncol=p) P[abs(h) == 1] <- 0 diag(P) <- NA dimnames(P) <- list(nam, nam) structure(list(r=h, n=npair, P=P), class="rcorr") } print.rcorr <- function(x, ...) { print(round(x$r,2)) n <- x$n if(all(n == n[1,1])) cat("\nn=", n[1,1], "\n\n") else { cat("\nn\n") print(n) } cat("\nP\n") P <- x$P P <- ifelse(P < .0001, 0, P) p <- format(round(P, 4)) p[is.na(P)] <- "" print(p, quote=FALSE) invisible() } Hmisc/R/format.pval.s0000644000176200001440000000231312243661443014137 0ustar liggesusers## Use R function for S-Plus, just changed to .Options format.pval <- function (x, pv=x, digits = max(1, .Options$digits - 2), eps = .Machine$double.eps, na.form = "NA", ...) { if ((has.na <- any(ina <- is.na(pv)))) pv <- pv[!ina] r <- character(length(is0 <- pv < eps)) if (any(!is0)) { rr <- pv <- pv[!is0] expo <- floor(log10(ifelse(pv > 0, pv, 1e-50))) fixp <- expo >= -3 | (expo == -4 & digits > 1) if (any(fixp)) rr[fixp] <- format(round(pv[fixp], digits = digits), ...) if (any(!fixp)) rr[!fixp] <- format(round(pv[!fixp], digits = digits), ...) 
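    ## Below, formatted values are kept for p-values at or above eps,
    ## while values smaller than eps are rendered as "<" followed by eps
    ## rather than as 0.  An illustrative call (exact digits depend on
    ## options('digits') and the machine epsilon):
    ##   format.pval(c(0.1234, 1e-300))  # second element prints "<...e-16"-style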
r[!is0] <- rr } if (any(is0)) { digits <- max(1, digits - 2) if (any(!is0)) { nc <- max(nchar(rr)) if (digits > 1 && digits + 6 > nc) digits <- max(1, nc - 7) sep <- if (digits == 1 && nc <= 6) "" else " " } else sep <- if(digits == 1) "" else " " r[is0] <- paste("<", format(eps, digits = digits, ...), sep = sep) } if (has.na) { rok <- r r <- character(length(ina)) r[!ina] <- rok r[ina] <- na.form } r } Hmisc/R/areg.s0000644000176200001440000003362512250435350012630 0ustar liggesusers# $Id$ areg <- function(x, y, xtype=NULL, ytype=NULL, nk=4, B=0, na.rm=TRUE, tolerance=NULL, crossval=NULL) { yname <- deparse(substitute(y)) xname <- deparse(substitute(x)) ism <- is.matrix(x) if(!ism) { x <- as.matrix(x) if(!length(colnames(x))) colnames(x) <- xname } if(na.rm) { omit <- is.na(x %*% rep(1,ncol(x))) | is.na(y) nmiss <- sum(omit) if(nmiss) { x <- x[!omit,,drop=FALSE] y <- y[!omit] } } else nmiss <- 0 d <- dim(x) n <- d[1]; p <- d[2] xnam <- colnames(x) if(!length(xnam)) xnam <- paste('x', 1:p, sep='') nuy <- length(unique(y)) if(!length(ytype)) ytype <- if(is.factor(y) || is.character(y)) 'c' else if(nk==0 || (nuy < 3)) 'l' else 's' if(nk==0 && ytype=='s') ytype <- 'l' if(ytype=='s' && nk > 0 && nuy < 3) { warning('requested smooth transformation of y but less than 3 unique values; forced linear') ytype <- 'l' } if(!length(xtype)) xtype <- rep(if(nk==0)'l' else 's', p) xtype[nk==0 & xtype=='s'] <- 'l' names(xtype) <- xnam nux <- apply(x, 2, function(z) length(unique(z))) tooTied <- xtype=='s' & nux < nk if(any(tooTied)) { warning(paste('the following x variables have too few unique values for the value of nk;\nlinearity forced:', paste(xnam[tooTied], collapse=','))) xtype[nux] <- 'l' } fcancor <- function(X, Y) { ## colnames must exist for correct insertion of zeros for singular ## elements colnames(X) <- paste('x', 1:ncol(X), sep='') colnames(Y) <- paste('y', 1:ncol(Y), sep='') ## If canonical variate transformation of Y is descending in Y, ## negate all parameters f <- cancor(X, Y) f$r2 <- f$cor[1]^2 n <- nrow(Y); if(!length(n)) n <- length(y) varconst <- sqrt(n-1) ## cancor returns rows only for non-singular variables ## For first canonical variate insert zeros for singular variables xcoef <- 0. * X[1, ] b <- f$xcoef[, 1] xdf <- length(b) xcoef[names(b)] <- b xcoef <- c(intercept = -sum(xcoef * f$xcenter), xcoef) * varconst ycoef <- 0. * Y[1, ] b <- f$ycoef[, 1] ydf <- length(b) ycoef[names(b)] <- b ycoef <- c(intercept = -sum(ycoef * f$ycenter), ycoef) * varconst ty <- matxv(Y, ycoef) g <- lm.fit.qr.bare(Y, ty, singzero=TRUE) if(g$coefficients[2] < 0) { xcoef <- -xcoef ycoef <- -ycoef ty <- -ty } f$xcoef <- xcoef f$ycoef <- ycoef f$ty <- ty f$xdf <- xdf f$ydf <- ydf f } need2getinv <- FALSE xpxi <- NULL Y <- aregTran(y, ytype, nk, functions=TRUE) at <- attributes(Y) ytrans <- at$fun yinv <- at$inversefun ## NULL if type='s'; need coef yparms <- at$parms xdf <- ifelse(xtype=='l', 1, nk - 1) j <- xtype == 'c' if(any(j)) xdf[j] <- nux[j] - 1 names(xdf) <- xnam X <- matrix(NA, nrow=n, ncol=sum(xdf)) xparms <- list() j <- 0 xn <- character(0) for(i in 1 : p) { w <- aregTran(x[,i], xtype[i], nk) xparms[[xnam[i]]] <- attr(w, 'parms') m <- ncol(w) xdf[i] <- m X[, (j + 1) : (j + m)] <- w j <- j + m xn <- c(xn, paste(xnam[i], 1:m, sep='')) } ## See if rcpsline.eval could not get desired no. 
## of knots due to ties
  if(ncol(X) > sum(xdf)) X <- X[, 1 : sum(xdf), drop=FALSE]

  covx <- covy <- r2opt <- r2boot <- madopt <- madboot <-
    medopt <- medboot <- NULL
  if(B > 0L) {
    r     <- 1L + sum(xdf)
    barx  <- rep(0., r)
    vname <- c('Intercept', xn)
    covx  <- matrix(0, nrow=r, ncol=r, dimnames=list(vname,vname))
    if(ytype != 'l') {
      r     <- ncol(Y) + 1L
      bary  <- rep(0., r)
      vname <- c('Intercept', paste(yname, 1 : (r - 1), sep=''))
      covy  <- matrix(0, nrow=r, ncol=r, dimnames=list(vname, vname))
    }
  }

  if(ytype == 'l') {
    f    <- lm.fit.qr.bare(X, Y, tolerance=tolerance, xpxi=TRUE, singzero=TRUE)
    xcof <- f$coefficients
    r2   <- f$rsquared
    xpxi <- f$xpxi
    cof  <- 1
    ty   <- y
    ydf  <- 1
    lp   <- f$fitted.values
    res  <- f$residuals
    mad  <- mean (abs(y - lp))
    med  <- median(abs(y - lp))
    if(B > 0) {
      r2opt <- madopt <- medopt <- 0
      for(j in 1:B) {
        s <- sample(1:n, replace=TRUE)
        if(ytype=='c' && length(unique(y[s])) < nuy)
          stop('a bootstrap resample had too few unique values of y')
        xch <- which(xtype == 'c')
        if(length(xch)) {
          xtf <- apply(x[s, xch, drop=FALSE], 2,
                       function(z)length(unique(z))) < nux[xch]
          if(any(xtf))
            stop(paste('a bootstrap resample had too few unique values of the following x variables:',
                       paste(xnam[xch[xtf]], collapse=','), sep='\n'))
        }
        g <- lm.fit.qr.bare(X[s,, drop=FALSE], Y[s], singzero=TRUE)
        b <- g$coefficients
        r2boot <- g$rsquared
        yhat   <- matxv(X, b)
        r2orig <- cor(yhat, y)^2
        r2opt  <- r2opt + r2boot - r2orig
        er      <- abs(Y[s] - g$fitted.values)
        madboot <- mean(er)
        medboot <- median(er)
        er      <- abs(y - yhat)
        madorig <- mean(er)
        medorig <- median(er)
        madopt  <- madopt + madboot - madorig
        barx <- barx + b
        b    <- as.matrix(b)
        covx <- covx + b %*% t(b)
      }
      r2opt   <- r2opt / B
      r2boot  <- r2 - r2opt
      madopt  <- madopt / B
      madboot <- mad - madopt
      medopt  <- medopt / B
      medboot <- med - medopt
      barx <- as.matrix(barx / B)
      covx <- (covx - B * barx %*% t(barx)) / (B - 1)
    }
  } else {
    f    <- fcancor(X, Y)
    r2   <- f$r2
    xcof <- f$xcoef
    cof  <- f$ycoef
    ty   <- f$ty
    ydf  <- f$ydf
    lp   <- as.vector(matxv(X, xcof))
    res  <- as.vector(ty - lp)
    if(!length(yinv)) {
      ## spline transformation, need coef to get inverse y transform
      yy   <- seq(min(y), max(y), length=1000)
      tyy  <- ytrans(yy, coef=cof)
      yinv <- inverseFunction(yy, tyy)
      need2getinv <- TRUE
    }
    puy <- yinv(lp, what='sample')
    if(length(y) != length(puy)) stop('program logic error')
    mad <- mean (abs(y - puy))
    med <- median(abs(y - puy))
    if(B > 0) {
      r2opt <- madopt <- medopt <- 0
      for(j in 1L : B) {
        s <- sample(1L : n, replace=TRUE)
        if(ytype=='c' && length(unique(y[s])) < nuy)
          stop('a bootstrap resample had too few unique values of y')
        xch <- which(xtype == 'c')
        if(length(xch)) {
          xtf <- apply(x[s, xch, drop=FALSE], 2,
                       function(z)length(unique(z))) < nux[xch]
          if(any(xtf))
            stop(paste('a bootstrap resample had too few unique values of the following x variables:',
                       paste(xnam[xch[xtf]], collapse=','), sep='\n'))
        }
        f  <- fcancor(X[s,, drop=FALSE], Y[s,, drop=FALSE])
        bx <- f$xcoef
        by <- f$ycoef
        r2boot <- f$r2
        xbeta  <- matxv(X, bx)
        ybeta  <- matxv(Y, by)
        r2orig <- cor(xbeta, ybeta)^2
        r2opt  <- r2opt + r2boot - r2orig
        puyall <- if(need2getinv) {
          tyyb  <- ytrans(yy, coef=by)   ## keeping constant knots
          yinvb <- inverseFunction(yy, tyyb)
          yinvb(xbeta, coef=by, what='sample')
        } else yinv(xbeta, coef=by)
        er      <- abs(y[s] - puyall[s])
        madboot <- mean(er)
        medboot <- median(er)
        er      <- abs(y - puyall)
        madorig <- mean(er)
        medorig <- median(er)
        madopt  <- madopt + madboot - madorig
        medopt  <- medopt + medboot - medorig
        barx <- barx + bx
        bx   <- as.matrix(bx)
        covx <- covx + bx %*% t(bx)
        bary <- bary + by
        by   <- as.matrix(by)
        covy <- covy + by %*% t(by)
      }
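      ## The averaging below completes the bootstrap optimism correction:
      ## optimism is the mean over the B resamples of (index computed in
      ## the resample minus index of the resample fit applied to the
      ## original data), and the corrected index is apparent minus
      ## optimism; e.g. with apparent R^2 = 0.50 and mean optimism = 0.07
      ## (illustrative numbers), r2boot = 0.43.  A hedged call that
      ## exercises this path, with made-up data:
      ##   f <- areg(cbind(age=rnorm(100, 50, 10)), rnorm(100), nk=4, B=20)
      ##   c(f$rsquared, f$r2boot)   # apparent and optimism-corrected R^2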
r2opt <- r2opt / B r2boot <- r2 - r2opt madopt <- madopt / B madboot <- mad - madopt medopt <- medopt / B medboot <- med - medopt barx <- as.matrix(barx / B) bary <- as.matrix(bary / B) covx <- (covx - B * barx %*% t(barx)) / (B - 1) covy <- (covy - B * bary %*% t(bary)) / (B - 1) } } j <- 0 beta <- xcof[-1] tx <- x xmeans <- list() for(i in 1:p) { m <- xdf[i] z <- matxv(X[, (j + 1) : (j + m), drop=FALSE], beta[(j + 1) : (j + m)]) mz <- mean(z) xmeans[[xnam[i]]] <- mz tx[,i] <- z - mz j <- j + m } r2cv <- madcv <- medcv <- NULL if(length(crossval)) { s <- sample(1:crossval, n, replace=TRUE) r2cv <- madcv <- medcv <- 0 for(j in 1:crossval) { g <- fcancor(X[s!=j,, drop=FALSE], Y[s!=j,, drop=FALSE]) bx <- g$xcoef by <- g$ycoef xbo <- matxv(X[s==j,, drop=FALSE], bx) ybo <- matxv(Y[s==j,, drop=FALSE], by) r2cv <- r2cv + cor(xbo, ybo)^2 puy <- if(need2getinv) { tyyb <- ytrans(yy, coef=by) ## keeping constant knots yinvb <- inverseFunction(yy, tyyb) yinvb(xbo, coef=by, what='sample') } else yinv(xbo, coef=by) er <- abs(y[s==j] - puy) madcv<- madcv + mean(er) medcv<- medcv + median(er) } r2cv <- r2cv / crossval madcv <- madcv / crossval medcv <- medcv / crossval } structure(list(y=y, x=x, ty=ty, tx=tx, rsquared=r2, rsquaredcv=r2cv, nk=nk, xdf=xdf, ydf=ydf, xcoefficients=xcof, ycoefficients=cof, xparms=xparms, yparms=yparms, xmeans=xmeans, ytrans=ytrans, yinv=yinv, linear.predictors=lp, residuals=res, xtype=xtype, ytype=ytype, yname=yname, r2boot=r2boot, r2opt=r2opt, mad=mad, madboot=madboot, madopt=madopt, med=med, medboot=medboot, medopt=medopt, madcv=madcv, medcv=medcv, xcov=covx, ycov=covy, xpxi=xpxi, n=n, m=nmiss, B=B, crossval=crossval), class='areg') } aregTran <- function(z, type, nk = length(parms), parms = NULL, functions = FALSE) { if(type=='l' || (type=='s' && nk==0)) return(if(functions) structure(as.matrix(z), fun =function(x,...) x, inversefun=function(x,...) x) else as.matrix(z)) if(type=='c') { n <- length(z) lp <- length(parms) ## Assume z is integer code if parms is given w <- if(lp) z else factor(z) x <- as.integer(w) if(!lp) parms <- 1 : max(x, na.rm=TRUE) z <- matrix(0, nrow=n, ncol=length(parms) - 1) z[cbind(1 : n, x - 1)] <- 1 attr(z, 'parms') <- if(lp)parms else levels(w) if(functions) { attr(z, 'fun') <- function(x, parms, coef) { if(length(parms) > length(coef)) coef <- c(0,coef) coef[-1] <- coef[-1] + coef[1] names(coef) <- parms coef[x] } formals(attr(z, 'fun')) <- list(x=integer(0), parms=parms, coef=numeric(0)) ## what is ignored; for compatibility with inverseFunction in Misc.s attr(z, 'inversefun') <- function(y, parms, coef, what=character(0)) { if(length(parms) > length(coef)) coef <- c(0, coef) isna <- is.na(y) y[isna] <- 0 x <- parms[whichClosest(c(coef[1], coef[1] + coef[-1]), y)] x[isna] <- NA x } formals(attr(z, 'inversefun')) <- list(y=numeric(0), parms=parms, coef=numeric(0), what=character(0)) } z } else { z <- rcspline.eval(z, knots=parms, nk=nk, inclx=TRUE) knots <- attr(z, 'knots') attr(z,'parms') <- knots if(functions) attr(z, 'fun') <- rcsplineFunction(knots) ## inverse function created later when coefficients available z } } predict.areg <- function(object, x, type=c('lp','fitted','x'), what=c('all','sample'), ...) 
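## A hedged sketch of predicting from an areg fit, continuing the
## illustrative fit `f' suggested earlier (values are made up):
##   predict(f, x=cbind(age=c(40, 60)), type='fitted')  # original y scale
##   predict(f, x=cbind(age=c(40, 60)), type='lp')      # transformed scale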
{ type <- match.arg(type) what <- match.arg(what) beta <- object$xcoefficients xparms <- object$xparms xtype <- object$xtype xdf <- object$xdf ybeta <- object$ycoefficients yinv <- object$yinv x <- as.matrix(x) p <- length(xdf) X <- matrix(NA, nrow=nrow(x), ncol=sum(xdf)) j <- 0 xnam <- names(xtype) for(i in 1:p) { w <- aregTran(x[,i], xtype[i], parms=xparms[[xnam[i]]]) m <- ncol(w) X[, (j + 1) : (j + m)] <- w j <- j + m } if(type == 'x') return(cbind(1, X)) xb <- matxv(X, beta) if(type=='fitted') yinv(xb, what=what, coef=ybeta) else xb } print.areg <- function(x, digits=4, ...) { xdata <- x[c('n', 'm', 'nk', 'rsquared', 'xtype', 'xdf', 'ytype', 'ydf')] xinfo <- data.frame(type=xdata$xtype, d.f.=xdata$xdf, row.names=names(xdata$xtype)) cat('\nN:', xdata$n, '\t', xdata$m, ' observations with NAs deleted.\n') cat('R^2: ', round(xdata$rsquared, 3), '\tnk: ', xdata$nk, '\tMean and Median |error|: ', format(x$mad, digits=digits), ', ', format(x$med, digits=digits), '\n\n', sep='') if(length(x$r2boot)) { x1 <- format(c(x$r2opt, x$madopt, x$medopt), digits=digits) x2 <- format(c(x$r2boot, x$madboot, x$medboot), digits=digits) n <- c('R^2', 'Mean |error|', 'Median |error|') d <- cbind('Bootstrap Estimates'=n, Optimism=x1, 'Optimism-corrected'=x2) row.names(d) <- rep('', 3) print(d, quote=FALSE, right=TRUE) } if(length(x$crossval)) { x1 <- format(c(x$rsquaredcv, x$madcv, x$medcv), digits=digits) n <- c('R^2', 'Mean |error|', 'Median |error|') d <- cbind(n, x1) dimnames(d) <- list(rep('',3), c(paste(x$crossval,'-fold Cross-validation',sep=''), 'Estimate')) cat('\n') print(d, quote=FALSE, right=TRUE) } cat('\n') print(xinfo) cat('\ny type:', xdata$ytype, '\td.f.:', xdata$ydf, '\n\n') invisible() } plot.areg <- function(x, whichx=1 : ncol(x$x), ...) { plot(x$y, x$ty, xlab=x$yname, ylab=paste('Transformed',x$yname)) r2 <- round(x$rsquared, 3) title(sub=bquote(R^2==.(r2)), adj=0) xdata <- x$x cn <- colnames(xdata) for(i in whichx) plot(xdata[,i], x$tx[,i], xlab=cn[i], ylab=paste('Transformed', cn[i]), ...) invisible() } Hmisc/R/show.pch.s0000644000176200001440000000354112266127133013441 0ustar liggesusersshow.pch <- function(object=par('font')) { plot(0,0,xlim=c(-1,11),ylim=c(0,26),type='n',axes=FALSE,xlab='',ylab='') j <- -1 for(i in 0:253) { if(i %% 25==0) { j <- j+1; k <- 26 } k <- k-1 points(j, k, pch=i, font=object) text(j+.45, k, i) } invisible() } character.table <- function(font=1) { ## Prints numeric equivalents to all latin characters ## Usage: graphsheet(orientation = "portrait") ## character.table() ## Print the resulting graphsheet. The printed version doesn't allways ## corresponds to the screen display. The character on line "xy" and column "z" ## of the table has code "xyz". ## These codes can be used as any other characters. e.g. ## title("\347\340 et \340") ## As the command line window of Splus can't print special characters ## cat("\347\340 et \340") ## will not print the special characters, at least under 4.5 and under 2000. 
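## Hedged examples for the symbol/character helpers in this file (each
## call opens a new plot):
##   show.pch()          # table of pch symbols 0-253 for the current font
##   character.table()   # octal codes for latin characters
##   show.col()          # numbered color swatches (defined just below)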
## ## Author: ## Pierre Joyet / Aktuariat pierre.joyet@bluewin.ch v <- 40:377 v <- v[v %% 100 < 80 & v %% 10 < 8] opar <- par(mar = c(5, 5, 4, 2) + 0.1, xpd=NA) plot(0:7, seq(4, 31, length = 8), type = "n", axes = FALSE, xlab = "", ylab = "") k <- 1 for(i in 4:31) for(j in 0:7) { text(j, 35 - i, eval(parse(text = paste("\"\\", v[k], "\"", sep = ""))), font = font) k <- k + 1 } text(0:7, rep(33, 7), as.character(0:7), font = 3) text(rep(-1, 28), 31:4, as.character(c(4:7, 10:17, 20:27, 30:37)), font = 3) par(opar) invisible() } show.col <- function(object=NULL) { plot(0,0,xlim=c(-1,10),ylim=c(0,10),type='n',axes=FALSE,xlab='',ylab='') j <- -1 for(i in 0:99) { if(i %% 10==0) { j <- j+1; k <- 10 } k <- k-1 points(j, k, pch=15, col=i, cex=3) text(j+.45, k, i) } invisible() } Hmisc/MD50000644000176200001440000005206014371140723011632 0ustar liggesusers16f8ea43efba90112351c5ecb33021c4 *COPYING b704fa1d479fd53b516a877a4b300e98 *DESCRIPTION 5aa8af7dc99f902a2be93674d36e6ea2 *NAMESPACE c4f9638ecada092b6b73794884395207 *NEWS 76f90acbe9bf8ad2eaaa6b8813eb7c82 *R/AFirst.lib.s 8fdbfa3b900cbbb42d94898093ec7fa5 *R/Cs.s 85683865508739ff7a19b280e562ab67 *R/GiniMd.s 7d0b0014572f85d46fa6a3417707344c *R/Key.s 10afe48276f47fe7b9cbeba8b151ee42 *R/Merge.r d760f061a1cfdcad79d25c6cdd0158e6 *R/Misc.s 081320dfb53e3362cfb0918952b60cce *R/R2Measures.r 9043da0ab6b8288636bb399cdb0f89bf *R/abs.error.pred.s bdf1cbb1952b1cdf53a296a6d29ecb08 *R/areg.s 146cbf7cb161fc73bef71599df0a3deb *R/aregImpute.s bf5dd5216d3d80cc6a01d0e82aff3750 *R/biVar.s b4b1cd12b90021d097455110506b80ff *R/binconf.s 4a8bfc99ba3dbc3823adb49523d6b070 *R/bootkm.s 12bf07383dcdc63a08469cf4311e50ff *R/bpower.s b8c95ecb00c8eee8d7712d77f7707b1d *R/bpplot.s bb0f5e114bad79ac6c8033ac0c8b74a6 *R/bystats.s 3dff999734ea32daa373b2950cceb72a *R/capitalize.s b9862f09ba4c92cd873521799fcf9d5f *R/ciapower.s f77dbf71580e9bf653fadd0c2f491f85 *R/cnvrt.coords.s 8415b600c989358e10497d6cc39146aa *R/combplotp.r 8c002470a851e0d14571cfda085e6c42 *R/confbar.s 6ac4ca97488c406e4f513cec9be4be27 *R/consolidate.s c04e31869fdc5107eb6af6886eadb566 *R/cpower.s bf85aff597a763e41e6a14d53359d30d *R/curveRep.s 74812a3a0e5ab1db9c0fb9082748d6e1 *R/cut2.s 25c501e1db065dacd0b7ebbc9918254e *R/data.frame.labelled.s 97dc5dcc48cb719088b817833067352c *R/dataRep.s ec8af558be91e1fa3415f1a99dd469b2 *R/dates.s 3f02d2486d14e095a96fe5ee10e483c7 *R/deff.s a181473ec89160b204ae28fe41a44de8 *R/describe.s 6cb5b3a77860b605f102bb528e84a071 *R/discrete.s 510723f9ba799696bf0196fc231e2ca2 *R/dotchart3.s 14ad428dacb6bed7013fdef6c05d9239 *R/dotchartpl.s 919e94c2c87f720601387c5171b57ffc *R/drawPlot.s 58c3f2f042601ffb8f889c0e2a16e635 *R/ecdf.s c1e489a07ca933fb02e004489fd3ee8e *R/epi.s bea6f791cf4519e57592b8f947ee6df7 *R/errbar.s 048a26c156cbf7588ef95b2d52418cc9 *R/event.chart.s f6f60ab400ea2df181948ccb799eccba *R/event.history.s 8f6279b9164a57be894cb2f12abb6ac6 *R/find.matches.s 87b8840dfbd4e58a508d74cb6e462e8a *R/fit.mult.impute.s c238614fb72943362407d74442bf236a *R/format.pval.s aace929824aad6ebdfba910950b6cc2b *R/ftu.s 473bbef2ffd42d37d059d9a4c66fe8a4 *R/gbayes.s 9af1cb80bca968cdd0bbc960b2100672 *R/gbayesSeqSim.r f76f66eae7faef0e445143d9e367619d *R/gettext.s 769b009b031f6f656a0e9b748057bfae *R/ggMisc.s 05a8ee8d5466b65585e5285ad046bf04 *R/ggfreqScatter.r 8dc06e7b35605c27a74fb3c3b6ab1681 *R/ggplotlyr.r b70800bb3730a5b84500d43e97d951f4 *R/groupn.s 926950a356c2e54ecd8b2317ec08a5d8 *R/hidingTOC.r 29cf6d5ee0c8444cb58ed0afb85ac57b *R/hist.data.frame.s 6d786e3f436d4e577c92132d3daf5282 *R/histSpikeg.s 
e79e72a4b25a7e30d32caf6b7a4b55c6 *R/histbackback.s 3ba3c85100e05c090c8caec1d628a05e *R/hoeffd.s 8e0a609468cd1506b2a506ba5bdca8e2 *R/html.s 1759e437814916630a47a0128ae1fd83 *R/impute.s 2d992948ec2ad38aa2c778882fd65e72 *R/in.operator.s e1bbabaa533c22a5378d1a4b645a80d2 *R/is.present.s 8cf9e0a10997c59a5be3f19a39c73684 *R/james.stein.s 8015b8ce84b2cc4686fca6ebba40e916 *R/labcurve.s bb1e0963d40b2a6df05f9277484c5ebb *R/label.s 526e1c9a614e4914908b00f675466117 *R/latex.s 4e31dcfc293ca68d459764b5d32515f3 *R/latexCheckOptions.r dda324419c115accb95440b58d422d43 *R/latexDotchart.s b160964c8423acea603c95a04c791084 *R/latexTabular.s 67c8a6450a4ead25d7fe5a7944a40d12 *R/latexTherm.s f15937a81b1e2a57a9707f5fbdd6c878 *R/list.tree.s ce608a6c653ec8c6fe828448f9a78eb6 *R/mApply.s d62470527f34181cace3f8935e4e35ea *R/mChoice.s e21c3cbb9b9c17b07223d4f805433395 *R/makeNstr.s e5f0eef89d954522d56fb5399066a6d3 *R/mask.s f4cc097babcda3c1edcd60f516713ff2 *R/matxv.s 694c248a54fc62c71172d6f02157b69a *R/mdb.get.s 4148365f1c827715d531c4e642846410 *R/minor.tick.s 96eba865bc3dde5e1521d79e91e21112 *R/misc.get.s 32a369e847b95005134970ac15b4ba73 *R/model.frame.default.s fc71dbaeeb57983dcfa3ac0497ed837b *R/mtitle.s 58635aedc770ba6ff0aa6bdc0e47729e *R/multLines.s d5a7dc64dd1472daca476f2fca4440d4 *R/na.delete.s 1f3488410c5f3721d93a23bf22e9a479 *R/na.detail.response.s dd4806066d614756cd8be2bef6bad3dd *R/na.keep.s 3d905a724130e0e181a9d00ecd45b195 *R/na.pattern.s 1ada10fb83652630cf67024c3f1b421c *R/nobsY.s 27019e9e90730ac0b25014b51d68ec66 *R/nstr.s cf2866606281d01f39e99d3205f50ae3 *R/num.intercepts.s a50c8048a7546e224ca402d362c084cd *R/pairUpDiff.r 215f9a06ffc6df02fb67576ad0da83b9 *R/panel.abwplot.s 9ce7ce9404ee0f280375b12c7b70019f *R/panel.bpplot.s 1630c1be85251dac0e8cd0243bedd201 *R/pc1.s 98d59510ae9b6125f404f484281f94bf *R/plot.describe.s 7afa7237d6203acbecd4d1b32f40a8c4 *R/plotCorrM.r 7768f312916e3d893a0b97896dc0d253 *R/plotlyM.r d28a72c30811b9c15cf028926de3b6ee *R/plsmo.s abee077f086dd6b61303a1fb0edc3960 *R/popower.s 9a1119c7949407c39968645e921a87a6 *R/print.char.list.s efe9a9a6c90594d1e7425cc46f2fc061 *R/pstamp.s 620401a9617131be8b2d162fd0466ff7 *R/rcorr.cens.s b9759f7459856f6eeb85e4ee3832f7ad *R/rcorr.s 0c9a682b270e97639d2f7d06c18aaa7f *R/rcorrp.cens.s 62264d939e616b9d1dc7070096b679e5 *R/rcspline.eval.s 2646dc6ac220d4414e5ac7b3509a4ad3 *R/rcspline.plot.s 42daef400a52ac984cb164ed67d35632 *R/rcspline.restate.s 1aa8a7124aa396e467f86a42f6651a53 *R/reShape.s d6669c123f34a1c754b9f0e8889390ca *R/redun.s 3314865263ea474380ecbbb73a675601 *R/reformM.r 41b180f15250bedd222f2c334d13eb1f *R/regexpEscape.s 8c57a7bf3f50a9288576ff9830cb5201 *R/responseSummary.s 3978f2ee3511549cb906af4618024a53 *R/rm.boot.s a1763e6429f086dd1783694da9b85342 *R/samplesize.bin.s d587d8ed7aee1aa12994f088da565dd8 *R/sas.get.s 98f6635f10a2262265dd52bd6ce64e4d *R/scat1d.s bcd4fb26ff4be0623165a02d1f874733 *R/score.binary.s 00d6b8dec0c5d0055a706e117ec4e373 *R/sedit.s f361dab6e800a20722ecc51bc08e056d *R/show.pch.s bfd1c58a14b5451fad603d2a20af2d47 *R/showPsfrag.s 349c683e96262eece28ee6110f85b828 *R/simMarkovOrd.r 525a4048d60605a8de57a4db1b684271 *R/solvet.s e2e1ee6a263cca9a9f1036cee25dfc69 *R/somers2.s 06ee4823f5d18fa93e99635bdf21a349 *R/spearman.test.s f1a12daf37ad6aed457fd3141379e0ed *R/spower.s a041b2aeb6c6af8ee47770b504538849 *R/src.s e56d15e9af7cdfa63bbbde558b7dec94 *R/stat-plsmo.r b47544f6b133f394f2fda329de60f733 *R/strgraphwrap.s 68cec90eb089d7d3334bcd9144965022 *R/string.break.line.s b42e6d7953adef86625842fa4bdbe291 *R/strwrap.s 055ef617ac4b0362b6466e6436ba69fb 
*R/subplot.s 14e533b635192fae8b28ca81b9c86f5d *R/substi.s e07fc01209369b3132178c5f5c055220 *R/summary.formula.s a2bba39104ecb43307450903b8132dd0 *R/summaryM.s f4e9b0484893f71186d0e527cfcab4b8 *R/summaryP.s dc27150ba262b8c3844ce46d36f1e492 *R/summaryRc.s 0ce190af103865fe51ba98c7bc4dab9d *R/summaryS.s 0af65ac91b7c04b13aac7791bdf758af *R/symbol.freq.s 474107a89a3a998eb78bbee1ac2dfdbe *R/sys.s 0917e77a727d026b5b12511b7710b283 *R/t.test.cluster.s eb87b4f621bfa787f568ceca584f9240 *R/tabulr.s c1dba69a612fd6fb121adb946a77596e *R/tex.s 2b30ea0d0be2d1eca4d07d2bcd1cc2aa *R/transace.s d1bfd0b35c0217dfd3b3d9b96929142b *R/transcan.s 847daf7fa4e67d6641c763a7873df93c *R/translate.s 91c82b7b8e9ad8cff084bf7b2069d11b *R/units.s 9cc4a154ada63f71e4eb19c38453e445 *R/upData.s ec3a0500a5d1afab5b53b052532dcc40 *R/upFirst.s 25b2e4d51ba3475a3e2bd22da07305ad *R/valueTags.s 99c5e1c368fea8433ac10a209081c5f2 *R/varclus.s f944b2ef6a72dde4e6441b75a51f4405 *R/wtd.stats.s 073c859622050305c189686d7178179b *R/xYplot.s 80493b82ba05f6c43b10f09df59d83c7 *R/xtfrm.labelled.s b72b2ab287d4c1af9d741022300a18bb *R/xy.group.s 1a0242a28b648a6eb2414ba6c86fa8d6 *R/ynbind.s 4a9bc5cb2318131f4f6a177965555ef1 *R/zoom.s 97a468f9be30b70f02ac809ce2fe8e17 *README.md 357fee92fdddd451d253d7a50836b510 *inst/CHANGELOG ba03163ea3c1f22c4a26a91477140822 *inst/THANKS d41d8cd98f00b204e9800998ecf8427e *inst/WISHLIST 68166d17f3525cc6d5ea99fc6a8ba83c *inst/tests/Ecdf.r 0b3a69351a7c10b16eca69a9a713fa9e *inst/tests/ace.s bf11d0979d077c25d7510b289aa20a37 *inst/tests/american-medical-association.csl 99a90bcb308d082f2b615822d477a47f *inst/tests/areg.s b4ca43805f7aa45aae3d146ef8abf22e *inst/tests/aregImpute.r 34700afdaa45acbbc7cfb598163d9c95 *inst/tests/aregImpute2.r 0b73b76245a462191695af917f104d7d *inst/tests/aregImpute3.r 2c2ca2a3c60ff347a101d41002c775e4 *inst/tests/aregImpute4.r 9657557ce6cb47ed62437e2971a02f90 *inst/tests/aregImpute5.r dc5f8faa672401adafaddd97d901a1f9 *inst/tests/bootkm.r 8c71471c170cd4b7f0bee73fca3055fe *inst/tests/consolidate.R 40313999a2d82fb8ffac5f8c2306ceaa *inst/tests/csv/FORMAT.csv 5fee907037836b5e42e0f163e608325f *inst/tests/csv/TEST.csv 0f4e7bbe74214f3fb0c6faca2ca4d423 *inst/tests/csv/_contents_.csv aa63c7d2c13113604223850ab3fe03e3 *inst/tests/cut2.r 83ef2f75fdba346104cf289db2f49c88 *inst/tests/dataframeReduce.r 24eb3c278840a4685edec5b605303db5 *inst/tests/describe.Rmd 8b12f0007761ac5ddd36ae01a1d55516 *inst/tests/describe2.r 176099d2964cc0b17203a1788106b31a *inst/tests/dotchartpl.r 76c83d1d4b392236094201486a8a135f *inst/tests/examples.Rmd cfd5ed703e86daf89c13c7c144666c2f *inst/tests/fit.mult.impute.bootstrap.r 44047c6260fc023709d57056d73c99c6 *inst/tests/fit.mult.impute.r 75b79535abd995c84089e44cbe02d37a *inst/tests/gbayes.r 15175f29e455ff23d155ef75a6a69696 *inst/tests/histSpike.r ce584821d9700b8f2f191180add2266e *inst/tests/histSpikeg.r 4561d42b56f8d606f94c7a85083aefd9 *inst/tests/hoeff.r fb5a32fba03003505f33815e5152dce6 *inst/tests/howto.html ab7e1b44e7a9d4746fea7465df1316fb *inst/tests/html-summaryM.r 167427cf5e418668ba98a7f8e2273837 *inst/tests/html.data.frame.r 5f2b069b6eaa4976089078925e3f041e *inst/tests/html.summaryM.Rmd 870d95c269c397dc01a0b91dadfad788 *inst/tests/inverseFunction.r 4dd4b3b76ef12c7e6dea7dc633f35029 *inst/tests/label.r 573465e16901912adb87c138b62056c6 *inst/tests/largest.empty.r 121746a15a8dbc8ad174c7f082990131 *inst/tests/latex-color.r 30c299061f543ca648113ab0269dbc74 *inst/tests/latex-html.Rmd 5aea9e63b75e5610cb0706d6485ba558 *inst/tests/latex.s 8f030d4ee7bc5ac946ca210bcfc53c7b *inst/tests/latex.summaryM.Rnw 
a40f892874e68edf0c621971b200260d *inst/tests/latexTabular.r 86bb29e3df2c4cc1dab74549971ca150 *inst/tests/latexTherm.Rnw 2a119079fd599b3fd6f64b6f579de087 *inst/tests/latexTherm.r b0fc6d4900270c13a20eafd31293bc9c *inst/tests/latexpng.r 0c6b7314112171894df7511e12398f05 *inst/tests/mChoice.r c0668a4323dcc6d83736668c00456d23 *inst/tests/minor.tick.r dc25c71a732941ce1d970f8585068a6b *inst/tests/panelbp.r 39662c6cbeab67ca1544cb736ea64f70 *inst/tests/plot.summaryM.plotly.r badcf6388bfae0071da3c10a2b77bac6 *inst/tests/procmeans.txt 8b5ae36bde7c6fc08b577f34ebea9268 *inst/tests/rcspline.plot.r 4cec1c4f2df5dab4ab8118a0c5211e14 *inst/tests/redun.r 13e057a458cffb13b2134e4dd523c770 *inst/tests/simRegOrd.r c17621d7759320c355a948272a8f587d *inst/tests/summary.formula.r 93d6cc6040ff9451c6559b41b539a76e *inst/tests/summary.formula.response.stratify.r a7581889e540f5b72b7ff5fbe82d2652 *inst/tests/summaryD.r b10dd45fb4fb16a077ef138bf0aef253 *inst/tests/summaryM-customtest.r 66adedb7df95fdd93fd9af5c426829e7 *inst/tests/summaryP.r c67457e5755b3049b79b3d6ee6fc2dc3 *inst/tests/summaryP2.r 1c451c58093a60464ddab77a55f034dd *inst/tests/summaryRc.r 572684d42f7e070fd1e4f639207c2bc7 *inst/tests/summaryS.r 1da8f0758cb0688baf63336d8073785a *inst/tests/summarySp.r b36692d5996f652cf69c36533bda7848 *inst/tests/test.rda 16fb6f4d0b48d6ef354c2f493170239d *inst/tests/test.sas be545aff97891d7a2d7a89060601d639 *inst/tests/test.xml 907f2fb5b395929aa67db43f7d3488de *inst/tests/test.xpt f7ecdf35cbda3600541a5a24b2fef03c *inst/tests/test2.xpt 7d8799b50e0922e62f230d1d9d6f619e *inst/tests/testexportlib.r 52c79beb3d98d36025262007f39d5884 *inst/tests/testexportlib.sas 31d1d268acf0f7e75a700c660f4ac731 *inst/tests/upData.r e1646179a6487ec47f043ba1125a5438 *inst/tests/wtd.r 93023f35f678f7674a067e4ac72c4cb1 *inst/tests/xYplotFilledBands.r 472d787fa8a7f0b5d1ac8dee9d866d22 *inst/todo 7e9e0d3131e1e1fc0e423359e1c829f6 *man/Cs.Rd efef488600d2c69b9dc68652c9e03597 *man/Ecdf.Rd ac4fff37fde2e6d0feae41ceda669399 *man/GiniMd.Rd 2b084c69d1db718784be029130560d30 *man/Hmisc-internal.Rd a8d3213ffdeb74c3f50bb2d54a8b9ab5 *man/Lag.Rd 352fb4122854747013fc8d3c96ddb360 *man/Merge.Rd eefc844ac2a73ffa305dd08206f823a2 *man/Misc.Rd c51df7edc25fd92fdd603738b777b384 *man/Overview.Rd e3273cfa3aa9e69c415b4ebcb10ada71 *man/R2Measures.Rd 71a62f91bcd26f7cbfa9a2755bed0830 *man/Save.Rd 79ed6280942247ec7b4580638c1cc885 *man/abs.error.pred.Rd 6ed0de6a1ac299e867fea5c42fd49024 *man/addMarginal.Rd 77a208cf321e5241f5e11e4dbb6d101e *man/all.is.numeric.Rd 89bde30f0578686f86512ffe08e7e4be *man/approxExtrap.Rd 03eed851f558773c200c8e2e09c9fff3 *man/areg.Rd 162611103783a62d004aa9af0666690a *man/aregImpute.Rd b6f41ce5607b746fe5cf9f80601c363e *man/biVar.Rd c6b61f5f8a001f8748635bde7ff92fb8 *man/binconf.Rd 0c75b817988c321cb06fefeb96fd4096 *man/bootkm.Rd 28f20fdd8e870711327dfa3cda995e4b *man/bpower.Rd 973831b376ae049e13f0bff466ed37c1 *man/bpplot.Rd 629c6b8dd873c22e8608185abd67046f *man/bystats.Rd 5cba3f43942a908ce5fef1004cf2f857 *man/capitalize.Rd 8130fe8c287419fe064d47c079250aaf *man/ciapower.Rd 4202bd798ad52004eec93d91b0a8d945 *man/cnvrt.coords.Rd 59655083adf88a21c7eab791e972fd0b *man/combplotp.Rd 00db76a86189178fe8385d5aaeda346b *man/consolidate.Rd 5821a351aae1dff1a760c955846628a2 *man/contents.Rd fb3224eb96702359c56fa180ab230562 *man/cpower.Rd 5673d004d5398e681cc11521217a38ca *man/csv.get.Rd f9f1ff7399b4801bca280156a77aec01 *man/curveRep.Rd bf04215ba59da1200b2295fa160d6205 *man/cut2.Rd 3eb4090f0c25a33dcf3452294ca80ec1 *man/data.frame.create.modify.check.Rd 30aa19bbc67a730bfe65d71ecd473bb4 
*man/dataRep.Rd 1df7d66a5fcdc97254d4e70a8d487af4 *man/deff.Rd ea26141f45470acc67108da4b46deffb *man/describe.Rd 4c2942aab548faa49c5a389117b48b70 *man/discrete.Rd 94748a0c9bb7d0290a8b2dc7f4a0bb7d *man/dotchart2.Rd 67f28b3925d62e99e612e36532d3149f *man/dotchart3.Rd 2240c79711c6ab7bc4d204483d3d8258 *man/dotchartpl.Rd 553f6215baa4cad0ac3da850a3cbc2d2 *man/epi.Rd 36c0c98e4e6fcc78979da741c8dfb386 *man/equalBins.Rd 04771822c60d38658c3720cd91bc2113 *man/errbar.Rd 3bc08c67bb196770cb9ff90bdfa15672 *man/escapeRegex.Rd 50ba2202ecd33f8244ac838560286b93 *man/estSeqMarkovOrd.Rd a6b8bff2b02dda6ebe943bd8b2044d65 *man/estSeqSim.Rd b8c947ebb9ea63a9ecf543e9248a512b *man/event.chart.Rd 6f9b356b12e05d93c091e033a7d3f4a8 *man/event.convert.Rd 1beee8ab5e6bde656f0daaf7dfe1cbc5 *man/event.history.Rd c555b6a1bf5ca4c3bf01d93949dc890a *man/find.matches.Rd 9e22ca4e987f9f93b87b832b7a95d503 *man/first.word.Rd 09eb3bb9c47c3c7862fbc437083aab7d *man/format.df.Rd 041f76e1a74ad1e2361aab071b496dac *man/format.pval.Rd ebdd21800ffc4beea994eaac7d5b4668 *man/gbayes.Rd 47920142f9583c30c3b4bbd60d9a3035 *man/gbayesSeqSim.Rd bd9c601a3778a3bc502d85fa39dcb058 *man/getHdata.Rd 2e3625a44c067d7004d304ca7940e9f6 *man/getRs.Rd c10b701ac0c5fbf3a3a3654378694a1c *man/getZip.Rd ac60ee276185df5b27c0819a3e781bae *man/ggMisc.Rd da13083c7f23c4a36a1b49ccfc1e87e3 *man/ggfreqScatter.Rd 988163ff858c2a7b23ed89d50e056403 *man/ggplotlyr.Rd bd30d6597a6591b2d3b6202931502245 *man/hdquantile.Rd 3ad9255f40b316ad3c4349c842daad6a *man/hidingTOC.Rd 8256dd2a02e789ad86ec974fa90420cd *man/hist.data.frame.Rd 901875cc993a500f248fb22af00183d4 *man/histbackback.Rd 450305dcabbc06b9fb94f85d4dd4dbb0 *man/histboxp.Rd d868cbfad7aa949619a278f5ec1bd96f *man/hoeffd.Rd 26e4034aa830e891d14757190395804c *man/html.Rd d8856cc1665185e0a13d032e05a7ce67 *man/impute.Rd b785dbffdb86727e8650cf7e19df0377 *man/intMarkovOrd.Rd 396c791f149027eeafaa9515e93d3d27 *man/knitrSet.Rd 8fcd85e7402db30d54a27e1222355606 *man/labcurve.Rd af738949158db99a2448cb48d3b310d3 *man/label.Rd bebcb911d2f805ec0079cf0d7e4c02c7 *man/latex.Rd 9c1dfa5242426105b1ca4669b0977997 *man/latexCheckOptions.Rd c4a248262270a156d9723212daba90ff *man/latexDotchart.Rd 32f49f9726f15c0cae64179b3df74f20 *man/latexTabular.Rd 26968e20d36b253fb7e88f2902f33ec5 *man/latexTherm.Rd e110de27ad37decbe96987691beb7555 *man/legendfunctions.Rd 3e0817d289b96a7d59847a1144f5fa59 *man/list.tree.Rd a327d909648492c3e5b524888e27c35a *man/mApply.Rd f66d70a00d4ac8c6e69a916c41a23871 *man/mChoice.Rd c2c3d7fd7b6118a5480308fcacb57afa *man/makeNstr.Rd 1f4cb5e4cb669550dc4aca2745280286 *man/mdb.get.Rd ad5ca5884567e50edeba874e3f6e0566 *man/mgp.axis.Rd 9d0b72af8d419f9a78263abf388a339f *man/minor.tick.Rd 7e82c634be90a9db0751e0b9ec78778a *man/mtitle.Rd a6c96e43927dc9a895b515d69b8a4346 *man/multLines.Rd e63a1b60bde7eda1bef5e0c07d88b9b3 *man/na.delete.Rd bb9292b59a6e821c1ac815161d2e1196 *man/na.detail.response.Rd 8de167281758448213ff5b4f6052973c *man/na.keep.Rd f9eb6868ac6e099103d654c1c1084b21 *man/nin.Rd be66aaec2a64401335915c4671d50918 *man/nobsY.Rd 2f7e82883fd0be5a86a3bec4d4dc250a *man/nstr.Rd 1d560c844c38c8f7289bb27e21efe171 *man/num.intercepts.Rd 76b1975307ab89cc2eb19f12040342ce *man/pairUpDiff.Rd ab8edbb03663977e40ac2ffa3025d39f *man/panel.bpplot.Rd b8247448b0c2c6d876ce693f23c9fe85 *man/partition.Rd b34470017ff70767108ad8ee9e169f02 *man/pc1.Rd fd3bfeefd15a79c301f02a7c4631f8d1 *man/plotCorrM.Rd a3aed45d350c39121d61700f94e859cb *man/plotCorrPrecision.Rd 88d93c5117d432cbe4ba996c5c74d0d8 *man/plotlyM.Rd 81937c2b724d0ec42f817ebb88748e70 *man/plsmo.Rd 
a05ef4564d6773ed63e6430f9d52eae7 *man/popower.Rd 52be83076b1f0442cb5ab5c15bb16a3f *man/print.char.list.Rd 0a25e1c6f349789159b7239ae0f0f620 *man/print.char.matrix.Rd 057da98d466a023bda3991abbf880855 *man/prnz.Rd 7cc6decd6cb488bf6ef52ea36a1a349f *man/prselect.Rd 260137fa18118deb3cf8f5e8cd09801f *man/pstamp.Rd 6595b75b4809474bad712063e08bb0ff *man/rMultinom.Rd 888b130ec7fd2abd84cd5475170a0814 *man/rcorr.Rd 8e50a363db2787b941bcdb97b1baa7ba *man/rcorr.cens.Rd a32fc84fb8d9077fd571a31ed83e08c8 *man/rcorrp.cens.Rd 0e09013825fbf352d7b6492af66a2485 *man/rcspline.eval.Rd eb2cef4c4191b07031d867baf0adfe61 *man/rcspline.plot.Rd 6857f9694fede7b9cf3986f75e45428e *man/rcspline.restate.Rd fa12a16c134db85d6d6afd1f95a70cb2 *man/reShape.Rd 3ea575526896f5376dd9542cf738366d *man/redun.Rd 0312ac50dd86a78be90a5e834b59aaad *man/rlegend.Rd c6f75db89c6f3084e365810ac285e9a1 *man/rm.boot.Rd 326928a75e512ba40912a48fc355934e *man/samplesize.bin.Rd 89c4eefbaac065e35e5becf8f6e5319d *man/sasxport.get.Rd 7e3e2a7b07c5276af20fdf9a25a3d643 *man/scat1d.Rd db05fb7a0b4ab6f2e3296104ab70dca1 *man/score.binary.Rd 08744258b9fd7ca14e1b32ba62ca245f *man/sedit.Rd 8dbcd26d8e34394832eaa6c2f32931a3 *man/show.pch.Rd 053929c241c0d69a15198bfe23c22ec5 *man/showPsfrag.Rd 89f7962d3a2998760a8ec9a1025a08a8 *man/simMarkovOrd.Rd 9a8ed340a60f8fde016acc6b361a2044 *man/simRegOrd.Rd c63b29fcce92356968e66f1c66cd9bcd *man/simplifyDims.Rd baf8dcee242a5c39c4e6cddfc11b71ad *man/smean.sd.Rd 32aeaf16b41f0b5cc1f9552b0e8946ff *man/solvet.Rd bf3f45e963edf51b7d01878ec1b534c6 *man/somers2.Rd 7a28776c28e986b906cf041f07b4ad74 *man/soprobMarkovOrd.Rd 40c56a903db1c8731d357d519c2a7e6d *man/soprobMarkovOrdm.Rd 3c8807018f080566a3315b896b562f59 *man/spower.Rd bcf38fffb93e2739eebe0e21ca00e070 *man/spss.get.Rd d63d4d4bd78f7c6e545bc226dfd58481 *man/src.Rd b5466f303b02d40e728a1e74af5fac0f *man/stat_plsmo.Rd ac3599e852460b9a3f9c1c656b01c2f8 *man/stata.get.Rd 8c73fc252d781ce3fdaee10eade4126b *man/string.bounding.box.Rd 1bbde6f65443df855afb233c9705f423 *man/string.break.line.Rd 9a4cac30cfc22acf2afa062a30b62d9c *man/stringDims.Rd bbd0b4524bd9a4e350d6a7ad4aca3a0a *man/subplot.Rd 6181f806d9bb937ccc0894d4e079378c *man/summarize.Rd 954f0d9af2d6644d7b04736836e71fda *man/summary.formula.Rd f545fe56c301c81a23e81cc2034a4561 *man/summaryM.Rd 0b38201723a49b1bb44dce87f9d21b4c *man/summaryP.Rd ad31c9e52f4fc0991357792fb6395c60 *man/summaryRc.Rd e7824f82025464f03a7f880926417b55 *man/summaryS.Rd d9e66a07144bec4e7365da937119b5ea *man/symbol.freq.Rd 9f6e09f2d3a2ef434384999e071f4755 *man/sys.Rd 60faf1203ecb620797e8ce494ed1c0a6 *man/t.test.cluster.Rd 8fa160cbee216dbf5cb95959f4fe485e *man/tabulr.Rd ec1a45cd81fe0bb55b45eb5b0ead1e73 *man/tex.Rd 9d6fb3b837750d773910f9f4fd4fb602 *man/transace.Rd 59a5e9fa0d130c1532f9c8c86f17a3da *man/transcan.Rd 7e9eae90e12d52f637f46ce78bfc0c53 *man/translate.Rd 76093ee37d83f9e13c9be04fbaaca9bd *man/trunc.POSIXt.Rd 87253a989c5cea4bfaf4eb6bae0f9dbd *man/units.Rd 4a2c2dedc27b7f7b12110666e8f1108f *man/unix/sas.get.Rd b3e69d575202b5b86b8f3028bd7b18a0 *man/upData.Rd ff471a5b0ea40672a9f38901fcc4129d *man/upFirst.Rd 26c8160a0663c12504eb2e33b6118c45 *man/valueTags.Rd 182ce7ab3feb2d771f75e975f90bc0b2 *man/varclus.Rd 1f444122b5efaa46d465e55c2c93e7b9 *man/windows/sas.get.Rd 583a620471e3172d5d4cc3838a65b957 *man/wtd.stats.Rd c61329c4f1839b918ff74ca471e41063 *man/xYplot.Rd 13c3f8a542f16076e346f9d6cc7904c3 *man/xtfrm.labelled.Rd 83b8f2732a6c378785187242ff93b26a *man/xy.group.Rd c63b3fa2193829f494e39d8cd557cd83 *man/yearDays.Rd c87ff598535f37eaf4c78f044e722b74 *man/ynbind.Rd 
8360e406703130bb09302a64a9638860 *src/Hmisc.c a8395ba413d746154d7e89ce07e1e11b *src/Hmisc.h 06d9b6e7e6a7e22898fc0134691889b3 *src/cidxcn.f ac182f4144cfd7853f252be1e4b3d75b *src/cidxcp.f 08ccfbf5a4efcc18c629a8f91abc6b78 *src/hoeffd.f 781a196766abf3ebddc10bc658ff22a2 *src/init.c d6bc8b5ebd92a27f1c9e3522d46fa466 *src/jacklins.f 10538b0cf98c499976a5bb6d74a5195e *src/largrec.f 2d0aa4a90313d361f628383dd5fcf7fb *src/mChoice.c 703f73252fe6efaf8244b0713aaabe6b *src/maxempr.f 69d3a08b2cd911161ba22af68f97f611 *src/nstr.c 5fd3cecbf580b3cb32f365f1f9036794 *src/ranksort.c df53032c14b9c8a41bfe9529411c7729 *src/ratfor/cidxcn.r ebb3e56a96c8678a2e3184d176af9926 *src/ratfor/cidxcp.r 4549893e527bae0e3140b29579c74720 *src/ratfor/hoeffd.r eec5235344d56eaaaff52805e71fdf30 *src/ratfor/jacklins.r daae316a69a5b013bdf334c4302725cd *src/ratfor/maxempr.r 29402d28cea92baf7631bc7ba15be2d2 *src/ratfor/rcorr.r 1e330cf030042a8aef488a254a6b1074 *src/ratfor/wclosest.r def0335461e456fdd3d966856a7a781c *src/rcorr.f 32839fee8ce62db43ef9b527256a8737 *src/sas/exportlib.sas 6391fc48464e9b488b3626eaacbe967d *src/string_box.c 46912e086e55a7d273443ff5a6e37bde *src/wclosest.f Hmisc/inst/0000755000176200001440000000000013555351153012300 5ustar liggesusersHmisc/inst/THANKS0000644000176200001440000000014412243661443013211 0ustar liggesusersGreg Snow for providing the subplot function and documentations Greg Snow, Ph.D. greg.snow@ihc.com Hmisc/inst/WISHLIST0000644000176200001440000000000012243661443013456 0ustar liggesusersHmisc/inst/todo0000644000176200001440000000025012243661443013164 0ustar liggesusersMake latex use options(latexcmd, dvipscmd) See if R mailbox has generalization of var.inner in model.frame.default.s Check arguments to .C("loess_raw") in wtd.stats.sHmisc/inst/tests/0000755000176200001440000000000014370454650013444 5ustar liggesusersHmisc/inst/tests/summaryD.r0000644000176200001440000000242112773301231015416 0ustar liggesusersrequire(Hmisc) set.seed(135) maj <- factor(c(rep('North',13),rep('South',13))) g <- paste('Category',rep(letters[1:13],2)) n <- sample(1:15000, 26, replace=TRUE) y1 <- runif(26) y2 <- pmax(0, y1 - runif(26, 0, .1)) png('/tmp/summaryD.png', width=550, height=800) spar(mfrow=c(3,2)) f <- function(x) sprintf('%4.2f', x) summaryD(y1 ~ maj + g, xlab='Mean', auxtitle='', fmtvals=f) summaryD(y1 ~ maj + g, groupsummary=FALSE) summaryD(y1 ~ g, fmtvals=f, auxtitle='') Y <- cbind(y1, y2) summaryD(Y ~ maj + g, fun=function(y) y[1,], symbol=c(1,17)) rlegend(.1, 26, c('y1','y2'), pch=c(1,17)) summaryD(y1 ~ maj, fun=function(y) c(Mean=mean(y), n=length(y)), auxvar='n') dev.off() # options(grType='plotly') sym <- if(grType() == 'plotly') c('circle', 'line-ns-open') else c(21, 3) h <- function(x) c(mean=mean(x), Q1=unname(quantile(x, .25)), Q3=unname(quantile(x, .75)), N=length(x)) summaryD(Y ~ maj + g, fun=h, auxvar='N', symbol=sym[c(1,2,2)], col=colorspace::rainbow_hcl(2)[c(1,2,2)], legendgroup=c('Mean', 'Quartiles', 'Quartiles')) png('/tmp/summaryD2.png', width=300, height=100) # Or: pdf('/tmp/z.pdf', width=3.5, height=1.25) spar() summaryD(y1 ~ maj, fmtvals=function(x) round(x,4), xlab=labelPlotmath('Velocity', 'm/s')) dev.off() Hmisc/inst/tests/testexportlib.r0000644000176200001440000000006612243661443016536 0ustar liggesuserslibrary(Hmisc) d <- sasxport.get('csv', method='csv') Hmisc/inst/tests/bootkm.r0000644000176200001440000000041612710664056015122 0ustar liggesusersrequire(Hmisc) yrs <- runif(30, 0, 10) ev <- sample(0:1, 30, TRUE) w <- bootkm(Surv(yrs, ev), times=5) describe(w) quantile(w, c(.025, .975)) # 
Try with only 2 events ev <- c(1, 1, rep(0, 28)) w <- bootkm(Surv(yrs, ev), times=5) describe(w) quantile(w, c(.025, .975)) Hmisc/inst/tests/aregImpute4.r0000644000176200001440000000031712556520362016014 0ustar liggesusersrequire(rms) set.seed(1) a <- runif(100) b <- factor(sample(c('a','b','c'), 100, TRUE)) b[10] <- NA d <- data.frame(a, b) x <- aregImpute(~ a + b, data=d) x$imputed$b fit.mult.impute(a ~ b, ols, x, data=d) Hmisc/inst/tests/histSpike.r0000644000176200001440000000045012442314330015555 0ustar liggesusersrequire(Hmisc) set.seed(1) x <- seq(0, 1, by=0.01) y1 <- x + x^2 y2 <- x - 2*x^3 + 1 png('/tmp/z.png') plot(x, y1, type='l') lines(x, y2) xs <- rnorm(1000, .3, .3) xs2 <- rnorm(1000, .7, .3) histSpike(xs, curve=list(x=x, y=y1), add=TRUE) histSpike(xs2, curve=list(x=x, y=y2), add=TRUE) dev.off() Hmisc/inst/tests/aregImpute2.r0000644000176200001440000000657612700121622016012 0ustar liggesuserslibrary(rms) set.seed(4) n <- c(20000,2000,200)[1] x2 <- rnorm(n) x1 <- sqrt(.5) * x2 + rnorm(n, sd=sqrt(1-.5)) y <- 1 * x1 + 1 * x2 + rnorm(n) type <- c('mcar','mar.x2')[2] x1m <- if(type=='mcar') ifelse(runif(n) < .5, x1, NA) else ifelse(rnorm(n,sd=.8) < x2, x1, NA) # MAR on x2, R2 50%, 50% missing coef(ols(y ~ x1+x2)) coef(ols(y ~ x1m + x2)) Ecdf(x1) Ecdf(x1m, lty=2, add=TRUE) Ecdf(x1[is.na(x1m)], lty=2, lwd=3, add=TRUE) plot(x2, x1m) plsmo(x2, is.na(x1m), datadensity=TRUE) dd <- datadist(x2,x1m) options(datadist='dd') f <- lrm(is.na(x1m) ~ rcs(x2,4)) plot(Predict(f, x2, fun=plogis)) d <- data.frame(x1,x1m,x2,y) # Find best-validating (in terms of bootstrap R^2) value of nk g <- aregImpute(~ y + x1m + x2, nk=c(0,3:5), data=d) g # nk=0 is best with respect to mean and median absolute deviations # Another good model is one that forces the target variable (x1m) to # be transformed linearly using tlinear=TRUE g <- aregImpute(~y + x1m + x2, nk=0, n.impute=5, data=d, pr=F, type=c('pmm','regression')[1], plotTrans=FALSE) s <- is.na(x1m) c(mean(g$imputed$x1), mean(x1[s])) ix1 <- g$imputed$x1[,5] x1i <- x1m x1i[s] <- ix1 rcorr(cbind(x1,x2,y)[s,]) rcorr(cbind(x1i,x2,y)[s,]) # allowing x1 to be nonlinearly transformed seems to increase the # correlation between imputed x1 and x2 and imputed x1 and y, # in addition to variance of imputed values increasing f <- fit.mult.impute(y ~ x1m + x2, ols, xtrans=g, data=d, pr=F) coef(f) g2 <- g g1 <- g plot(g1) # Ecdf(g2, add=TRUE, col='blue') ?? # For MARx2, pmm works reasonably well when nk=3, regression doesn't # both work well when nk=0 # For MCAR, pmm works well when nk=3, regression works moderately # well but imputed values have higher variance than real x1 values # when x1m is missing, and coefficient of x2 on y is 0.92 when n=20000 # Did not get worse by setting nk=6 # Regression imputation works fine with nk=6 with ~y+I(x1m)+x2 # Problem with I(y)+x1m+I(x2) plot(g) Ecdf(x1, add=TRUE, col='blue') Ecdf(x1m, lty=2, add=TRUE) Ecdf(x1[is.na(x1m)], lty=2, lwd=3, add=TRUE) # Look at distribution of residuals from areg for various nk s <- !is.na(x1m) f <- lm.fit.qr.bare(cbind(y,x2)[s,],x1m[s]) Ecdf(resid(f), lwd=2, col='gray') py <- f$fitted.values ry <- resid(f) g <- areg(cbind(y,x2), x1m, nk=6, xtype=rep('l',2)) p <- g$linear.predictors r <- resid(g) Ecdf(r, add=TRUE, col='blue') plot(py, p) coef(lm.fit.qr.bare(py,p)) plot(ry,r) coef(lm.fit.qr.bare(ry,r)) cor(ry,r) sd(ry) sd(r) pr <- predict(g, cbind(x1, x2)) pr2 <- g$linear.predictors describe(pr-pr2) Pr <- fitted(f) plot(Pr,pr) # ?? 
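## A possible reading of the puzzling comparison above: Pr holds fitted
## values of x1m on the raw scale, while pr is areg's linear predictor on
## the transformed-x1m scale (the target variable gets a spline
## transformation when nk=6), so the two need only agree up to that
## estimated transformation. Note also that pr was computed from
## cbind(x1, x2) although g was fit with predictors (y, x2); that mismatch
## may be what the "??" flags.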
coef(lm.fit.qr.bare(Pr,pr)) obs.trans <- pr + r plot(obs.trans, y) w <- lm.fit.qr.bare(obs.trans,y) coef(w) w$rsquared # Strip out aregImpute code for regression imputation, force linearity, # no bootstrap, x1 is only variable with NAs ai <- function(x1, x2, y) { n <- length(x1) na <- (1:n)[is.na(x1)] nna <- length(na) j <- (1:n)[-na] f <- lm.fit.qr.bare(cbind(y,x2)[j,], x1[j]) prn(coef(f)) # Predicted mean x1 for only those that are missing: predx1 <- matxv(cbind(y,x2)[na,], coef(f)) Ecdf(predx1, add=TRUE, col='blue') res <- f$residuals rp <- length(na) > length(res) px1 <- predx1 + sample(res, length(na), replace=rp) px1e <- approxExtrap(f$fitted.values, f$fitted.values, xout=px1)$y print(describe(abs(px1-px1e))) Ecdf(px1e, add=TRUE, col='green') x1[na] <- px1e x1 } x1i <- ai(x1m, x2, y) ols(y ~ x1i + x2) Hmisc/inst/tests/summary.formula.r0000644000176200001440000000134612311635720016764 0ustar liggesuserslibrary(Hmisc) getHdata(titanic3) g <- function(x) c(Mean=mean(x,na.rm=TRUE), N=sum(!is.na(x))) with(titanic3, tapply(age, llist(sex,pclass), g)) g <- function(x) c(Mean=apply(x, 2, mean, na.rm=TRUE), N=apply(x, 2, function(w)sum(!is.na(w)))) options(digits=3) summary(cbind(age,fare) ~ sex + pclass, method='cross', fun=g, data=titanic3) with(titanic3, g(cbind(age,fare))) ## From Kevin Thorpe kevin.thorpe@utoronto.ca ### generate data set.seed(31) demo <- data.frame(age=rnorm(100,50,10),sex=sample(c("Male","Female"),100,TRUE)) summary(~age,data=demo,method="reverse") summary(~sex,data=demo,method="reverse") ### used to work summary(~ age + sex, data=demo, method="reverse") summaryM(age + sex ~ 1, data=demo) Hmisc/inst/tests/dotchartpl.r0000644000176200001440000001767613673440421016004 0ustar liggesusersrequire(Hmisc) set.seed(2) d <- expand.grid(major=c('Alabama', 'Alaska', 'Arkansas', 'Arizona', 'Nevada'), minor=c('East', 'West'), group=c('Female', 'Male'), city=0:2) n <- nrow(d) # d$x <- (1 : nrow(d)) + runif(n) d$num <- round(100*runif(n)) d$denom <- d$num + round(100*runif(n)) d$x <- d$num / d$denom d with(d, dotchartpl(x, major, minor, group, city, big=city==0, num=num, denom=denom) ) ## Same without city, compute Female - Male differences and conf. 
intervals ## Within major groups sort in descending order of differences, show ## differences with color of Female if positive, Male if negative, ## add layer with horizontal bar centered at the difference and with ## width equal to half-width of confidence interval d <- subset(d, city==0) i <- with(d, order(major, minor, group)) # xless(d[i, ]) with(d, dotchartpl(x, major, minor, group, refgroup='Male', num=num, denom=denom, xlim=c(0,1)) ) # Original source of aeanonym: HH package # aeanonym <- read.table(hh("datasets/aedotplot.dat"), header=TRUE, sep=",") # Modified to remove denominators from data and to generate raw data # (one record per event per subject) ae <- structure(list(RAND = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("a", "b"), class = "factor"), PREF = structure(c(12L, 12L, 18L, 18L, 26L, 26L, 33L, 33L, 5L, 5L, 27L, 27L, 6L, 6L, 15L, 15L, 22L, 22L, 23L, 23L, 31L, 31L, 17L, 17L, 2L, 2L, 3L, 3L, 13L, 13L, 25L, 25L, 28L, 28L, 14L, 14L, 4L, 4L, 8L, 8L, 19L, 19L, 21L, 21L, 29L, 29L, 10L, 10L, 20L, 20L, 16L, 16L, 32L, 32L, 11L, 11L, 1L, 1L, 30L, 30L, 24L, 24L, 9L, 9L, 7L, 7L), .Label = tolower(c("ABDOMINAL PAIN", "ANOREXIA", "ARTHRALGIA", "BACK PAIN", "BRONCHITIS", "CHEST PAIN", "CHRONIC OBSTRUCTIVE AIRWAY", "COUGHING", "DIARRHEA", "DIZZINESS", "DYSPEPSIA", "DYSPNEA", "FATIGUE", "FLATULENCE", "GASTROESOPHAGEAL REFLUX", "HEADACHE", "HEMATURIA", "HYPERKALEMIA", "INFECTION VIRAL", "INJURY", "INSOMNIA", "MELENA", "MYALGIA", "NAUSEA", "PAIN", "RASH", "RESPIRATORY DISORDER", "RHINITIS", "SINUSITIS", "UPPER RESP TRACT INFECTION", "URINARY TRACT INFECTION", "VOMITING", "WEIGHT DECREASE")), class = "factor"), SAE = c(15L, 9L, 4L, 9L, 4L, 9L, 2L, 9L, 8L, 11L, 4L, 11L, 9L, 12L, 5L, 12L, 7L, 12L, 6L, 12L, 6L, 12L, 2L, 14L, 2L, 15L, 1L, 15L, 4L, 16L, 4L, 17L, 11L, 17L, 6L, 20L, 10L, 23L, 13L, 26L, 12L, 26L, 4L, 26L, 13L, 28L, 9L, 29L, 12L, 30L, 14L, 36L, 6L, 37L, 8L, 42L, 20L, 61L, 33L, 68L, 10L, 82L, 23L, 90L, 76L, 95L)), .Names = c("RAND", "PREF", "SAE"), class = "data.frame", row.names = c(NA, -66L)) ae$n <- ifelse(ae$RAND == 'a', 212, 188) ae$p <- ae$SAE / ae$n # ae <- subset(ae, p >= 0.05) with(ae, dotchartpl(p, num=SAE, denom=n, minor=PREF, group=RAND, refgroup='a')) n <- 500 set.seed(1) d <- data.frame( race = sample(c('Asian', 'Black/AA', 'White'), n, TRUE), sex = sample(c('Female', 'Male'), n, TRUE), treat = sample(c('A', 'B'), n, TRUE), smoking = sample(c('Smoker', 'Non-smoker'), n, TRUE), hypertension = sample(c('Hypertensive', 'Non-Hypertensive'), n, TRUE), region = sample(c('North America','Europe','South America', 'Europe', 'Asia', 'Central America'), n, TRUE)) d <- upData(d, labels=c(race='Race', sex='Sex')) dm <- addMarginal(d, region) s <- summaryP(race + sex + smoking + hypertension ~ region + treat, data=dm) ## add exclude1=FALSE to include female category ggplot(s, groups='treat', exclude1=TRUE, abblen=12) ggplot(s, groups='region') s$region <- ifelse(s$region == 'All', 'All Regions', as.character(s$region)) with(s, dotchartpl(freq / denom, major=var, minor=val, group=treat, mult=region, big=region == 'All Regions', num=freq, denom=denom) ) s2 <- s[- attr(s, 'rows.to.exclude1'), ] with(s2, dotchartpl(freq / denom, major=var, minor=val, group=treat, mult=region, big=region == 'All Regions', num=freq, denom=denom) ) 
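## A minimal sketch, assuming dotchartpl returns a plotly widget and that
## the htmlwidgets package is installed: save the last chart above to a
## standalone HTML file for review outside of R
p <- with(s2, dotchartpl(freq / denom, major=var, minor=val, group=treat,
                         mult=region, big=region == 'All Regions',
                         num=freq, denom=denom))
htmlwidgets::saveWidget(p, 'dotchartpl-example.html', selfcontained=FALSE)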
Hmisc/inst/tests/csv/0000755000176200001440000000000013555351624014240 5ustar liggesusersHmisc/inst/tests/csv/FORMAT.csv0000755000176200001440000000041712243661443015746 0ustar liggesusersFMTNAME,START,END,LABEL,MIN,MAX,DEFAULT,LENGTH,FUZZ,PREFIX,MULT,FILL,NOEDIT,TYPE,SEXCL,EEXCL,HLO,DECSEP,DIG3SEP,DATATYPE,LANGUAGE RACE,1,1,green,1,40,6,6,1E-12,,0,,0,N,N,N,,,,, RACE,2,2,blue,1,40,6,6,1E-12,,0,,0,N,N,N,,,,, RACE,3,3,purple,1,40,6,6,1E-12,,0,,0,N,N,N,,,,, Hmisc/inst/tests/csv/TEST.csv0000755000176200001440000000011312243661443015526 0ustar liggesusersrace,age,d1,dt1,t1 2,30,15402,1330767062,40425 4,31,15494,1338716527,40453 Hmisc/inst/tests/csv/_contents_.csv0000755000176200001440000000176312243661443017116 0ustar liggesusersMEMNAME,MEMLABEL,NAME,TYPE,LENGTH,LABEL,FORMAT,NOBS FORMAT,,DATATYPE,2,8,Date/time/datetime?,,3 FORMAT,,DECSEP,2,1,Decimal separator,,3 FORMAT,,DEFAULT,1,3,Default length,,3 FORMAT,,DIG3SEP,2,1,Three-digit separator,,3 FORMAT,,EEXCL,2,1,End exclusion,,3 FORMAT,,END,2,16,Ending value for format,,3 FORMAT,,FILL,2,1,Fill character,,3 FORMAT,,FMTNAME,2,8,Format name,,3 FORMAT,,FUZZ,1,8,Fuzz value,,3 FORMAT,,HLO,2,11,Additional information,,3 FORMAT,,LABEL,2,6,Format value label,,3 FORMAT,,LANGUAGE,2,8,Language for date strings,,3 FORMAT,,LENGTH,1,3,Format length,,3 FORMAT,,MAX,1,3,Maximum length,,3 FORMAT,,MIN,1,3,Minimum length,,3 FORMAT,,MULT,1,8,Multiplier,,3 FORMAT,,NOEDIT,1,3,Is picture string noedit?,,3 FORMAT,,PREFIX,2,2,Prefix characters,,3 FORMAT,,SEXCL,2,1,Start exclusion,,3 FORMAT,,START,2,16,Starting value for format,,3 FORMAT,,TYPE,2,1,Type of format,,3 TEST,,age,1,4,Age at Beginning of Study,,2 TEST,,d1,1,8,,MMDDYY,2 TEST,,dt1,1,8,,DATETIME,2 TEST,,race,1,3,,RACE,2 TEST,,t1,1,8,,TIME,2 Hmisc/inst/tests/latexTherm.Rnw0000644000176200001440000000127212243661443016250 0ustar liggesusers\documentclass{report} \begin{document} @ <>= require(Hmisc) knitrSet() latexTherm(c(1, 1, 1, 1), name='lta') latexTherm(c(.5, .7, .4, .2), name='ltb') latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltc', extra=0) latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltcc') latexTherm(c(0, 0, 0, 0), name='ltd') @ This is a the first:\lta and the second:\ltb\\ and the third without extra:\ltc END\\ Third with extra:\ltcc END\\ \vspace{2in}\\ All data = zero, frame only:\ltd <>= latexTherm(c(.5, .7, .4, .2), name='lte') @ % Note that the period after figure is necessary <>= plot(runif(20)) @ \end{document} Hmisc/inst/tests/latexTherm.r0000644000176200001440000000144312243661443015743 0ustar liggesusers# Usage: After running R, run latex on /tmp/z.tex require(Hmisc) source('~/R/Hmisc/R/latexTherm.s') f <- '/tmp/lt.tex' cat('', file='/tmp/z.tex'); cat('', file=f) ct <- function(...) 
cat(..., sep='', file='/tmp/z.tex', append=TRUE) ct('\\documentclass{report}\\begin{document}\n') latexTherm(c(1, 1, 1, 1), name='lta', file=f) latexTherm(c(.5, .7, .4, .2), name='ltb', file=f) latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltc', extra=0, file=f) latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltcc', file=f) latexTherm(c(0, 0, 0, 0), name='ltd', file=f) ct('\\input{/tmp/lt}\n') ct('This is a the first:\\lta and the second:\\ltb\\\\ and the third without extra:\\ltc END\\\\\nThird with extra:\\ltcc END\\\\ \n\\vspace{2in}\\\\ \n') ct('All data = zero, frame only:\\ltd\\\\') ct('\\end{document}\n') Hmisc/inst/tests/plot.summaryM.plotly.r0000644000176200001440000000041312756332267017743 0ustar liggesusersrequire(Hmisc) getHdata(pbc) pbc <- upData(pbc, moveUnits = TRUE) s <- summaryM(bili + albumin + alk.phos + copper + spiders + sex ~ drug, data=pbc, test=TRUE) s html(s) options(grType='plotly') a <- plot(s) a$Categorical a$Continuous plot(s, which='con', nrows=2) Hmisc/inst/tests/american-medical-association.csl0000644000176200001440000002204212755051403021626 0ustar liggesusers Hmisc/inst/tests/largest.empty.r0000644000176200001440000001135112243661443016423 0ustar liggesuserslibrary(Hmisc) par(mfrow=c(2,2)) w <- 2 for(i in 1:4) { if(w==1) { y <- exp(rnorm(20)) } else { x <- rnorm(20) y <- rnorm(20) plot(x, y) z <- list(x=x, y=y) } for(m in c('maxdim','area')) { for(numbins in c(25,100)) { u <- largest.empty(z$x, z$y, pl=TRUE, height=.05*diff(range(z$x)), width =.05*diff(range(z$y)), method=m, numbins=numbins) text(u, labels=m, adj=.5) if(w==2) points(z) } } } par(mfrow=c(1,1)) set.seed(1) x <- rnorm(1000); y <- rnorm(1000) plot(x,y) for(m in c('area', 'rexhaustive', 'exhaustive')) { cat('Method:', m, '\n') print(system.time(largest.empty(x, y, width=1.5, height=.5, method=m, pl=TRUE))) } comp <- function(a, b) { i <- identical(a,b) if(!i) print(cbind(a,b)) i } for(i in 1:70) { cat(i,'\n') set.seed(i) n <- sample(8:800, 1) x <- runif(n); y <- runif(n) plot(x, y) xl <- range(pretty(x)); yl <- range(pretty(y)) a <- largest.empty(x, y, xlim=xl, ylim=yl, width=.03, height=.03, method='rexhaustive', pl=TRUE) b <- largest.empty(x, y, xlim=xl, ylim=yl, width=.03, height=.03, method='exhaustive', pl=TRUE) comp(a[Cs(x,y,area)], b[Cs(x,y,area)]) comp(a$rect$x, b$rect$x) comp(a$rect$y, b$rect$y) } par(mfrow=c(2,2)) N <- 100; set.seed(8237) for(i in 1:4) { x <- runif(N); y <- runif(100) plot(x, y, pch="+", xlim=c(0,1), ylim=c(0,1), col="darkgray") for(m in c('area', 'rexhaustive', 'exhaustive')) { z <- largest.empty(x, y, 0.075, 0.075, pl=TRUE, numbins=100, xlim=c(0,1), ylim=c(0,1), method=m) cat(m, 'largest.empty Area:', z$area, '\n') print(cbind(z$rect$x, z$rect$y)) } } if(FALSE) { z <- Ecdf(y) points(lr(z$x, z$y, width=1.5, height=.05, pl=0, numbins=20)) lr <- function(x, y, xlim=par('usr')[1:2], ylim=par('usr')[3:4], width, height, numbins=25, pl=1) { area <- 0 xinc <- diff(xlim)/numbins yinc <- diff(ylim)/numbins i <- 1 j <- 0 for(xl in seq(xlim[1], xlim[2]-width, by=xinc)) { for(yl in seq(ylim[1],ylim[2]-height, by=yinc)) { j <- j + 1 if(j > 500) stop() xr <- if(any(x >= xl & y >= yl)) min(x[x >= xl & y >= yl]) else xlim[2] yu <- if(any(y >= yl & x >= xl)) min(y[y >= yl & x >= xl]) else ylim[2] if(pl==1) { ## Ecdf(Y) i <- i + 1 if(i > 8) i <- 2 polygon(c(xl,xr,xr,xl),c(yl,yl,yu,yu), col=i) } ar <- (yu-yl)*(xr-xl) if(ar > area) { area <- ar x1 <- xl x2 <- xr y1 <- yl y2 <- yu if(pl==2) { i <- i + 1 if(i > 8) i <- 2 polygon(c(x1,x2,x2,x1),c(y1,y1,y2,y2), col=i) } } } } 
list(x=mean(c(x1,x2)), y=mean(c(y1,y2))) } lr <- function(x, y, xlim=par('usr')[1:2], ylim=par('usr')[3:4], width, height, numbins=25, pl=0) { area <- 0 xinc <- diff(xlim)/numbins yinc <- diff(ylim)/numbins i <- 1 for(xl in seq(xlim[1], xlim[2]-width, by=xinc)) { for(yl in seq(ylim[1],ylim[2]-height, by=yinc)) { for(xr in seq(xl+width,xlim[2],by=xinc)) { for(yu in seq(yl+height,ylim[2],by=yinc)) { if(any(x >= xl & x <= xr & y >= yl & y <= yu)) break if(pl==1) { Ecdf(Y) polygon(c(xl,xr,xr,xl),c(yl,yl,yu,yu), col=2) } ## if(!any(x >= xl & x <= xr & y >= yl & y <= yu)) { ar <- (yu-yl)*(xr-xl) if(ar > area) { area <- ar x1 <- xl x2 <- xr y1 <- yl y2 <- yu if(pl==2) { i <- i + 1 if(i > 8) i <- 2 polygon(c(x1,x2,x2,x1),c(y1,y1,y2,y2), col=i) } } } } } } } list(x=mean(c(x1,x2)), y=mean(c(y1,y2))) } } Hmisc/inst/tests/dataframeReduce.r0000644000176200001440000000067112351644017016702 0ustar liggesusers# Thanks: JoAnn Alvarez require(Hmisc) set.seed(3) NN <- 200 x1 <- rnorm(NN) x2 <- x1^2 x3 <- runif(NN) x4 <- factor(NA, levels = c("January", "February", "March")) x5 <- factor(sample(c(1, 2, 3), size = NN, replace = TRUE), labels = c("January", "February", "March")) m <- 30 x2[1:m] <- NA x5[1:m] <- NA xdat <- data.frame(x1, x2, x3, x4) combine.levels(xdat$x4) xdat2 <- dataframeReduce(xdat, minprev=0.05, fracmiss = 0.05) Hmisc/inst/tests/latex.summaryM.Rnw0000644000176200001440000001002214005007600017035 0ustar liggesusers% From Yonghao Pua % Lauren Samuels % JoAnn Alvarez \documentclass{article} \usepackage{spaper} %\usepackage{relsize,setspace} % used by latex(describe( )) %\usepackage{longtable} %\usepackage{pdfpages} \usepackage{hyperref} \usepackage{here} %\usepackage{lscape} % for landscape mode tables %\usepackage{calc,epic,color} % used for latex(..., dotchart=TRUE) %\usepackage[superscript,nomove]{cite} % use if \cite is used and superscripts wanted %\usepackage{helvet} %\usepackage{moreverb} %\renewcommand{\familydefault}{\sfdefault} %\newcommand{\R}{{\normalfont\textsf{R}}{}} %\textwidth 6.75in % set dimensions before fancyhdr %\textheight 9.25in %\topmargin -.875in %\oddsidemargin -.125in %\evensidemargin -.125in %\usepackage{fancyhdr} % this and next line are for fancy headers/footers \pagestyle{fancy} \lhead{\textsc{}} %uncomment to remove left-sided headings \title{\textsf{Example illustrating problems with latex.summaryM}} \author{Pua Yong Hao\\\smaller\href{mailto:puayonghao@gmail.com}{puayonghao@gmail.com}} \date{\today} \begin{document} \maketitle \section{Descriptive Stats} <>= library(rms) knitrSet() n <- 500; set.seed(88) sex <- factor(sample(c("female","male"), n, TRUE)) age <- rnorm(n, 50, 10) height <- rnorm(n, 1.7, 0.5) type <- factor(sample(c('A', 'B'), n, TRUE)) dbase= data.frame(sex, age, height, type) dbase.dd <- datadist(dbase) options(datadist = "dbase.dd") @ When I use the \texttt{summaryM} function, note that the table footers are shown as captions. 
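Before the demonstration below, here is a minimal sketch of a possible
workaround, assuming that \texttt{insert.bottom=FALSE} suppresses the
generated footnotes (\texttt{insert.bottom} is also exercised with a
character value later in this file):
<<workaround, eval=FALSE>>=
latex(summaryM(age + height ~ sex, data=dbase), file='',
      insert.bottom=FALSE)
@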
<>= latex(summaryM(age + height + type ~ sex , data=dbase, overall=TRUE, test=TRUE), size='small', where="ht", long=TRUE, prmsd = TRUE, npct='slash', caption="Descriptive Statistics", msdsize='scriptsize', round = 2, digits=2, prtest='P', pdig =2, file='', label="table:summary") @ <>= # From Lauren Samuels set.seed(1) d <- expand.grid(x1=c('A', 'B'), x2=c('a', 'b', 'c')) d$y <- runif(nrow(d)) d latex( summaryM(x2 + y ~ x1, data= d, test=TRUE, overall=TRUE, continuous=6 ), file="", where="htbp", caption="Descriptive stats and tests of between-group differences for all primary and secondary neuroimaging outcomes", label= "tbl:descrOutcomes", exclude1=FALSE, digits=2, long=TRUE, prmsd=TRUE, npct="slash", size="tiny", npct.size='tiny', center="centering") @ Put a complex table in an external \texttt{.tex} file for conversion to \texttt{html} using \texttt{htlatex}: <>= ## Example taken from help file for summaryM options(digits=3) set.seed(173) sex <- factor(sample(c("m","f"), 500, rep=TRUE)) country <- factor(sample(c('US', 'Canada'), 500, rep=TRUE)) age <- rnorm(500, 50, 5) sbp <- rnorm(500, 120, 12) label(sbp) <- 'Systolic BP' units(sbp) <- 'mmHg' treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE)) treatment[1] sbp[1] <- NA # Generate a 3-choice variable; each of 3 variables has 5 possible levels symp <- c('Headache','Stomach Ache','Hangnail', 'Muscle Ache','Depressed') symptom1 <- sample(symp, 500,TRUE) symptom2 <- sample(symp, 500,TRUE) symptom3 <- sample(symp, 500,TRUE) Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms') table(as.character(Symptoms)) # Produce separate tables by country f <- summaryM(age + sex + sbp + Symptoms ~ treatment + country, groups='treatment', test=TRUE) fi <- '/tmp/z.tex' cat('\\documentclass{report}\\begin{document}\n', file=fi) w <- latex(f, file=fi, npct='slash', middle.bold=TRUE, prmsd=TRUE, append=TRUE) cat('\\end{document}\n', file=fi, append=TRUE) ## In /tmp run htlatex z.tex to produce z.html ## To get htlatex install the linux tex4ht package ## You may also need to install the tth package ## See https://biostat.app.vumc.org/wiki/Main/SweaveConvert @ <>= getHdata(pbc) s5 <- summaryM(bili + albumin + stage + protime + sex + age + spiders ~ drug, data=pbc) latex(s5, npct='both', here=TRUE, insert.bottom = "Polly", file='') @ \end{document} Hmisc/inst/tests/aregImpute5.r0000644000176200001440000000277113667216224016026 0ustar liggesusers# From Trevor Thompson tkt2@cdc.gov require(rms) if(FALSE) { require(rms) ## simulate competing risk data with 2 predictors set.seed(115) dat1<- crisk.sim(n=2000, foltime=200, dist.ev=c("weibull","weibull"), anc.ev=c(0.8,0.9), beta0.ev=c(2,2), dist.cens="weibull", anc.cens=1,beta0.cens=1, z=NULL, beta=list(c(-0.69,0), c(-0.35, -0.5)), x=list(c("bern", 0.3), c("bern", 0.6)), nsit=2) dat1$event<-factor(ifelse(is.na(dat1$cause), 0, dat1$cause)) dat1$x<-factor(dat1$x) dat1$x.1<-factor(dat1$x.1) ## set some predictor data to missing miss1<-rbinom(2000, 1, .15) miss2<-rbinom(2000, 1, .05) dat1[miss1==1,]$x<-NA dat1[miss2==1,]$x.1<-NA describe(dat1) ## impute missing data imp.obj <- aregImpute(~ x + x.1 + event + time, n.impute=20, data=dat1, x=TRUE) imp.obj ## create Fine-Gray function for dtrans dtrans.fg <- function(data) finegray(Surv(time, event) ~ ., data=data, etype=1) chk <- dtrans.fg(dat1) dim(chk) ## fit model with imputed data mod.fg <- fit.mult.impute(Surv(fgstart, fgstop, fgstatus) ~ x + x.1, cph, data=dat1, xtrans=imp.obj, dtrans=dtrans.fg, weights=fgwt, fit.reps=TRUE, x=TRUE, 
y=TRUE, surv=TRUE) # Problem: fgwt is in data created by dtrans, not in dat1 } x <- runif(10) y <- runif(10) x[1] <- NA ww = seq(0.01, 1, length=10) d <- data.frame(x, y, ww) a <- aregImpute(~ x + y, data=d, nk=0) dt <- function(dat) cbind(dat, ww=1:10) f <- fit.mult.impute(Surv(y) ~ x, cph, weights=ww, data=d, xtrans=a, dtrans=dt) Hmisc/inst/tests/examples.Rmd0000644000176200001440000002721414171041716015726 0ustar liggesusers--- title: "Hmisc Examples" author: "FE Harrell
    Department of Biostatistics
    Vanderbilt University
    " date: '`r Sys.Date()`' output: rmdformats::readthedown: thumbnails: false lightbox: true gallery: true highlight: tango use_bookdown: true toc_depth: 3 fig_caption: true csl: american-medical-association.csl bibliography: /home/harrelfe/bib/harrelfe.bib description: "Hmisc examples using plotly interactive graphics in an html report" --- **NOTE**: remove the `csl` and `bibliography` lines above if you don't have any bibliographic citations. # Introduction This is a set of reproducible examples for the R[@R] `Hmisc` package[@Hmisc], put together in an `rmarkdown` html document using `RStudio` and `knitr`. When viewing the resulting [html file](http://hbiostat.org/R/Hmisc/examples.html) you can see all the code, and there exist options to download the entire `rmarkdown` script, which is especially helpful for seeing how `knitr` chunks are specified. Graphics that have a [plotly](http://plot.ly/r) method for them are rendered using `plotly` instead of using defaults such as base graphics, `lattice`, or `ggplot2`. That way the plots are somewhat interactive, e.g., they allow drill-down for more information without the viewer needing to install `R`. Much of the tabular output produced here was created using `html` methods, which are especially easy to implement with `rmarkdown` and make for output that can be directly opened using word processors. Jump to [Computing Environment](#compenv) for a list of packages used here, and their version numbers. `Rmarkdown` themes such as `bookdown` and [rmdformats::readthedown](https://github.com/juba/rmdformats) (the latter is used here) allow one to number figures and then symbolically reference them. The `knitrSet` `capfile='captions.md'` argument below capitalizes on this to make it easy for the user to insert a table of figures anywhere in the report. Here we place it at the end. A caption listed in the table of figures is the short caption (`scap=` or `fig.scap` in the chunk header) if there is one, otherwise the long caption is used. If neither caption is used, that figure will not appear in the table of figures. The full script for this report may be found [here](https://github.com/harrelfe/Hmisc/blob/master/inst/tests/examples.Rmd). # Setup ```{r setup,results='hide'} require(Hmisc) knitrSet(lang='markdown', h=4.5, fig.path='png/', fig.align='left', capfile='captions.md') # + sometimes ,cache=TRUE # knitrSet redirects all messages to messages.txt options(grType='plotly') # for certain graphics functions mu <- markupSpecs$html # markupSpecs is in Hmisc cap <- mu$cap # function to output html caption lcap <- mu$lcap # for continuation of a long caption # These last 2 functions are used by the putHfig function in Hmisc cat('', file='captions.md') # initialize table of short captions ``` The following (r mu$styles()) defines HTML styles and javascript functions, such as ffig, smg and the function for expanding and collapsing text as done by the expcoll function. `r mu$styles()` Here is an example using expcoll. Click on the down arrow to expand; it turns to an up arrow which can be clicked to contract. `r mu$expcoll('
    ', html(data.frame(x1=runif(10), x2=letters[1:10]),file=FALSE))` Here is an example using a `knitr` hook function `markupSpec$html$uncover` which is communicated to `knitr` by `knitrSet`. ```{r, uncover=TRUE, label='Press Here', id='script'} 1 + 1 ``` To make the html output use the entire wide screen run the R command `mu$widescreen()`. # Fetching Data, Modifying Variables, and Printing Data Dictionary The `getHdata` function is used to fetch a dataset from the Vanderbilt `DataSets` web site `hbiostat.org/data`. The `upData` function is used to - create a new variable from an old one - add labels to 2 variables - add units to the new variable - remove the old variable - automatically move units of measurements from parenthetical expressions in labels to separate `units` attributed used by `Hmisc` and `rms` functions for table making and graphics `contents` is used to print a data dictionary, run through an `html` method for nicer output. Information about the data source may be found [here](https://hbiostat.org/data/repo/pbc.html). Click on the number of levels in the `contents` table to jump to the value labels for the variable. ```{r metadata,results='asis'} getHdata(pbc) # Have upData move units from labels to separate attribute pbc <- upData(pbc, fu.yrs = fu.days / 365.25, labels = c(fu.yrs = 'Follow-up Time', status = 'Death or Liver Transplantation'), units = c(fu.yrs = 'year'), drop = 'fu.days', moveUnits=TRUE, html=TRUE) # The following can also be done by running this command # to put the results in a new browser tab: # getHdata(pbc, 'contents') html(contents(pbc), maxlevels=10, levelType='table') ``` # Descriptive Statistics Without Stratification The html method is used for the `describe` function, and the output is put in a scrollable box. Other than for the overall title and variable names and labels, the output size used here is 80 (0.8 × the usual font size[^1]). But the graphical display of the descriptives statistics that follows this is probably better. [^1]: The default is 75% size. ```{r describe,results='asis'} # did have results='asis' above d <- describe(pbc) html(d, size=80, scroll=TRUE) # prList is in Hmisc; useful for plotting or printing a list of objects # Can just use plot(d) if don't care about the mess # If using html output these 2 images would not be rendered no matter what p <- plot(d) # The option htmlfig=2 causes markupSpecs$html$cap() to be used to # HTML-typeset as a figure caption and to put the sub-sub section # marker ### in front of the caption. htmlfig is the only reason # results='asis' was needed in the chunk header # We define a long caption for one of the plots, which does not appear # in the table of contents # prList works for html notebooks but not html documents # prList(p, lcap=c('', 'These are spike histograms'), htmlfig=2) ``` You can also re-form multiple `plotly` graphs into a [single HTML object](http://stackoverflow.com/questions/35193612). If you want to have total control over long and short figure captions, use the Hmisc `putHfig` function to render the result, with a caption and a short caption for the table of contents. That would have fixed a problem with the chunk below: when `plotly` graphics are not rendered in the usual way, the figure is not numbered and no caption appears. 
```{r plotlym,cap='This used the htmltools tagList function.',scap='Two plotly graphics combined into one'} htmltools::tagList(p) # lapply(p, plotly::as.widget) ``` You can also create figure captions outside of R code by using the smg, fcap HTML tags defined in markupSpecs. The long caption not appearing in the table of contents will be in a separate line without ###. # Stratified Descriptive Statistics Produce stratified quantiles, means/SD, and proportions by treatment group. Plot the results before rendering as an advanced html table: - categorical variables: a single dot chart - continuous variables: a series of extended box plots ```{r summaryM,cap=paste('Proportions and', mu$chisq(), 'tests for categorical variables')} s <- summaryM(bili + albumin + stage + protime + sex + age + spiders + alk.phos + sgot + chol ~ drug, data=pbc, overall=FALSE, test=TRUE) plot(s, which='categorical') ``` To construct the caption outside of the code chunk use e.g. ### r cap('Proportions and', mu$chisq(), 'tests for categorical variables') where a backtick is placed before r and after the last ). ```{r summaryM2,results='asis',cap='Extended box plots for the first 4 continuous variables'} plot(s, which='continuous', vars=1 : 4) ``` ```{r summaryM3,cap='Extended box plots for the remaining continuous variables'} plot(s, which='continuous', vars=5 : 7) ``` ```{r summaryM4} html(s, caption='Baseline characteristics by randomized treatment', exclude1=TRUE, npct='both', digits=3, middle.bold=TRUE, prmsd=TRUE, brmsd=TRUE, msdsize=mu$smaller2) ``` Now show almost the full raw data for one continuous variable stratified by treatment. This displays spike histograms using at most 100 bins, and also shows the mean and quantiles similar to what is in an extended box plot: 0.05, 0.25, 0.5, 0.75, 0.95. Measures of spread are also shown if the user clicks on their legend entries: Gini's mean difference (mean absolute difference between any two values) and the SD. These can be seen as horizontal lines up against the minimum x-value. ```{r histbox,cap="Spike histograms, means, quantiles, Gini's mean difference, and SD stratified by treatment",scap="Stratified spike histograms and quantiles"} with(pbc, histboxp(x=sgot, group=drug, sd=TRUE)) ``` The following is a better way to display proportions for categorical variables. If computing marginal statistics by running the dataset through the `Hmisc` `addMarginal` function, the `plot` method with `options(grType='plotly')` is especially useful. ```{r summaryM5,cap='Proportions (large symbols) and proportions stratified by treatment (small symbols)',scap='Proportions with and without stratification by treatment'} pbcm <- addMarginal(pbc, drug) s <- summaryP(stage + sex + spiders ~ drug, data=pbc) # putHcap('Proportions stratified by treatment') plot(s, groups='drug') s <- summaryP(stage + sex + spiders ~ drug, data=pbcm) plot(s, marginVal='All', marginLabel='All Treatment Groups') ``` # Better Demonstration of Boxplot Replacement ```{r support,cap="Spike histograms, means, quantiles, Gini's mean difference, and SD for MAP stratified by diagnosis",scap="Stratified spike histograms and quantiles for MAP"} getHdata(support2) with(support2, histboxp(x=meanbp, group=dzgroup, sd=TRUE, bins=200)) ``` # Changing Size of Figure Captions As explained [here](https://stackoverflow.com/questions/45018397), one can place captions under figures using ordinary `knitr` capabilities, and one can change the size of captions. 
The following example defines a `CSS` style to make captions small (here `0.6em`), and produces a plot with a caption. Unlike with `putHfig`, captions given in `knitr` chunks do not also appear in the table of contents.

```{r simplecap,cap='This is a simple figure caption'}
# Note: in the chunk header cap is an alias for fig.cap defined by knitrSet
plot(runif(10))
```

# Computing Environment[^2] {#compenv}

`r mu$session()`

# Bibliographic File Management

## Find and Install `.csl` Reference Style Files

```{r findbib,eval=FALSE}
# Note: mu was defined in an earlier code chunk
# Only need to install .csl file once.
mu$installcsl(rec=TRUE)   # get list of recommended styles
mu$installcsl()           # web search of styles meeting your criteria
# Install a .csl file to your project directory:
mu$installcsl('american-medical-association')
```

`r markupSpecs$markdown$tof()`

Note: the hidden R command that rendered the table of figures (including short captions) was `markupSpecs$markdown$tof()`.

[^2]: `mu` is a copy of the part of the `Hmisc` package object `markupSpecs` that is for html. It includes a function `session` that renders the session environment (including package versions) in html.

# References

Hmisc/inst/tests/test.xpt0000644000176200001440000000276012243661443015162 0ustar liggesusers[binary SAS version 5 transport file containing the TEST dataset (variables RACE, AGE, D1, DT1, T1); non-text content omitted]
Hmisc/inst/tests/test.rda0000644000176200001440000000054412243661443015113 0ustar liggesusers[binary R data file containing the test data frame (RACE, AGE, D1, DT1, T1); non-text content omitted]
Hmisc/inst/tests/summarySp.r0000644000176200001440000000657713340335643015632 0ustar liggesusersrequire(Hmisc)
n <- 1000
set.seed(1)
d <- data.frame(sbp   =round(rnorm(n, 120, 10)),
                dbp   =round(rnorm(n, 80, 10)),
                age   =round(rnorm(n, 50, 10), 1),
                days  =sample(c(0,30,60,90), n, TRUE),
                S1    =Surv(2*runif(n)), S2=Surv(runif(n)),
                race  =sample(c('Asian', 'Black/AA', 'White'), n, TRUE),
                sex   =sample(c('Female', 'Male'), n, TRUE),
                treat =sample(c('A', 'B'), n, TRUE),
                region=sample(c('North America','Europe'), n, TRUE),
                meda  =sample(0:1, n, TRUE), medb=sample(0:1, n, TRUE))
d <- upData(d,
            labels=c(sbp='Systolic BP', dbp='Diastolic BP',
                     race='Race', sex='Sex', treat='Treatment',
                     days='Time Since Randomization',
                     S1='Hospitalization', S2='Re-Operation',
                     meda='Medication A', medb='Medication B'),
            units=c(sbp='mmHg', dbp='mmHg', age='years', days='days'))
s <- summaryS(age + sbp + dbp ~ days + region + treat, data=d)
plotp(s, groups='treat')
plotp(s, groups='treat', fitter=loess)
# Show both points and smooth curves:
plotp(s, groups='treat', fitter=loess, showpts=TRUE)
# Use loess to estimate the probability of two different types of events as
# a function of time
s <- summaryS(meda + medb ~ days + treat + region, data=d)
plotp(s, groups='treat', fitter=loess)
# Demonstrate dot charts of summary statistics
s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=mean)
plotp(s, groups='treat', height=200)
# Compute parametric confidence limits for mean, and
include sample sizes f <- function(x) { x <- x[! is.na(x)] c(smean.cl.normal(x, na.rm=FALSE), n=length(x)) } s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=f) mu <- markupSpecs$html # in Hmisc lab <- paste0(mu$overbar('X'), ' ± t0.975 × s') plotp(s, groups='treat', funlabel=lab) ## Stratify by region and treat fit an exponential distribution to ## S1 and S2 and estimate the probability of an event within 0.5 years f <- function(y) { hazard <- sum(y[,2]) / sum(y[,1]) 1. - exp(- hazard * 0.5) } s <- summaryS(S1 + S2 ~ region + treat, data=d, fun=f) plotp(s, groups='treat', funlabel='Prob[Event Within 6m]', xlim=range(pretty(s$y))) ## Demonstrate combined use of fun and sfun ## First show the same quantile intervals used in panel.bppplot by ## default, stratified by region and day #d <- upData(d, days=round(days / 30) * 30) g <- function(y) { probs <- c(0.05, 0.125, 0.25, 0.375) probs <- sort(c(probs, 1 - probs)) y <- y[! is.na(y)] w <- hdquantile(y, probs) m <- hdquantile(y, 0.5, se=TRUE) se <- as.numeric(attr(m, 'se')) c(Median=as.numeric(m), w, se=se, n=length(y)) } s <- summaryS(sbp + dbp ~ days + region, fun=g, data=d) plotp(s, groups='region', sfun=mbarclpl) # Similar but use back-to-back spike histograms s <- summaryS(sbp + dbp ~ days + region, data=d) plotp(s, groups='region', sfun=medvpl, alphaSegments=0.6) ### ??? ## Show Wilson confidence intervals for proportions, and confidence ## intervals for difference in two proportions g <- function(y) { y <- y[!is.na(y)] n <- length(y) p <- mean(y) se <- sqrt(p * (1. - p) / n) structure(c(binconf(sum(y), n), se=se, n=n), names=c('Proportion', 'Lower', 'Upper', 'se', 'n')) } s <- summaryS(meda + medb ~ days + region, fun=g, data=d) plotp(s, groups='region', sfun=mbarclpl) Hmisc/inst/tests/fit.mult.impute.bootstrap.r0000644000176200001440000000150012311316755020677 0ustar liggesusers# Edited example from Jane Cook jane.cookng@gmail.com require(rms) set.seed(1) n <- 101 y <- runif(n) y[1:2] <- NA x1 <- sample(c('a','b'), n, TRUE) x2 <- runif(n) + .15 * y d <- data.frame(y, x1, x2) a <- aregImpute(~ y + x1 + x2, burnin=10, n.impute=100, data=d) f <- fit.mult.impute(y ~ x1 + x2, ols, a, data=d) B <- 20 # actually use B=1000 ranks <- matrix(NA, nrow=B, ncol=2) ## Put - in front of plot in next line to have rank 1 = best rankvars <- function(fit) rank(plot(anova(fit), sort='none', pl=FALSE)) Rank <- rankvars(f) for(i in 1:B) { j <- sample(1:n, n, TRUE) bootfit <- update(f, data=d, subset=j, pr=FALSE) ranks[i,] <- rankvars(bootfit) } for(k in 1 : 2) { cat('Frequency of Ranks for Predictor:', k, '\n') print(table(ranks[, k])) cat('\n') } lim <- t(apply(ranks, 2, quantile, probs=c(.025,.975))) Hmisc/inst/tests/gbayes.r0000644000176200001440000000051713006204320015062 0ustar liggesusersrequire(Hmisc) source('~/R/Hmisc/R/gbayes.s') d <- 1:3 x <- c(-.5, 0, .5) v <- c(1, .5, .25) for(w in c('cdf', 'postmean')) { f <- gbayesMixPost(d0=1.2, v0=5, what=w) print(f(d, x[1], v[1])) print(f(d[1], x, v)) f <- gbayesMixPost(d0=1.2, d1=1.2, v0=5, v1=5, mix=.5, what=w) print(f(d, x[1], v[1])) print(f(d[1], x, v)) } Hmisc/inst/tests/summaryM-customtest.r0000644000176200001440000000212513016056214017637 0ustar liggesusers## From tormodb https://github.com/harrelfe/Hmisc/issues/61 require(Hmisc) src <- example(summaryM, give.lines=TRUE) eval(parse(text=src[1:34])) # xless(f) # see built-in cat. test catTestchisq.sim_p <- function(tab) { st <- if (! 
is.matrix(tab) || nrow(tab) < 2 || ncol(tab) < 2) list(p.value = NA, statistic = NA, parameter = NA) else { rowcounts <- tab %*% rep(1, ncol(tab)) tab <- tab[rowcounts > 0, ] if (! is.matrix(tab)) list(p.value = NA, statistic = NA, parameter = NA) else chisq.test(tab, correct = FALSE, simulate.p.value = TRUE) } list(P = st$p.value, stat = st$statistic, df = (nrow(tab) - 1) * (ncol(tab) - 1), # st$parameter NA testname = "Pearson", namefun = 'chisq', # must add this line for Hmisc_4.0 statname = "Chi-square", latexstat = "\\chi^{2}_{df}", plotmathstat = "chi[df]^2") } f <- summaryM(age + sex + sbp + Symptoms ~ treatment, test=TRUE, catTest = catTestchisq.sim_p) f Hmisc/inst/tests/ace.s0000644000176200001440000000126612700120434014346 0ustar liggesusers# Verify that ace works for categorical response variable, giving # a y-transformation that is a linear translation of Fisher's optimum scores # (y-specific mean of x) when there is one predictor that is forced to # be linear. For now using aregImpute's override of ace library(acepack) set.seed(1) y <- rep(1:3,100) x <- -3*(y==1) -7*(y==2) + 30*(y==3) + runif(300) - .5 xbar <- tapply(as.matrix(x), y, mean) xbar # 1 2 3 #-3.010843 -7.021050 30.002227 # z <- ace(x, y, cat=0, lin=1) table(y, z$ty) # -0.82366 -0.583755 1.40741 #1 0 100 0 #2 100 0 0 #3 0 0 100 plot(xbar[y], z$ty) cor(xbar[y], z$ty) #[1] 1 Hmisc/inst/tests/hoeff.r0000644000176200001440000000220213163733506014711 0ustar liggesusersrequire(Hmisc) if(FALSE) .Fortran('jrank', as.double(1:5), as.double(1:5), 5L, double(5), double(5), double(5)) hoeffd(1:6, c(1,3,2,4,5,6)) y <- 1:20; y[3] <- 17; y[17] <- 3 hoeffd(1:20, y)$D set.seed(5) x <- runif(800); y <- runif(800) hoeffd(x,y)$D for(n in c(50,100,200,400,1000)) { set.seed(1) x <- seq(-10,10,length=n) y <- x*sign(runif(n,-1,1)) h <- hoeffd(x,y) print(c(h$D[1,2], h$aad[1,2], h$maxad[1,2])) } #[1] 0.06812286 in old version (real*4 in places) #[1] 0.04667929 #[1] 0.05657654 #[1] 0.07048487 #[1] 0.06323746 # From http://www.sciencemag.org/content/suppl/2011/12/14/334.6062.1518.DC1/Reshef.SOM.pdf # Table S2: Definitions of functions used for Figure 2A in the Science article w <- function(y) { ylab <- deparse(substitute(y)) plot(x, y, ylab=substitute(y), type='l') h <- hoeffd(x, y) cat(ylab, '\n') print(c(D=h$D[1,2],P=h$P[1,2],aDif=h$aad[1,2],mDif=h$maxad[1,2])) } x <- seq(0, 1, length=320) par(mfrow=c(3,3)) w(x) w(4*(x-.5)^2) w(128*(x-1/3)^3 -48*(x-1/3)^2 - 12*(x-1/3) + 2) w(10^(10*x) - 1) w(sin(10*pi*x) + x) w(sin(16*pi*x)) w(sin(13*pi*x)) w(sin(7*pi*x*(1+x))) w(runif(320)) Hmisc/inst/tests/testexportlib.sas0000644000176200001440000000072212243661443017062 0ustar liggesusersPROC FORMAT; VALUE race 1=green 2=blue 3=purple; RUN; PROC FORMAT CNTLOUT=format;RUN; data test; LENGTH race 3 age 4; age=30; label age="Age at Beginning of Study"; race=2; d1='3mar2002'd ; dt1='3mar2002 9:31:02'dt; t1='11:13:45't; output; age=31; race=4; d1='3jun2002'd ; dt1='3jun2002 9:42:07'dt; t1='11:14:13't; output; format d1 mmddyy10. dt1 datetime. t1 time. 
race race.; run; %INCLUDE "H:\R\Hmisc\sas\exportlib.sas"; %exportlib(work, H:\R\Hmisc\tests\csv); Hmisc/inst/tests/aregImpute3.r0000644000176200001440000000110312243661443016004 0ustar liggesusersrequire(Hmisc) n <- 100 set.seed(1) y <- sample(0:8, n, TRUE) x1 <- runif(n) x2 <- runif(n) x2[1:10] <- NA z <- sample(1:20, n, TRUE) d <- data.frame(y, x1, x2, z) f1 <- glm(y ~ x1 + x2, family=poisson) f2 <- glm(y ~ x1 + x2 + offset(log(z)), family=poisson) a <- aregImpute(~ y + x1 + x2) g1 <- fit.mult.impute(y ~ x1 + x2 , glm, a, family=poisson, data=d) g2 <- fit.mult.impute(y ~ x1 + x2 + offset(log(z)), glm, a, family=poisson, data=d) # g3 <- fit.mult.impute(y ~ x1 + x2 + offset(log(z)), Glm, a, family=poisson, data=d) coef(g1) coef(g2) # coef(g3) coef(f1) coef(f2) Hmisc/inst/tests/summaryS.r0000644000176200001440000001231612666577507015467 0ustar liggesusersrequire(Hmisc) n <- 100 set.seed(1) d <- data.frame(sbp=rnorm(n, 120, 10), dbp=rnorm(n, 80, 10), age=rnorm(n, 50, 10), days=sample(1:n, n, TRUE), S1=Surv(2*runif(n)), S2=Surv(runif(n)), race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE), sex=sample(c('Female', 'Male'), n, TRUE), treat=sample(c('A', 'B'), n, TRUE), region=sample(c('North America','Europe'), n, TRUE), meda=sample(0:1, n, TRUE), medb=sample(0:1, n, TRUE)) d <- upData(d, labels=c(sbp='Systolic BP', dbp='Diastolic BP', race='Race', sex='Sex', treat='Treatment', days='Time Since Randomization', S1='Hospitalization', S2='Re-Operation', meda='Medication A', medb='Medication B'), units=c(sbp='mmHg', dbp='mmHg', age='years', days='days')) Png <- function(z) png(paste('/tmp/summaryS', z, '.png', sep='')) Png(1) s <- summaryS(age + sbp + dbp ~ days + region + treat, data=d) # d2 <- subset(d, region=='Europe') # par(mfrow=c(2,1)) # with(d2, plot(days, dbp, col=as.integer(treat))) # ss <- subset(s, region=='Europe' & yvar == 'dbp') # dim(ss) # with(ss, plot(days, y, col=as.integer(treat))) # plot(s) # 3 pages plot(s, groups='treat') Png(1) plot(s, groups='treat', datadensity=TRUE, scat1d.opts=list(lwd=.5, nhistSpike=0)) dev.off() Png(2) plot(s, groups='treat', panel=panel.loess, key=list(space='bottom', columns=2), datadensity=TRUE, scat1d.opts=list(lwd=.5)) dev.off() # Show both points and smooth curves: Png(3) plot(s, groups='treat', panel=function(...) {panel.xyplot(...); panel.loess(...)}) dev.off() plot(s, y ~ days | yvar * region, groups='treat') # Make your own plot using data frame created by summaryP xyplot(y ~ days | yvar * region, groups=treat, data=s, scales=list(y='free', rot=0)) # Use loess to estimate the probability of two different types of events as # a function of time s <- summaryS(meda + medb ~ days + treat + region, data=d) pan <- function(...) panel.plsmo(..., type='l', label.curves=max(which.packet()) == 1, datadensity=TRUE) Png(4) plot(s, groups='treat', panel=pan, paneldoesgroups=TRUE, scat1d.opts=list(lwd=.7), cex.strip=.8) dev.off() # Demonstrate dot charts of summary statistics s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=mean) plot(s) Png(5) plot(s, groups='treat', funlabel=expression(bar(X))) dev.off() # Compute parametric confidence limits for mean, and include sample sizes f <- function(x) { x <- x[! is.na(x)] c(smean.cl.normal(x, na.rm=FALSE), n=length(x)) } s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=f) # Draw [ ] for lower and upper confidence limits in addition to thick line Png(6) plot(s, funlabel=expression(bar(X) %+-% t[0.975] %*% s), pch.stats=c(Lower=91, Upper=93)) # type show.pch() to see defs. 
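# (Added note: pch codes 91 and 93 are the ASCII codes for '[' and ']',
# which is why the Lower and Upper confidence limits are drawn as bracket
# characters around the thick mean line)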
dev.off() Png(7) plot(s, textonly='n', textplot='Mean', digits=1) dev.off() # Customize printing of statistics to use X bar symbol and smaller # font for n=... cust <- function(y) { means <- format(round(y[, 'Mean'], 1)) ns <- format(y[, 'n']) simplyformatted <- paste('X=', means, ' n=', ns, ' ', sep='') s <- NULL for(i in 1:length(ns)) { w <- paste('paste(bar(X)==', means[i], ',~~scriptstyle(n==', ns[i], '))', sep='') s <- c(s, parse(text=w)) } list(result=s, longest=simplyformatted[which.max(nchar(simplyformatted))]) } Png(8) plot(s, groups='treat', cex.values=.65, textplot='Mean', custom=cust, key=list(space='bottom', columns=2, text=c('Treatment A:','Treatment B:'))) dev.off() ## Stratifying by region and treat fit an exponential distribution to ## S1 and S2 and estimate the probability of an event within 0.5 years f <- function(y) { hazard <- sum(y[,2]) / sum(y[,1]) 1. - exp(- hazard * 0.5) } s <- summaryS(S1 + S2 ~ region + treat, data=d, fun=f) plot(s, groups='treat', funlabel='Prob[Event Within 6m]', xlim=c(.3, .7)) ## Demonstrate simultaneous use of fun and panel ## First show the same quantile intervals used in panel.bppplot by ## default, stratified by region and day d <- upData(d, days=round(days / 30) * 30) g <- function(y) { probs <- c(0.05, 0.125, 0.25, 0.375) probs <- sort(c(probs, 1 - probs)) y <- y[! is.na(y)] w <- hdquantile(y, probs) m <- hdquantile(y, 0.5, se=TRUE) se <- as.numeric(attr(m, 'se')) c(Median=as.numeric(m), w, se=se, n=length(y)) } s <- summaryS(sbp + dbp ~ days + region, fun=g, data=d) Png(9) plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE) dev.off() # Similar but use half-violin plots s <- summaryS(sbp + dbp ~ days + region, data=d) Png('9v') plot(s, groups='region', panel=medvPanel, paneldoesgroups=TRUE) dev.off() ## Show Wilson confidence intervals for proportions, and confidence ## intervals for difference in two proportions g <- function(y) { y <- y[!is.na(y)] n <- length(y) p <- mean(y) se <- sqrt(p * (1. 
- p) / n) structure(c(binconf(sum(y), n), se=se, n=n), names=c('Proportion', 'Lower', 'Upper', 'se', 'n')) } s <- summaryS(meda + medb ~ days + region, fun=g, data=d) Png(10) plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE) dev.off() Hmisc/inst/tests/simRegOrd.r0000644000176200001440000000420013150050317015501 0ustar liggesusers## First use no ordinal high-end category overrides, and compare power ## to t-test when there is no covariate require(Hmisc) n <- 100 delta <- .5 sd <- 1 require(pwr) power.t.test(n = n / 2, delta=delta, sd=sd, type='two.sample') # 0.70 source('~/R/Hmisc/R/popower.s') set.seed(1) w <- simRegOrd(n, delta=delta, sigma=sd, pr=TRUE) # 0.686 ## Now do ANCOVA with a quadratic effect of a covariate n <- 100 x <- rnorm(n) w <- simRegOrd(n, nsim=400, delta=delta, sigma=sd, x=x, X=cbind(x, x^2), Eyx=function(x) x + x^2, pr=TRUE) w$power # 0.68 ## Fit a cubic spline to some simulated pilot data and use the fitted ## function as the true equation in the power simulation require(rms) N <- 1000 set.seed(2) x <- rnorm(N) y <- x + x^2 + rnorm(N, 0, sd=sd) f <- ols(y ~ rcs(x, 4), x=TRUE) n <- 100 j <- sample(1 : N, n, replace=n > N) x <- x[j] X <- f$x[j,] w <- simRegOrd(n, nsim=400, delta=delta, sigma=sd, x=x, X=X, Eyx=Function(f), pr=TRUE) w$power ## 0.70 ## Finally, add discrete ordinal category overrides and high end of y ## Start with no effect of treatment on these ordinal event levels (OR=1.0) w <- simRegOrd(n, nsim=400, delta=delta, odds.ratio=1, sigma=sd, x=x, X=X, Eyx=Function(f), p=c(.98, .01, .01), pr=TRUE) w$power ## 0.61 (0.3 if p=.8 .1 .1, 0.37 for .9 .05 .05, 0.50 for .95 .025 .025) ## Now assume that odds ratio for treatment is 2.5 ## First compute power for clinical endpoint portion of Y alone or <- 2.5 p <- c(.9, .05, .05) popower(p, odds.ratio=or, n=100) # 0.275 ## Compute power of t-test on continuous part of Y alone power.t.test(n = 100 / 2, delta=delta, sd=sd, type='two.sample') # 0.70 ## Note this is the same as the p.o. 
model power from simulation above ## Solve for OR that gives the same power estimate from popower popower(rep(.01, 100), odds.ratio=2.4, n=100) # 0.706 ## Compute power for continuous Y with ordinal override w <- simRegOrd(n, nsim=400, delta=delta, odds.ratio=or, sigma=sd, x=x, X=X, Eyx=Function(f), p=c(.9, .05, .05), pr=TRUE) w$power ## 0.72 Hmisc/inst/tests/summaryP2.r0000644000176200001440000000233112762553032015522 0ustar liggesusersrequire(Hmisc) n <- 500 set.seed(1) d <- data.frame( race = sample(c('Asian', 'Black/AA', 'White'), n, TRUE), sex = sample(c('Female', 'Male'), n, TRUE), treat = sample(c('A', 'B'), n, TRUE), smoking = sample(c('Smoker', 'Non-smoker'), n, TRUE), hypertension = sample(c('Hypertensive', 'Non-Hypertensive'), n, TRUE), region = sample(c('North America','Europe','South America', 'Europe', 'Asia', 'Central America'), n, TRUE)) d <- upData(d, labels=c(race='Race', sex='Sex')) dm <- addMarginal(d, region) s <- summaryP(race + sex + smoking + hypertension ~ region + treat, data=dm) ## add exclude1=FALSE to include female category ggplot(s, groups='treat', exclude1=TRUE, abblen=12) ggplot(s, groups='region') ## plotly graphic source('~/R/Hmisc/R/summaryP.s');source('~/R/Hmisc/R/dotchartpl.s') options(grType='plotly') plot(s, groups='treat', marginVal='All', marginLabel='All Regions', xlim=c(0,1)) ## Make sure plotly graphic works with simpler cases s <- summaryP(race + sex + smoking + hypertension ~ treat, data=dm) plot(s) plot(s, groups='treat') s <- summaryP(race + sex + smoking + hypertension ~ 1, data=dm) plot(s) Hmisc/inst/tests/summary.formula.response.stratify.r0000644000176200001440000000036312304110712022452 0ustar liggesusersrequire(Hmisc) set.seed(1) d <- expand.grid(day=c(1, 3), rx=c('A','B'), reps=1:3) d$x <- runif(nrow(d)) s <- summary(x ~ day + stratify(rx), fun=smean.sd, overall=FALSE, data=d) w <- latex(s, file='/tmp/z.tex', table.env=FALSE, booktabs=TRUE) Hmisc/inst/tests/test.xml0000644000176200001440000002137412243661443015151 0ustar liggesusers 111 ABC 27 1 2 1976-04-22 222 XYX 35.2 2 1 1968-02-10 333 WHO 19 1 1 1984-04-20 444 WHY 45.7 1 3 1957-08-14 555 HUH 82 2 3 1921-05-06
Hmisc/inst/tests/rcspline.plot.r0000644000176200001440000000026012705252626016420 0ustar liggesuserslibrary(Hmisc)
data(pbc)
with(pbc, rcspline.plot(age, time, model="cox", event=status %in% 1:2, nk=5,
                        show="xbeta", showknots=TRUE, plotcl=TRUE, lty=1) )
Hmisc/inst/tests/mChoice.r0000644000176200001440000000356312600352300015164 0ustar liggesusers## Thanks: Colin O'Rourke
require(Hmisc)
d = data.frame(
  CaseNum = 1:20,
  Status = c("Dead", "Dead", "Dead", "Dead", "Alive", "Alive", "Alive",
             "Dead", "Dead", "Dead", "Dead", "Dead", "Alive", "Alive",
             "Alive", "Dead", "Dead", "Dead", "Alive", "Dead"),
  Cause1 = c("Bloating", "Heart", "Age", "Sepsis", NA, NA, NA, "Age",
             "Bloating", "Sepsis", "Heart", "Heart", NA, NA, NA, "Cancer",
             "Sepsis", "Heart", NA, "Age"),
  Cause2 = c("", "Age", "Bloating", "Dog bite", NA, NA, NA, "Fright",
             "Sepsis", "Age", "Bloating", "Cancer", NA, NA, NA, "Heart",
             "Dog bite", "", NA, "Dog bite"),
  Cause3 = c("", "", "", "Trauma", NA, NA, NA, "", "Trauma", "", "Trauma",
             "", NA, NA, NA, "", "", "", NA, ""))
# The data created from the R code above has patient status (alive/dead) and
# if dead, what the cause or causes of death are.  Patients who are alive have
# all causes set to missing.
# Turn the causes of death into a variable of class “mChoice”.
d$Cause = with(d, mChoice(Cause1, Cause2, Cause3, label="Cause of death"))
sum(is.na(d$Cause))
summary(~ Cause, data=d)
summary(~ Cause, data=subset(d, Status == 'Dead'))
# Notice how NA is tabulated as part of the summary, and how it appears as
# the most common combination of causes.  Now, I would like to summarise the
# frequency of each cause, marginal on the other causes.
# This gives the following summary table (output in table 1):
# Not only is NA tabulated in the summary, but the percentages are out of the
# full 20 patients, rather than only the 13 patients who died.
# FH response: mChoice intended for the NA to be something like "none" or "", and to get the proportions # you want you need to subset the data as above on Status == 'Dead' Hmisc/inst/tests/label.r0000644000176200001440000000031512654122672014704 0ustar liggesusersrequire(Hmisc) x <- 1:10 y <- (11:20) + .1 label(y) <- 'Y' attributes(y) d <- data.frame(x, y) attributes(d$y) m <- model.frame(y ~ x, data=d) m$y attributes(m$y) mr <- model.response(m) attributes(mr) mr Hmisc/inst/tests/describe.Rmd0000644000176200001440000000107213510672620015662 0ustar liggesusers--- title: "describe Test" author: "F Harrell" date: "August 20, 2017" output: html_document --- ```{r setup} require(Hmisc) set.seed(1) x1 <- runif(100) x2 <- sample(1:5, 100, TRUE) x3 <- sample(c('dig', 'cat', 'giraffe'), 100, TRUE) d <- data.frame(x1, x2, x3) describe(d) html(describe(d)) ``` ```{r indiv} html(describe(x1)) ``` ```{r indevasdf} html(describe(data.frame(x1))) ``` ```{r lricketts} x = c(0, 0, 0, 0, 2053, 2098, 0, 2053, 2098, 2, 5, 0, 0, 0, 5, 13, 13, 0, 2053, 2098) length(unique(x)) table(x) d <- describe(x) d d$values ``` Hmisc/inst/tests/aregImpute.r0000644000176200001440000000721612700122132015715 0ustar liggesusers## See Paul T von Hippel, The American Statistician 58:160-164, 2004 mvector <- c(0,0) msigma <- matrix(c(1,0.5,0.5,1), nrow=2) library(mvtnorm) library(Hmisc) # XZ <- rmvnorm(1000, mvector, msigma) mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) { Z <- matrix(rnorm(n * p), p, n) t(u + t(chol(S)) %*% Z) } XZ <- mvrnorm(1000, 2, mvector, msigma) U <- rnorm(1000) Y <- XZ[,1]+XZ[,2]+U summary(lm(Y ~ XZ)) X <- XZ[,1] Z <- XZ[,2] Z.ni <- Z type <- c('random','X<0','Y<0','Z<0')[3] i <- switch(type, random= runif(1000) < .5, 'X<0' = X<0, 'Y<0' = Y<0, 'Z<0' = Z<0) Zna <- Z Zna[i] <- NA summary(lm(Y ~ X + Zna)) #w <- aregImpute(~monotone(Y)+monotone(X)+monotone(Zna)) #w <- aregImpute(~I(Y)+I(X)+I(Zna),fweight=.75) w <- aregImpute(~monotone(Y)+monotone(X)+monotone(Zna), n.impute=5, type='regression') plot(w) Ecdf(Zna, add=T, col='red') Ecdf(Z, add=T, col='green') # plot(w$imputed$Zna, Z[is.na(Zna)]) # use if n.impute=1 # abline(a=0,b=1,lty=2) # lm(Z[is.na(Zna)] ~ w$imputed$Zna) coef(fit.mult.impute(Y~X+Zna, lm, w, data=data.frame(X,Zna,Y),pr=F)) #-------------------------------------------------------------------- ## From Ewout Steyerberg # Missing values: illustrate MCAR, MAR, MNAR mechanism # linear models library(rms) ## 1. 
x1 and x2 with y1 outcome ## A) X only ## B) X+Y ######################### ### Test Imputation ### ### use aregImpute in default settings ######################### n <- 20000 # arbitrary sample size x2 <- rnorm(n=n, mean=0, sd=1) # x2 standard normal x1 <- sqrt(.5) * x2 + rnorm(n=n, mean=0, sd=sqrt(1-.5)) # x2 correlated with x1 y1 <- 1 * x1 + 1 * x2 + rnorm(n=n, mean=0, sd=sqrt(1-0)) # generate y # var of y1 larger with correlated x1 - x2 x1MCAR <- ifelse(runif(n) < .5, x1, NA) # MCAR mechanism for 50% of x1 x1MARx <- ifelse(rnorm(n=n,sd=.8) < x2, x1, NA) # MAR on x2, R2 50%, 50% missing (since mean x2==0) x1MARy <- ifelse(rnorm(n=n,sd=(sqrt(3)*.8)) >y1, x1, NA) # MAR on y, R2 50%, 50% missing (since mean y1==0) # x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA) # MNAR on x1, R2 50%, 50% missing (since mean x1==0) x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA) # MNAR on x1, R2 50%, 50% missing (since mean x1==0) plot(x2, x1MARx) plsmo(x2, is.na(x1MARx), datadensity=TRUE) dd <- datadist(x2,x1MARx) options(datadist='dd') f <- lrm(is.na(x1MARx) ~ rcs(x2,4)) plot(Predict(f, x2, fun=plogis)) d <- data.frame(y1,x1,x2,x1MCAR, x1MARx,x1MARy,x1MNAR) ols(y1~x1+x2) ols(y1 ~ x1MARx + x2) # MAR on x: 3 approaches; CC, MI with X, MI with X+Y g <- aregImpute(~I(y1) + I(x1MARx) + I(x2), n.impute=5, data=d, pr=F, type=c('pmm','regression')[1], match='closest', plotTrans=TRUE) plot(g) Ecdf(x1, add=TRUE, col='red',q=.5) Ecdf(x1MARx, add=TRUE, col='blue',q=.5) f <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F) g <- aregImpute(~y1 + x1MARx + x2, n.impute=5, data=d, pr=F, type='regression', plotTrans=TRUE) f <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F) # MAR on y: 3 approaches; CC, MI with X, MI with X+Y f <- ols(y1~x1MARy+x2) if(FALSE) { Mat.imputation[i,29:32] <- c(coef(f)[2:3], sqrt(diag(vcov(f)))[2:3]) g <- aregImpute(~x1MARy + x2, n.impute=5, data=d, pr=F, type='regression') f <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F) Mat.imputation[i,33:36] <- c(coef(f)[2:3], sqrt(diag(vcov(f)))[2:3]) g <- aregImpute(~y1 + x1MARy + x2, n.impute=5, data=d, pr=F, type='regression') f <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F) Mat.imputation[i,37:40] <- c(coef(f)[2:3], sqrt(diag(vcov(f)))[2:3]) } Hmisc/inst/tests/test.sas0000644000176200001440000000153712243661443015136 0ustar liggesuserslibname x SASV5XPT "test.xpt"; libname y SASV5XPT "test2.xpt"; PROC FORMAT; VALUE race 1=green 2=blue 3=purple; RUN; PROC FORMAT CNTLOUT=format;RUN; data test; LENGTH race 3 age 4; age=30; label age="Age at Beginning of Study"; race=2; d1='3mar2002'd ; dt1='3mar2002 9:31:02'dt; t1='11:13:45't; output; age=31; race=4; d1='3jun2002'd ; dt1='3jun2002 9:42:07'dt; t1='11:14:13't; output; format d1 mmddyy10. dt1 datetime. t1 time. 
race race.; run; data z; LENGTH x3 3 x4 4 x5 5 x6 6 x7 7 x8 8; DO i=1 TO 100; x3=ranuni(3); x4=ranuni(5); x5=ranuni(7); x6=ranuni(9); x7=ranuni(11); x8=ranuni(13); output; END; DROP i; RUN; PROC MEANS;RUN; /* PROC CPORT LIB=work FILE='test.xpt';run; * no; */ PROC COPY IN=work OUT=x;SELECT test;RUN; PROC COPY IN=work OUT=y;SELECT test format z;RUN; Hmisc/inst/tests/latexTabular.r0000644000176200001440000000030312717613557016261 0ustar liggesusersrequire(Hmisc) x <- data.frame(x1=c('a','b^2','c'), x2=1:3 + .25) format.df(x, dec=1) cat(latexTabular(x, headings=c('$x_{1}$', '$x_{2}$'), translate=FALSE, hline=2)) cat(latexTabular(x, dec=1)) Hmisc/inst/tests/summaryP.r0000644000176200001440000000406413000770532015435 0ustar liggesusersrequire(Hmisc) n <- 100 f <- function(na=FALSE) { x <- sample(c('N', 'Y'), n, TRUE) if(na) x[runif(100) < .1] <- NA x } set.seed(1) d <- data.frame(x1=f(), x2=f(), x3=f(), x4=f(), x5=f(), x6=f(), x7=f(TRUE), age=rnorm(n, 50, 10), race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE), sex=sample(c('Female', 'Male'), n, TRUE), treat=sample(c('A', 'B'), n, TRUE), region=sample(c('North America','Europe'), n, TRUE)) d <- upData(d, labels=c(x1='MI', x2='Stroke', x3='AKI', x4='Migraines', x5='Pregnant', x6='Other event', x7='MD withdrawal', race='Race', sex='Sex')) dasna <- subset(d, region=='North America') with(dasna, table(race, treat)) png('/tmp/summaryP.png', width=550, height=550) yy <- with(d, ynbind(x1, x2, x3, x4, x5, x6, x7, label='Exclusions')) print(unclass(yy)) print(yy[,1]) print(attributes(yy[,1])) table(yy[,1]) print(yy[,1:2]) print(attributes(yy[,1:2])) s <- summaryP(race + sex + ynbind(x1, x2, x3, x4, x5, x6, x7, label='Exclusions') ~ region + treat, data=d) # add exclude1=FALSE to include female category plot(s, val ~ freq | region * var, groups='treat') # best looking dev.off() plot(s, groups='treat') # plot(s, groups=treat, outerlabels=FALSE) for standard lattice output plot(s, groups='region', key=list(columns=2, space='bottom')) g <- ggplot(s, groups='treat') plotly::ggplotly(g, tooltip='text') # poor output s <- summaryM(race + sex + x1 + x2 ~ treat + region, data=d) options(grType='plotly') plot(s) options(grType='base') plot(s <- summaryP(race + sex ~ region, data=d, exclude1=FALSE), col='green') # Make your own plot using data frame created by summaryP dotplot(val ~ freq | region * var, data=s, # was groups=treat xlim=c(0,1), scales=list(y='free', rot=0), xlab='Fraction', panel=function(x, y, subscripts, ...) { denom <- s$denom[subscripts] x <- x / denom panel.dotplot(x=x, y=y, subscripts=subscripts, ...) 
}) Hmisc/inst/tests/latexpng.r0000644000176200001440000000076612717435246015465 0ustar liggesusers# See tex.stackexchange.com/questions/11866 require(Hmisc) getHdata(pbc) s <- summaryM(bili + albumin + stage + protime + sex + age + spiders ~ drug, data=pbc) f <- '/tmp/z.tex' cat('\\documentclass[convert={density=600,outext=.png}]{standalone}\n\\begin{document}\n', file=f) w <- latex(s, npct='both', center='none', table.env=FALSE, insert.bottom = FALSE, file=f, append=TRUE) cat('\\end{document}\n', file=f, append=TRUE) system('cd /tmp; latex -shell-escape z; display /tmp/z.png') Hmisc/inst/tests/latex.s0000644000176200001440000001256013013333535014740 0ustar liggesusers# Copy to /tmp, and after running to create z.tex, run pdflatex require(Hmisc) x <- cbind(x1=1:5, x2=2:6) file <- '/tmp/z.tex' # Note: adding here package caused LaTeX problems cat('\\documentclass{article}\n\\usepackage{hyperref,lscape,ctable,booktabs,longtable}\n\\begin{document}\n', file=file) # Example from Johannes Hofrichter dat <- data.frame(a=c(1,2), b=c(2,3)) w <- latex(dat, file=file, ctable=TRUE, caption = "caption", label="test", append=TRUE) # Example from Ben Bolker d <- data.frame(x=1:2, y=c(paste("a", paste(rep("very",30),collapse=" "),"long string"), "a short string")) w <- latex(d, file=file, col.just=c("l","p{3in}"), table.env=FALSE, append=TRUE) # Example from Yacine H df <- data.frame(matrix(1:16, ncol=4)) latex(df, file="", rownamesTexCmd="bfseries") latex(df, file="", cgroup=c("G1","G2"), n.cgroup=c(2,2)) latex(df, file="", cgroup=c("G1","G2"), n.cgroup=c(2,2), rownamesTexCmd="bfseries") ## Test various permutations of options test <- function(caption=NULL, center=NULL, table.env=TRUE, size=NULL, booktabs=FALSE, landscape=FALSE, ctable=FALSE, longtable=FALSE, hyperref=NULL, insert=TRUE, caption.loc='top', colheads=NULL) { i <<- i + 1 cat('\\clearpage\ni=', i, '\n\\hrule\n', sep='', file=file, append=TRUE) ib <- it <- NULL g <- function(x) { if(! 
length(x)) return(NULL) if(is.character(x)) paste(substitute(x), '=', x, ', ', sep='') else if(x) paste(substitute(x), '=T, ', sep='') else NULL } colh <- colheads if(insert) { z <- paste(g(caption), g(center), g(table.env), g(size), g(booktabs), g(landscape), g(ctable), g(longtable), g(hyperref), if(caption.loc != 'top') g(caption.loc), sep='') if(length(colheads)) { colheads <- paste(colheads, collapse=',') z <- paste(z, g(colheads), sep='') } it <- paste('Top: i=', i, ':', z, sep='') ib <- 'Text for bottom' } w <- latex(x, file=file, append=TRUE, caption=caption, center=center, table.env=table.env, size=size, booktabs=booktabs, landscape=landscape, ctable=ctable, longtable=longtable, hyperref=hyperref, insert.top=it, insert.bottom=ib, caption.loc=caption.loc, colheads=colh) invisible() } i <- 0 test() test(hyperref='rrrrr') test(caption='This caption') test(caption='This caption, supposed to be at bottom', caption.loc='bottom') for(cen in c('center', 'centering', 'centerline')) test(center=cen) test(table.env=FALSE) test(size='scriptsize') test(table.env=FALSE) test(booktabs=TRUE, landscape=TRUE) test(ctable=TRUE, landscape=TRUE) test(longtable=TRUE) test(table.env=FALSE, colheads=FALSE) cat('\\end{document}\n', file=file, append=TRUE) # Run pdflatex /tmp/z ## From Sam Zhao library(Hmisc) my.table <- matrix(1:81, nrow=9) n.col <- 9 n.row <- 9 #for(i in 1:9){ cell.format <- matrix(rep("", n.col*n.row), nrow=n.row, ncol = n.col) cell.format[c(1,4,7),] <- "color{blue}" cell.format[,6] <- "color{blue}" cell.format[c(2,3,5,6,8,9),9] <- "color{red}" w <- latex(my.table, file="/tmp/z.tex", numeric.dollar = T, title = "", where="h", rowname = " ", ctable=TRUE, cellTexCmds = cell.format, rgroup = c("RGrour1", "RGrour2","RGrour3"), #n.rgroup = c(3,3,3), n.rgroup = c(3,3,3), cgroup = c("", "Cgroup1","Cgroup2","Cgroup3"), n.cgroup = c(1,2,4,2), caption = "The Example Table Using Hmisc on R 3.3.1.", label = "tab:comp-csp-results-large-small-imp" ) ## From Gary Napier require(Hmisc) require(htmlTable) x <- rnorm(12, 0, 1) y <- rnorm(12, 0, 1) Sa_2 <- data.frame(Mean = x, SD = y) Om_2 <- data.frame(Mean = x, SD = y) Nu <- data.frame(Mean = x, SD = y) Param_names <- c("Sa_2", "Om_2", "Nu") Group <- rep(c("Ctrl", "Pat"), 6) Analyses_names <- sprintf("A%s", 1:6) Mean_sd <- cbind(Sa_2, Om_2, Nu) Mean_sd <- signif(Mean_sd, digit = 2) ## Works perfectly h <- htmlTable(Mean_sd, rnames = Group, rgroup =Analyses_names, n.rgroup = rep(2, 6), cgroup = Param_names, n.cgroup = c(2, 2, 2)) cat(h, sep='\n', file='/tmp/z.html') w <- latex(Mean_sd, file = '/tmp/z.tex', title = '', rowname = Group, # he originally had rnames=Group rgroup = Analyses_names, n.rgroup = rep(2, 6), cgroup = Param_names, # he originally had cnames=Param_names n.cgroup = c(2, 2, 2)) ## From Niclas https://github.com/harrelfe/Hmisc/issues/59 require(Hmisc) options(digits=3) set.seed(173) sex <- factor(sample(c("m","f"), 500, rep=TRUE)) age <- rnorm(500, 50, 5) treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE)) symp <- c('Headache','Stomach Ache','Hangnail','Muscle Ache','Depressed') symptom1 <- sample(symp, 500,TRUE) symptom2 <- sample(symp, 500,TRUE) symptom3 <- sample(symp, 500,TRUE) Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms') f <- summary(treatment ~ age + sex + Symptoms, method="reverse", test=TRUE) w <- latex(f, file='/tmp/z.tex') ## https://github.com/harrelfe/Hmisc/issues/60 require(Hmisc) d <- data.frame(x=1:2, y=2:1) w <- latex(d, file='/tmp/z.tex', insert.bottom='Bottom text', 
table.env=FALSE) Hmisc/inst/tests/inverseFunction.r0000644000176200001440000000300312751732066017005 0ustar liggesuserslibrary(Hmisc) z <- structure(list(x = c(-1.01157732356344, -0.844512148091014, -0.723389895873506, -0.598091014269186, -0.518735055919784, -0.42684920940995, -0.347493251060548, -0.263960663324335, -0.113602005399152, 0.195468569224836, 0.441889703046664, 0.746783648283841, 0.901318935595835, 0.947261858850752, 0.99738141149248 ), y = c(-1.0034980323568, -0.861827721906428, -0.668211630957586, -0.49820725841714, -0.309313511149978, -0.0920857017927416, 0.0637516397026673, 0.0920857017927417, 0.0212505465675558, -0.0826410144293835, -0.0873633581110625, 0.0684739833843463, 0.517096633143857, 0.75321381722781, 0.894884127678181)), .Names = c("x", "y")) library(rms) dd <- datadist(as.data.frame(z)); options(datadist='dd') f <- ols(y ~ rcs(x,5), data=z) ggplot(Predict(f)) + geom_vline(xintercept=c(-.1772, .31375)) + geom_point(aes(x=x, y=y), data=as.data.frame(z)) xx <- seq(-1,1,length=1000) g <- Function(f) h <- inverseFunction(xx, g(xx)) plot(xx[-1], diff(g(xx))) abline(h=0) turns <- formals(h)$turns plot(Predict(f), abline=list(v=turns)) with(Predict(f), plot(x, yhat, type='l')) a <- seq(-1.2,1.2,by=.001) w <- h(a) for(i in 1:ncol(w)) lines(w[,i], a, col=i+1) w <- h(a, what='sample') points(w, a, col='gray') x <- seq(-1, 1, by=.01) y <- x^2 h <- inverseFunction(x,y) formals(h)$turns # vertex a <- seq(0, 1, by=.01) plot(0, 0, type='n', xlim=c(-.5,1.5)) lines(a, h(a)[,1]) ## first inverse lines(a, h(a)[,2], col='red') ## second inverse a <- c(-.1, 1.01, 1.1, 1.2) points(a, h(a)[,1]) Hmisc/inst/tests/redun.r0000644000176200001440000000065212700122615014733 0ustar liggesusersrequire(Hmisc) set.seed(1) n <- 100 x1 <- runif(n) x2 <- runif(n) x3 <- x1 + x2 + runif(n)/10 x4 <- x1 + x2 + x3 + runif(n)/10 x5 <- factor(sample(c('a','b','c'),n,replace=TRUE)) x6 <- 1*(x5=='a' | x5=='c') redun(~x1+x2+x3+x4+x5+x6, r2=.8) redun(~x1+x2+x3+x4+x5+x6, r2=.8, allcat=TRUE) # redun(.., allcat=TRUE, minfreq=40) gives same result as allcat=FALSE x0 <- c(rep(0,99),1) redun(~x0+x1+x2+x3+x4+x5+x6, r2=.8, minfreq=2) Hmisc/inst/tests/html.summaryM.Rmd0000644000176200001440000000450013041713564016660 0ustar liggesusers--- title: "My Test" author: "FE Harrell" date: '`r Sys.Date()`' output: html_document: toc: yes html_notebook: highlight: textmate toc: yes toc_float: collapsed: yes --- # Descriptive Stats ```{r, results='hide'} require(Hmisc) n <- 500; set.seed(88) sex <- factor(sample(c("female","male"), n, TRUE)) age <- rnorm(n, 50, 10) height <- rnorm(n, 1.7, 0.5) type <- factor(sample(c('A', 'B'), n, TRUE)) dbase= data.frame(sex, age, height, type) ``` ```{r} s <- summaryM(age + height + type ~ sex , data=dbase, overall=TRUE, test=TRUE) html(s, prmsd = TRUE, npct='slash', caption="Descriptive Statistics", round = 2, digits=2, prtest='P', pdig=2) ``` ```{r} # From Lauren Samuels set.seed(1) d <- expand.grid(x1=c('A', 'B'), x2=c('a', 'b', 'c')) d$y <- runif(nrow(d)) htmlVerbatim(d) # htmlVerbatim is in Hmisc h <- html( summaryM(x2 + y ~ x1, data= d, test=TRUE, overall=TRUE, continuous=6 ), caption="Descriptive stats and tests of between-group differences for all primary and secondary neuroimaging outcomes", exclude1=FALSE, digits=2, prmsd=TRUE, npct="slash") cat(as.character(h), file='z.html', sep='\n') h ``` ```{r ex} ## Example taken from help file for summaryM options(digits=3) set.seed(173) sex <- factor(sample(c("m","f"), 500, rep=TRUE)) country <- factor(sample(c('US', 'Canada'), 500, rep=TRUE)) 
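# (Added note: country serves as a second stratification variable on the
# right-hand side of the summaryM formula further below, which is what
# produces separate tables by country)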
age <- rnorm(500, 50, 5) sbp <- rnorm(500, 120, 12) label(sbp) <- 'Systolic BP' units(sbp) <- 'mmHg' treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE)) treatment[1] sbp[1] <- NA # Generate a 3-choice variable; each of 3 variables has 5 possible levels symp <- c('Headache','Stomach Ache','Hangnail', 'Muscle Ache','Depressed') symptom1 <- sample(symp, 500,TRUE) symptom2 <- sample(symp, 500,TRUE) symptom3 <- sample(symp, 500,TRUE) Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms') table(as.character(Symptoms)) # Produce separate tables by country f <- summaryM(age + sex + sbp + Symptoms ~ treatment + country, groups='treatment', test=TRUE) html(f, npct='slash', middle.bold=TRUE, prmsd=TRUE) ``` ```{r pbc} getHdata(pbc) # load('~/data/teaching/pbc.sav') s5 <- summaryM(bili + albumin + stage + protime + sex + age + spiders ~ drug, data=pbc, test=TRUE) html(s5, npct='both', insert.bottom = "Polly") ``` Hmisc/inst/tests/procmeans.txt0000644000176200001440000000127012243661443016171 0ustar liggesusersThe MEANS Procedure Variable N Mean Std Dev Minimum Maximum -------------------------------------------------------------------------- x3 100 0.5131445 0.2944341 0.0057602 0.9938965 x4 100 0.5119257 0.3100749 0.0263616 0.9826741 x5 100 0.4887739 0.3141976 0.0041338 0.9972528 x6 100 0.4986746 0.2710817 0.0100958 0.9951080 x7 100 0.5533156 0.2843679 0.0420104 0.9979081 x8 100 0.4809487 0.2892945 0.0072688 0.9596358 -------------------------------------------------------------------------- Hmisc/inst/tests/html.data.frame.r0000644000176200001440000000020212760324534016564 0ustar liggesusersrequire(Hmisc) getHdata(pbc) f <- '/tmp/z.html' cat('', file=f) html(contents(pbc), levelType='table') d <- describe(pbc) html(d) Hmisc/inst/tests/wtd.r0000644000176200001440000000111013075710762014416 0ustar liggesusers# Jose.M.Pavia@uv.es require(Hmisc) PerCapita <- c(10, 20, 30, 20, 20, 40) Group <- c( "A", "B", "B", "A", "A", "B") W <- c(1.5, 2.3, 4.5, 2.6, 1.7, 3.9) ## Works wtd.mean(PerCapita, weights=W) wtd.quantile(PerCapita, weights=W) wtd.mean(PerCapita[Group=="A"], weights=W[Group=="A"]) wtd.mean(PerCapita[Group=="B"], weights=W[Group=="B"]) g <- function(y) wtd.mean(y[,1],y[,2]) summarize(cbind(PerCapita, W), llist(Group), g, stat.name='y') ## davharris https://github.com/harrelfe/Hmisc/issues/69 x <- c(3.7,3.3,3.5,2.8) wt <- c(5, 5, 4, 1)/15 wtd.mean(x, wt) wtd.var(x, wt) Hmisc/inst/tests/Ecdf.r0000644000176200001440000000121313003653212014450 0ustar liggesusers## From Bayazid Sarkar require(Hmisc) set.seed(1) x <- exp(rnorm(100)) w <- sample(1:5, 100, TRUE) g <- sample(c('a','b','c'), 100, TRUE) Ecdf(log(x), weights=w, lty=1:3, col=1:3, group=g, label.curves=list(keys=1:3), subtitles=FALSE) Ecdf(x, weights=w, lty=1:3, col=1:3, group=g, label.curves=list(keys=1:3), subtitles=FALSE, log='x') ## From Piotr Balwierz reds <- rnorm(n=100, mean=5, sd=1) blues <- rnorm(n=100, mean=0, sd=1) g <- c(rep("red", length(reds)), rep("blue", length(blues))) Ecdf(x=c(reds, blues), group=g, col=c("red", "blue")) Ecdf(c(reds, blues), group=factor(g), col=c('blue', 'red')) Hmisc/inst/tests/html-summaryM.r0000644000176200001440000000437012716715041016403 0ustar liggesusersrequire(Hmisc) ht <- function(x, filebase, ...) { ltx <- latex(x, file=paste('/tmp/', filebase, '.tex', sep=''), prmsd=TRUE, msdsize='scriptsize', round=3, pdig=2, npct='both', middle.bold=TRUE, ...) 
invisible(html(ltx, file=paste('/tmp/', filebase, '.html', sep=''))) } n <- 500; set.seed(88) sex <- factor(sample(c("female","male"), n, TRUE)) age <- rnorm(n, 50, 10) height <- rnorm(n, 1.7, 0.5) type <- factor(sample(c('A', 'B'), n, TRUE)) dbase= data.frame(sex, age, height, type) ht(summaryM(age + height + type ~ sex , data=dbase, overall=TRUE, test=TRUE), 'a', caption="Cool descriptive statistics", label="table:summary") ## If this were in a knitr document you could have the following after the @ ## that ends the chunk to also include the LaTeX typeset table (omit the ## ) ## \input{/tmp/a} # From Lauren Samuels set.seed(1) d <- expand.grid(x1=c('A', 'B'), x2=c('a', 'b', 'c')) d$y <- runif(nrow(d)) d w <- ht( summaryM(x2 + y ~ x1, data= d, test=TRUE, overall=TRUE, continuous=6), 'b', caption="Descriptive stats and tests of between-group differences for all primary and secondary neuroimaging outcomes", label= "tbl:descrOutcomes", exclude1=FALSE) ## Example taken from help file for summaryM set.seed(173) sex <- factor(sample(c("m","f"), 500, rep=TRUE)) country <- factor(sample(c('US', 'Canada'), 500, rep=TRUE)) age <- rnorm(500, 50, 5) sbp <- rnorm(500, 120, 12) label(sbp) <- 'Systolic BP' units(sbp) <- 'mmHg' treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE)) treatment[1] sbp[1] <- NA # Generate a 3-choice variable; each of 3 variables has 5 possible levels symp <- c('Headache','Stomach Ache','Hangnail', 'Muscle Ache','Depressed') symptom1 <- sample(symp, 500,TRUE) symptom2 <- sample(symp, 500,TRUE) symptom3 <- sample(symp, 500,TRUE) Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms') table(as.character(Symptoms)) # Produce separate tables by country f <- summaryM(age + sex + sbp + Symptoms ~ treatment + country, groups='treatment', test=TRUE) ht(f, 'c') getHdata(pbc) s5 <- summaryM(bili + albumin + stage + protime + sex + age + spiders ~ drug, data=pbc) ht(s5, 'd', insert.bottom = "More stuff to add \\ldots") Hmisc/inst/tests/latex-html.Rmd0000644000176200001440000000107512570662722016173 0ustar liggesusers--- output: html_document: default --- ```{r setup} require(Hmisc) knitrSet(lang='markdown') ``` ```{r summm} getHdata(pbc) html(latex(describe(pbc)), rmarkdown=TRUE) summarym <- summaryM(bili + albumin + stage + protime + sex + age + spiders ~ drug, data=pbc, test=TRUE) w <- latex(summarym, npct='both', middle.bold=TRUE) # Use npct='slash' to avoid the use of image files for fractions html(w, rmarkdown=TRUE) ``` ```{r lrm} require(rms) getHdata(pbc) f <- lrm(spiders ~ rcs(age, 4) + rcs(albumin, 3) + sex, data=pbc) print(f, rmarkdown=TRUE) ``` Hmisc/inst/tests/consolidate.R0000644000176200001440000000461612243661443016077 0ustar liggesuserslibrary(Hmisc) named.equal <- function(x,y) { x.names <- sort(names(x)) y.names <- sort(names(y)) if(!identical(x.names, y.names)) { cat("x names: ", paste(x.names, consolidate=', '), "\ny names: ", paste(y.names, consolidate=', '), sep='') stop("x and y do not have the same element names") } if(any(x.names == "") || any(y.names == "")) { cat("x names: ", paste(x.names, consolidate=', '), "\ny names: ", paste(y.names, consolidate=', '), sep='') stop("x or y has unnamed elements") } if(!identical(x[x.names], y[x.names])) { print(x) print(y) stop("x and y do not have identical element values") } return(TRUE) } a <- c(a = 5, b = 2, c = 4) b <- c(c = 3, d = 4, e = 12) c <- list(q = 5, h = 2, b = 14) d <- list(w = 2, h = 3, e = 21) a1 <- structure(c(5, 2, 3, 4, 12), .Names = c("a", "b", "c", "d", "e")) a2 <- 
structure(list(a = 5, b = 14, c = 4, q = 5, h = 2), .Names = c("a", "b", "c", "q", "h")) a3 <- structure(list(q = 5, h = 2, b = 2, a = 5, c = 4), .Names = c("q", "h", "b", "a", "c")) a4 <- structure(list(q = 5, h = 3, b = 14, w = 2, e = 21), .Names = c("q", "h", "b", "w", "e")) a5 <- structure(c(5,2,4,4,12), .Names = c("a", "b", "c", "d", "e")) a6 <- structure(list(a = 5, b = 2, c = 4, q = 5, h = 2), .Names = c("a", "b", "c", "q", "h")) a7 <- structure(list(q = 5, h = 2, b = 14, a = 5, c = 4), .Names = c("q", "h", "b", "a", "c")) a8 <- structure(list(q = 5, h = 2, b = 14, w = 2, e = 21), .Names = c("q", "h", "b", "w", "e")) r1 <- consolidate(a, b, protect=FALSE) r2 <- consolidate(a, c, protect=FALSE) r3 <- consolidate(c, a, protect=FALSE) r4 <- consolidate(c, d, protect=FALSE) is.vector(r1) is.list(r2) is.list(r3) is.list(r4) named.equal(r1, a1) named.equal(r2, a2) named.equal(r3, a3) named.equal(r4, a4) r5 <- consolidate(a, b, protect=TRUE) r6 <- consolidate(a, c, protect=TRUE) r7 <- consolidate(c, a, protect=TRUE) r8 <- consolidate(c, d, protect=TRUE) named.equal(r5, a5) named.equal(r6, a6) named.equal(r7, a7) named.equal(r8, a8) named.equal(r3, r6) named.equal(r2, r7) e <- a consolidate(e) <- b named.equal(e, r1) e <- a consolidate(e, protect = TRUE) <- b named.equal(e, r5) f <- c(1,2,3,5) consolidate(attributes(f)) <- c named.equal(attributes(f), c) consolidate(attributes(f)) <- NULL named.equal(attributes(f), c) Hmisc/inst/tests/howto.html0000644000176200001440000000220212243661443015463 0ustar liggesusers How to Create SAS Transport Files

    How to Create SAS Transport Files

    1. If any of the datasets you are exporting are not already in the WORK library, copy them to there:
      PROC COPY IN=mylib OUT=WORK; SELECT test1 test2; RUN;
      
    2. If you have created value label formats using PROC FORMAT; VALUE ..., output these value labels into a SAS dataset:
      PROC FORMAT CNTLOUT=format;RUN;
      
    3. Define a LIBNAME to reference the SAS Version 5 transport file engine:
      libname xp SASV5XPT "test.xpt";
      
    4. Copy all needed datasets to, e.g., test.xpt:
      PROC COPY IN=work OUT=xp;SELECT test1 test2 format;RUN;
      
      DO NOT use PROC CPORT to create the file.
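    Optional check from R (an added sketch, not part of the original steps): after creating test.xpt you can read the transport file back with the Hmisc function sasxport.get to verify its contents.
      library(Hmisc)
      w <- sasxport.get('test.xpt')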

    Frank E Harrell Jr
    Last modified: Fri Jun 6 15:47:58 EDT 2003 Hmisc/inst/tests/histSpikeg.r0000644000176200001440000000223612452304415015734 0ustar liggesusersrequire(Hmisc) set.seed(1) p <- data.frame(x=1:10, y=1:10 + runif(10)) d <- data.frame(x=rnorm(1000, 5, 1), y=sample(1:10, 1000, TRUE)) g <- ggplot(p, aes(x=x, y=y)) + geom_line() g + histSpikeg(y ~ x, p, d) g + histSpikeg(y ~ x, data=d, side=1) g + geom_point() + histSpikeg(y ~ x, data=d, lowess=TRUE) p <- expand.grid(sex=c('male','female'), region=c('a','b','c'), x=1:10) p$y <- with(p, x + runif(60) + 2*runif(60)*(sex=='female') + 3*(region=='c')) g <- ggplot(p, aes(x=x, y=y, color=sex)) + facet_wrap(~ region) g + geom_line() d <- expand.grid(sex=c('male', 'female'), region=c('a','b','c'), reps=1:300) d$x <- rnorm(nrow(d), 5, 2) d$x[d$sex == 'male'] <- rnorm(sum(d$sex == 'male'), 7, .4) d$x[d$region == 'b'] <- rnorm(sum(d$region == 'b'), 2, 1) g + geom_line() + histSpikeg(y ~ x + sex + region, p, d) d$y <- with(d, x + runif(1800) + 2*runif(1800)*(sex=='female') + 3*(region=='c')) g + histSpikeg(y ~ x + sex + region, data=d, lowess=TRUE) g + geom_line() + histSpikeg(y ~ x + sex + region, data=d, lowess=TRUE) h <- histSpikeg(y ~ x + sex + region, data=d, lowess=TRUE) g + h g + h$hist + h$lowess # equivalent; ggplot2 uses both elements of h list Hmisc/inst/tests/minor.tick.r0000644000176200001440000000152512351663737015714 0ustar liggesusers# Thanks: 袁超磊 from ROBERT I. KABACOFF, 2011, R in Action, #function could not give two minor tick marks #between each major tick mark on the y-axis. ## This seemed to fail but it really succeeded. ## minor.tick uses par('xaxp' or 'yaxp') and worked with respect ## to the plot( ) as can be seen if yaxt='n' is omitted require(Hmisc) x <- c(1:10) y <- x z <- 10/x plot(x, y, type="b", pch=21, col="red", yaxt="n", lty=3, ann=FALSE) lines(x, z, type="b", pch=22, col="blue", lty=2) axis(2, at=x, labels=x, col.axis="red", las=2) axis(4, at=z, labels=round(z, digits=2), col.axis="blue", las=2, cex.axis=0.7, tck=-.01) mtext("y=1/x", side=4, line=3, cex.lab=1, las=2, col="blue") title("An Example of Creative Axes", xlab="X values", ylab="Y=X") minor.tick(nx=2, ny=3, tick.ratio=0.5) Hmisc/inst/tests/panelbp.r0000644000176200001440000000466412243661443015257 0ustar liggesusersrequire(Hmisc) set.seed(1) var <- c(rep('A', 100), rep('B', 100)) trt <- sample(c('T1','T2'), 200, TRUE) x <- c(runif(100), 10*runif(100)) y <- x + c(runif(100)/10, runif(100)) N <- tapply(x, llist(var, trt), function(x) sum(!is.na(x))) print(N) #trt <- factor(paste(trt, ' (n=', N[cbind(var,trt)], ')', sep='')) #var <- factor(paste(var, ' (n=', N[cbind(var,trt)], ')', sep='')) vn <- var for(v in unique(var)) { i <- var == v n <- tapply(!is.na(x[i]), trt[i], sum) nam <- names(n) # n <- sprintf('%s,(n[%s]==%g, n[%s1]==%g)', nam[1], n[1], nam[2], n[2]) # w <- sprintf('paste(%s," (", n[%s]==%g,~~n[%s]==%g,")")', # v, nam[1], n[1], nam[2], n[2]) # cat(w, '\n') # vn[var == v] <- parse(text=w) n <- sprintf('%s (n%s=%g, n%s=%g)', v, nam[1],n[1], nam[2],n[2]) vn[var == v] <- n } trt <- factor(trt) xyplot(as.integer(trt) ~ x | vn, panel=panel.bpplot, ylim=c(0,3), scale=list(y=list(at=1:2, labels=levels(trt)), x=list(relation='free', limits=list(c(0,1),c(0,13)))), ylab='Treatment', layout=c(1,2)) # strip.default or strip.custom may provide workarounds # http://r.789695.n4.nabble.com/Expressions-in-lattice-conditional-variables-td4660089.html bpl <- function(x, group, lab, cex.labels=.75) { quants=c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 
0.975) group <- factor(group) xlim <- quantile(x, c(.025,.975), na.rm=TRUE) sfn <- function(x, quants) { o <- options(digits=10) ## So won't lose precision in quantile names on.exit(options(o)) c(quantile(x,quants), Mean=mean(x), SD=sqrt(var(x)), N=sum(!is.na(x))) } qu <- tapply(x, group, sfn, simplify=TRUE, quants) qu$Combined <- sfn(x, quants) sm <- matrix(unlist(qu), ncol=length(quants)+3, byrow=TRUE, dimnames=list(names(qu), c(format(quants),'Mean','SD','N'))) bpplt(sm[,-ncol(sm)], xlab=lab, xlim=xlim, cex.points=.5) upedge <- par('usr')[4] outerText('N', upedge+strheight('N', cex=cex.labels)/2, cex=cex.labels) for(i in 1:nrow(sm)) outerText(sm[i,'N'], 4-i, cex=cex.labels) } spar(mfrow=c(2,1), left=-1,rt=3,bot=1.5, mgp=c(2.5,.6,0), tcl=-.3, ps=12) set.seed(2) trt <- c(rep('T1',100), rep('T2',100)) x1 <- runif(100) x2 <- 10*runif(100) trt <- sample(c('T1','T2'), 100, TRUE) bpl(x1, trt, expression(x[1])) title(sub=expression(F[1,20] == 2.53), cex.sub=.75, adj=0, line=2) bpl(x2, trt, expression(x[list(2,23)])) Hmisc/inst/tests/upData.r0000644000176200001440000000157613012373564015052 0ustar liggesusers# From David Norris require(Hmisc) df <- upData(mtcars, cyl=factor(cyl,levels=2*(2:4),labels=paste(2*(2:4),"cyl", sep="-")), am=factor(am,levels=0:1,labels=c("automatic","manual")), gear=factor(gear,levels=3:5,labels=paste(3:5,"speed", sep="-")), labels=c( mpg="Miles per gallon" ,cyl="Number of cylinders" ,disp="Displacement" ,hp="Gross horsepower" ,drat="Rear axle ratio" ,wt="Weight" ,qsec="1/4 mile time" ,am="Transmission type" ,gear="Number of forward gears" ,carb="Number of carburetors" ), units=c( wt="lb/1000" ,disp="in^3" ,qsec="sec" ), drop='vs' ) contents(df) Hmisc/inst/tests/fit.mult.impute.r0000644000176200001440000000123012657644215016673 0ustar liggesusers## From Leena Choi require(rms) set.seed(3) x1 <- factor(sample(c('a','b','c'),1000,TRUE)) x2 <- (x1=='b') + 3*(x1=='c') + rnorm(1000,0,2) x3 <- rnorm(1000) y <- x2 + 1*(x1=='c') + .2*x3 + rnorm(1000,0,2) orig.x1 <- x1[1:250] orig.x2 <- x2[251:350] x1[1:250] <- NA x2[251:350] <- NA d <- data.frame(x1,x2,x3,y) ddist <- datadist(d); options(datadist='ddist') a <- aregImpute(~y + x1 + x2 + x3, nk=c(0,3:5), data=d) f <- fit.mult.impute(y ~ x1 + x2 + x3, ols, a, data=d) g <- fit.mult.impute(y ~ x1 + x2 + x3, lm, a, data=d) vcov(f) - vcov(g) f summary(g) f <- ols(y ~ x1 + x2 + x3, data=d) g <- lm (y ~ x1 + x2 + x3, data=d) vcov(f) - vcov(g) f summary(g) Hmisc/inst/tests/xYplotFilledBands.r0000644000176200001440000000117012700123266017204 0ustar liggesusers# This example uses the summarize function in Hmisc to # compute the median and outer quartiles. 
The outer quartiles are # displayed using "filled bands" require(Hmisc) set.seed(111) dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100) month <- dfr$month; year <- dfr$year y <- abs(month-6.5) + 2*runif(length(month)) + year-1997 s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5) # filled bands: default fill = pastel colors matching solid colors # in superpose.line (this works differently in R) xYplot ( Cbind ( y, Lower, Upper ) ~ month, groups=year, method="filled bands" , data=s, type="l") Hmisc/inst/tests/cut2.r0000644000176200001440000000126112571323711014476 0ustar liggesusers## Use cut2 to create groups of observations having about m observations ## per group, and plot flat lines covering each interval, with y = group mean require(Hmisc) set.seed(1) n <- 3000 m <- 200 x <- c(0, 1, round(runif(n - 3, 5, 100)), 200) y <- rbinom(n, 1, ifelse(x < 50, .3, .7)) plsmo(x, y, xlim=range(x)) plsmo(x, y, f=.25, col='red', add=TRUE) lines(supsmu(x, y, bass=2), col='green') g <- cut2(x, m=m) w <- cut2(x, m=m, onlycuts=TRUE) p <- tapply(y, g, mean) segments(w[-length(w)], p, w[-1], p) ## lines((w[-length(w)] + w[-1]) / 2, p, col=adjustcolor('blue', alpha=.2)) ne <- 2 : (length(w) - 1) segments(w[ne], p[-1], w[ne], p[-length(p)], col=adjustcolor('blue', alpha=.15)) Hmisc/inst/tests/summaryRc.r0000644000176200001440000000117612243661443015613 0ustar liggesusersrequire(Hmisc) set.seed(177) sex <- factor(sample(c("m","f"), 500, rep=TRUE)) age <- rnorm(500, 50, 5) bp <- rnorm(500, 120, 7) units(age) <- 'Years'; units(bp) <- 'mmHg' label(bp) <- 'Systolic Blood Pressure' L <- .5*(sex == 'm') + 0.1 * (age - 50) y <- rbinom(500, 1, plogis(L)) png('/tmp/summaryRc.png', height=750) spar(mfrow=c(3,2), top=2, cex.axis=1) summaryRc(y ~ age + bp) # For x limits use 1st and 99th percentiles to frame extended box plots summaryRc(y ~ age + bp, bpplot='top', datadensity=FALSE, trim=.01) summaryRc(y ~ age + bp + stratify(sex), label.curves=list(keys='lines'), nloc=list(x=.1, y=.05)) dev.off() Hmisc/inst/tests/describe2.r0000644000176200001440000000056013535720115015464 0ustar liggesusers## Test from Matt Shotwell - rounding issue in describe.vector when values ## have very different magnitudes require(Hmisc) set.seed(42) x <- c(runif(1000), runif(2)*1e7) d <- describe(x) d # Test with a large number of distinct character values set.seed(1) k <- paste0('kkkkkkkkkk', round(runif(1000) * 200)) describe(k) describe(k, listunique=1000, listnchar=2) Hmisc/inst/tests/latex-color.r0000644000176200001440000000204513024615455016056 0ustar liggesusers## From Sam.Zhao@agriculture.gov.au require(Hmisc) # create a 9 by 9 table my.table <- matrix(1:81, nrow=9) colnames(my.table) <- paste("c",1:9,sep='') n.col <- 9 n.row <- 9 # cell format cell.format <- matrix(rep("", n.col*n.row), nrow=n.row, ncol = n.col) #color the rows 1,4,7 blue cell.format[c(1,4,7),] <- "color{blue}" my.table[c(1,4,7),] <- 'blue' # color the column ‘c6’ blue cell.format[,6] <- "color{blue}" my.table[,6] <- 'blue' #color the cells (2,9), (3,9), (5,9), (6,9), (8,9) and (9,9) red cell.format[c(2,3,5,6,8,9),9] <- "color{red}" my.table[c(2,3,5,6,8,9),9] <- 'red' w <- latex(my.table, file="/tmp/z.tex", numeric.dollar = TRUE, title = "", where="h", rowname = " ", ctable=TRUE, cellTexCmds = cell.format, rgroup = c("RGroup1", "RGroup2","RGroup3"), n.rgroup = c(3,3,3), cgroup = c("", "CGroup1","CGroup2","CGroup3"), n.cgroup = c(1,2,4,2), caption = "The Example Table Using Hmisc on R 2.14.", label = "tab:comp-csp-results-large-small-imp" ) 
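## Added sketch (an assumption, not part of the original test file):
## cellTexCmds is expected to have the same dimensions as the table being
## typeset, so a quick sanity check before calling latex() catches
## mismatches early
stopifnot(identical(dim(cell.format), dim(my.table)))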
Hmisc/inst/tests/summaryRc.r0000644000176200001440000000117612243661443015613 0ustar liggesusers
require(Hmisc)
set.seed(177)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
age <- rnorm(500, 50, 5)
bp  <- rnorm(500, 120, 7)
units(age) <- 'Years'; units(bp) <- 'mmHg'
label(bp) <- 'Systolic Blood Pressure'
L <- .5*(sex == 'm') + 0.1*(age - 50)
y <- rbinom(500, 1, plogis(L))
png('/tmp/summaryRc.png', height=750)
spar(mfrow=c(3,2), top=2, cex.axis=1)
summaryRc(y ~ age + bp)
# For x limits use 1st and 99th percentiles to frame extended box plots
summaryRc(y ~ age + bp, bpplot='top', datadensity=FALSE, trim=.01)
summaryRc(y ~ age + bp + stratify(sex),
          label.curves=list(keys='lines'), nloc=list(x=.1, y=.05))
dev.off()
Hmisc/inst/tests/describe2.r0000644000176200001440000000056013535720115015464 0ustar liggesusers
## Test from Matt Shotwell - rounding issue in describe.vector when values
## have very different magnitudes
require(Hmisc)
set.seed(42)
x <- c(runif(1000), runif(2)*1e7)
d <- describe(x)
d
# Test with a large number of distinct character values
set.seed(1)
k <- paste0('kkkkkkkkkk', round(runif(1000) * 200))
describe(k)
describe(k, listunique=1000, listnchar=2)
Hmisc/inst/tests/latex-color.r0000644000176200001440000000204513024615455016056 0ustar liggesusers
## From Sam.Zhao@agriculture.gov.au
require(Hmisc)
# create a 9 by 9 table
my.table <- matrix(1:81, nrow=9)
colnames(my.table) <- paste("c", 1:9, sep='')
n.col <- 9
n.row <- 9
# cell format
cell.format <- matrix(rep("", n.col*n.row), nrow=n.row, ncol=n.col)
# color rows 1, 4, 7 blue
cell.format[c(1,4,7), ] <- "color{blue}"
my.table[c(1,4,7), ] <- 'blue'
# color column 'c6' blue
cell.format[, 6] <- "color{blue}"
my.table[, 6] <- 'blue'
# color cells (2,9), (3,9), (5,9), (6,9), (8,9), and (9,9) red
cell.format[c(2,3,5,6,8,9), 9] <- "color{red}"
my.table[c(2,3,5,6,8,9), 9] <- 'red'
w <- latex(my.table, file="/tmp/z.tex", numeric.dollar=TRUE, title="",
           where="h", rowname=" ", ctable=TRUE,
           cellTexCmds=cell.format,
           rgroup=c("RGroup1", "RGroup2", "RGroup3"), n.rgroup=c(3,3,3),
           cgroup=c("", "CGroup1", "CGroup2", "CGroup3"), n.cgroup=c(1,2,4,2),
           caption="The Example Table Using Hmisc on R 2.14.",
           label="tab:comp-csp-results-large-small-imp")
Hmisc/inst/tests/test2.xpt0000644000176200001440000002354012243661443015243 0ustar liggesusers
[Binary SAS XPORT (v8.2, AIX, 05JUN03) transport file; raw contents omitted.
 Readable header records show dataset TEST (RACE; AGE, labeled "Age at
 Beginning of Study"; D1 with MMDDYY format; DT1 with DATETIME format; T1
 with TIME format), a PROC FORMAT dataset mapping RACE values 1/2/3 to
 green/blue/purple, and dataset Z with numeric variables X3-X8.]
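A hedged sketch, not part of the package tests, of how a transport file like test2.xpt is typically read. sasxport.get is Hmisc's reader for SAS XPORT files; the path below is hypothetical, and the exact component names returned are an assumption.

## Hedged sketch; the file path is hypothetical
require(Hmisc)
w <- sasxport.get('/path/to/Hmisc/inst/tests/test2.xpt')
names(w)          # with several datasets, expect a list (e.g. test and z)
describe(w$test)  # RACE should carry the format labels green/blue/purple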
Hmisc/inst/tests/areg.s0000644000176200001440000000342213663712457014553 0ustar liggesusers
# Tests for parametric version of ace in acepack
if(FALSE) {
set.seed(1)
library(Hmisc)
source('~/R/test/parAce.s')

ns <- c(30, 300, 3000, 10000)
for(n in ns) {
  y <- sample(1:5, n, TRUE)
  x <- abs(y - 3) + runif(n)
  par(mfrow=c(4,3))
  for(k in c(0, 3:5)) {
    z <- parAce(x, y, xtype='spline', ytype='cat', k=k)
    plot(x, z$tx)
    title(paste('R2=', format(z$rsquared)))
    tapply(z$ty, y, range)
    a <- tapply(x, y, mean)
    b <- tapply(z$ty, y, mean)
    plot(a, b)
    abline(lsfit(a, b))
    # Should get same result to within linear transformation if reverse x and y
    w <- parAce(y, x, xtype='cat', ytype='spline', k=k)
    plot(z$ty, w$tx)
    title(paste('R2=', format(w$rsquared)))
    abline(lsfit(z$ty, w$tx))
  }
  if(n < max(ns)) { cat('Press enter to continue:'); readline() }
}

# Example where one category in y differs from others but only in variance of x
n <- 50
y <- sample(1:5, n, TRUE)
x <- rnorm(n)
x[y==1] <- rnorm(sum(y==1), 0, 5)
z <- parAce(x, y, xtype='lin', ytype='cat')
summary(z)
plot(z)
z <- parAce(x, y, xtype='spline', ytype='cat', k=4)
summary(z)
plot(z)

par(mfrow=c(1,2))
for(n in c(200, 2000)) {
  x <- rnorm(n); y <- rnorm(n) + x
  z <- parAce(x, y, xtype='spline', ytype='spline', k=5)
  plot(x, z$x)
  plot(y, z$y)
  title(n)
  readline()
}

n <- 200
x1 <- rnorm(n); x2 <- rnorm(n); y <- rnorm(n) + x1^2
z <- parAce(cbind(x1,x2), y, xtype=c('spline','lin'), ytype='spline', k=3)
par(mfrow=c(2,2))
plot(x1, z$x[,1])
plot(x2, z$x[,2])
plot(y, z$y)

n <- 5000
x1 <- rnorm(n); x2 <- rnorm(n); y <- (x1 + rnorm(n))^2
z <- parAce(cbind(x1,x2), y, xtype=c('spline','spline'), ytype='spline', k=5)
par(mfrow=c(2,2))
plot(x1, z$x[,1])
plot(x2, z$x[,2])
plot(y, z$y)

n <- 10000
x <- matrix(runif(n*20), n, 20)
y <- rnorm(n)
z <- parAce(x, y, xtype=rep('spline',20), ytype='spline', k=5)
}
Hmisc/inst/CHANGELOG0000644000176200001440000000006512243707760013515 0ustar liggesusers
See https://github.com/harrelfe/Hmisc/commits/master