Hmisc/ 0000755 0001762 0000144 00000000000 14765570322 011330 5 ustar ligges users Hmisc/MD5 0000644 0001762 0000144 00000060705 14765570322 011650 0 ustar ligges users 16f8ea43efba90112351c5ecb33021c4 *COPYING
7bcf33a4a8a4f07e21b93aa03b03fa00 *DESCRIPTION
4c097817eb9afa92bf1ad8a06094c62c *NAMESPACE
5368bf8b93151613bac87e0582066b2e *NEWS
76f90acbe9bf8ad2eaaa6b8813eb7c82 *R/AFirst.lib.s
8fdbfa3b900cbbb42d94898093ec7fa5 *R/Cs.s
b5bfe4ae6950ece451619109f3d46695 *R/Fdebug.r
314b24f935cf8e2ee7a677ed26479b83 *R/GiniMd.s
7d0b0014572f85d46fa6a3417707344c *R/Key.s
10afe48276f47fe7b9cbeba8b151ee42 *R/Merge.r
ab0755523280707d18f98dc2edd8b15f *R/Misc.s
081320dfb53e3362cfb0918952b60cce *R/R2Measures.r
8dad28fed2d0f99cdee16b9c1ac67fbd *R/abd.r
9043da0ab6b8288636bb399cdb0f89bf *R/abs.error.pred.s
03d460384555b2c0accd40339817283f *R/addggLayers.r
03213052dbab068bc45939cc495f8849 *R/areg.s
30a1ac50bbae9027548b09f8a34c9a9a *R/aregImpute.s
bf5dd5216d3d80cc6a01d0e82aff3750 *R/biVar.s
b4b1cd12b90021d097455110506b80ff *R/binconf.s
4fec382a831176fa508723178a709915 *R/bootkm.s
12bf07383dcdc63a08469cf4311e50ff *R/bpower.s
b8c95ecb00c8eee8d7712d77f7707b1d *R/bpplot.s
bb0f5e114bad79ac6c8033ac0c8b74a6 *R/bystats.s
3dff999734ea32daa373b2950cceb72a *R/capitalize.s
b9862f09ba4c92cd873521799fcf9d5f *R/ciapower.s
f77dbf71580e9bf653fadd0c2f491f85 *R/cnvrt.coords.s
06b2b44cb04c1cddbba5c6bf112c8b57 *R/combine.levels.r
8415b600c989358e10497d6cc39146aa *R/combplotp.r
81b857a60ca48c96870419ab771a7387 *R/completer.r
8c002470a851e0d14571cfda085e6c42 *R/confbar.s
6ac4ca97488c406e4f513cec9be4be27 *R/consolidate.s
c04e31869fdc5107eb6af6886eadb566 *R/cpower.s
1d781cfa4d6123c2cb783ca714f3e315 *R/curveRep.s
49e43c5261cdf4e3b9b1c225c331c47c *R/cut2.s
25c501e1db065dacd0b7ebbc9918254e *R/data.frame.labelled.s
97dc5dcc48cb719088b817833067352c *R/dataRep.s
ec8af558be91e1fa3415f1a99dd469b2 *R/dates.s
3f02d2486d14e095a96fe5ee10e483c7 *R/deff.s
0cc7d70f97385be3c9e5942357e4b4e0 *R/describe.s
6cb5b3a77860b605f102bb528e84a071 *R/discrete.s
3c7e7885dccb0e283f96f91fe1b170ba *R/dotchart3.s
3d1c3094a57c0fcf886f87367b539d86 *R/dotchartpl.s
919e94c2c87f720601387c5171b57ffc *R/drawPlot.s
99b87da45bc90c9f56d975cfbe16c0bd *R/ebpcomp.r
728f2d8a00846a73762b486315811c88 *R/ecdf.s
d11651dc4d1c51be124dc230355167bb *R/ecdfSteps.r
c1e489a07ca933fb02e004489fd3ee8e *R/epi.s
9df65cd87eb802c762cddd247dc8a3d2 *R/errbar.s
048a26c156cbf7588ef95b2d52418cc9 *R/event.chart.s
f6f60ab400ea2df181948ccb799eccba *R/event.history.s
909ee02c6c87b1c16420787be731df2b *R/fImport.r
8f6279b9164a57be894cb2f12abb6ac6 *R/find.matches.s
026b53d7afa41be1b9431147a304fab1 *R/fit.mult.impute.s
c238614fb72943362407d74442bf236a *R/format.pval.s
aace929824aad6ebdfba910950b6cc2b *R/ftu.s
473bbef2ffd42d37d059d9a4c66fe8a4 *R/gbayes.s
25b0b3622f0fc66a77fc9cfa6f1cf4e8 *R/gbayesSeqSim.r
2800e27dbcdb3c79e9609483db401cbb *R/geom_stepconfint.r
f76f66eae7faef0e445143d9e367619d *R/gettext.s
769b009b031f6f656a0e9b748057bfae *R/ggMisc.s
1954a3b632a36deaa0fee52b2a1cae0e *R/ggfreqScatter.r
8dc06e7b35605c27a74fb3c3b6ab1681 *R/ggplotlyr.r
b70800bb3730a5b84500d43e97d951f4 *R/groupn.s
7005f9e36c628d4319cab71a85194e7f *R/hashCheck.r
926950a356c2e54ecd8b2317ec08a5d8 *R/hidingTOC.r
a0d1970f4732d8ab3e4c20b3ae34a3c4 *R/hist.data.frame.s
9a3e9e556be124a4b898ff9342e86a40 *R/histSpikeg.s
e79e72a4b25a7e30d32caf6b7a4b55c6 *R/histbackback.s
6d4ca1c9b9edf2476bc947145baa60f5 *R/hlab.r
3ba3c85100e05c090c8caec1d628a05e *R/hoeffd.s
a5bba9522a2668a4ac304f2223ce1520 *R/html.s
e44f34547cba1ef4a434477d140998a1 *R/htmltabv.r
1759e437814916630a47a0128ae1fd83 *R/impute.s
2d992948ec2ad38aa2c778882fd65e72 *R/in.operator.s
e1bbabaa533c22a5378d1a4b645a80d2 *R/is.present.s
8cf9e0a10997c59a5be3f19a39c73684 *R/james.stein.s
b7ae0906d596784bb592aa46d306e438 *R/labcurve.s
daa2d9a32f0b5db51f487dcb96ff58cc *R/label.s
71fab4a69fbd4518403b506f1fcf0cb0 *R/latestFile.r
526e1c9a614e4914908b00f675466117 *R/latex.s
4e31dcfc293ca68d459764b5d32515f3 *R/latexCheckOptions.r
dda324419c115accb95440b58d422d43 *R/latexDotchart.s
b160964c8423acea603c95a04c791084 *R/latexTabular.s
67c8a6450a4ead25d7fe5a7944a40d12 *R/latexTherm.s
f15937a81b1e2a57a9707f5fbdd6c878 *R/list.tree.s
ce608a6c653ec8c6fe828448f9a78eb6 *R/mApply.s
d82af633cd7d55d966e9c3b723187334 *R/mChoice.s
e21c3cbb9b9c17b07223d4f805433395 *R/makeNstr.s
e5f0eef89d954522d56fb5399066a6d3 *R/mask.s
f4cc097babcda3c1edcd60f516713ff2 *R/matxv.s
694c248a54fc62c71172d6f02157b69a *R/mdb.get.s
7f55b1f89d2cac3138bcc3419c2de455 *R/meltData.r
4148365f1c827715d531c4e642846410 *R/minor.tick.s
ce09d5ff43279995bd138ab5dd654bc9 *R/misc.get.s
32a369e847b95005134970ac15b4ba73 *R/model.frame.default.s
a249184df64b5f38d91e174971dc0765 *R/movStats.r
fc71dbaeeb57983dcfa3ac0497ed837b *R/mtitle.s
1b91897d110771682243816be35059e1 *R/multLines.s
97ebbb81148d208a3a8103861390453f *R/nCoincident.r
d5a7dc64dd1472daca476f2fca4440d4 *R/na.delete.s
1f3488410c5f3721d93a23bf22e9a479 *R/na.detail.response.s
dd4806066d614756cd8be2bef6bad3dd *R/na.keep.s
3d905a724130e0e181a9d00ecd45b195 *R/na.pattern.s
abc0f1c38d72a9bf3e59ce9d194c71ee *R/nobsY.s
27019e9e90730ac0b25014b51d68ec66 *R/nstr.s
4b4604d9b62ab103eec9eef18c90e067 *R/num.intercepts.s
a5a26bf4ece26b9b707595a063f1bfd8 *R/ordGroupBoot.r
14dd78910567f53ea09253306e731b06 *R/pMedian.r
a50c8048a7546e224ca402d362c084cd *R/pairUpDiff.r
215f9a06ffc6df02fb67576ad0da83b9 *R/panel.abwplot.s
68d8bbb7b9e6f5fa975b37f2e1245291 *R/panel.bpplot.s
1630c1be85251dac0e8cd0243bedd201 *R/pc1.s
98d59510ae9b6125f404f484281f94bf *R/plot.describe.s
7afa7237d6203acbecd4d1b32f40a8c4 *R/plotCorrM.r
7768f312916e3d893a0b97896dc0d253 *R/plotlyM.r
361419eeca9108cb009e1126283e2f5f *R/plsmo.s
507d5adc6994ca6fffaeee1ab0bbfe2e *R/popower.s
f06f2d0620feb4f293f85652890e07b5 *R/princmp.r
9a1119c7949407c39968645e921a87a6 *R/print.char.list.s
5e6df449d8b6cd6f06693d7298d47150 *R/printL.r
efe9a9a6c90594d1e7425cc46f2fc061 *R/pstamp.s
e2cf58cc149b7d657e44a976f5e8f185 *R/qcrypt.r
9a30b2b86cd970a03325c14d9e5ed37b *R/qrxcenter.r
f3c43a8d39f1458c92a8d2d7df290d9a *R/r2describe.r
2679a3e28310299f523e0ffb440db739 *R/rcorr.cens.s
df3bc2ca9cf1ee3bce159a72b6ead0f0 *R/rcorr.s
4bbed0ec0b6f43b54b8aa32effb023f5 *R/rcorrp.cens.s
62264d939e616b9d1dc7070096b679e5 *R/rcspline.eval.s
55c4cf53a776ed56486cbdec15d64e74 *R/rcspline.plot.s
42daef400a52ac984cb164ed67d35632 *R/rcspline.restate.s
1aa8a7124aa396e467f86a42f6651a53 *R/reShape.s
11e4c341765e538b58c4eb746814f792 *R/redun.s
02946450a1d09319cea2cef1c360336a *R/reformM.r
41b180f15250bedd222f2c334d13eb1f *R/regexpEscape.s
a747dcb6f49ebcd6f1e54180e64deaa9 *R/responseSummary.s
3978f2ee3511549cb906af4618024a53 *R/rm.boot.s
611e706e219da7df2949330788522e88 *R/rmClose.r
223b5153c60f6b5fc5755066399c1561 *R/runParallel.r
a1763e6429f086dd1783694da9b85342 *R/samplesize.bin.s
d587d8ed7aee1aa12994f088da565dd8 *R/sas.get.s
b77bb6020828f4e18cb5a12720911587 *R/scat1d.s
bcd4fb26ff4be0623165a02d1f874733 *R/score.binary.s
00d6b8dec0c5d0055a706e117ec4e373 *R/sedit.s
981211f07f8dd4511d72cab337086357 *R/seqFreq.r
f361dab6e800a20722ecc51bc08e056d *R/show.pch.s
bfd1c58a14b5451fad603d2a20af2d47 *R/showPsfrag.s
349c683e96262eece28ee6110f85b828 *R/simMarkovOrd.r
525a4048d60605a8de57a4db1b684271 *R/solvet.s
e2e1ee6a263cca9a9f1036cee25dfc69 *R/somers2.s
06ee4823f5d18fa93e99635bdf21a349 *R/spearman.test.s
ff69c2235a6faeb4021a3bdb0914bb41 *R/spower.s
a041b2aeb6c6af8ee47770b504538849 *R/src.s
a3ca370ec684fbeeb54d0e7f489a7377 *R/stat-plsmo.r
b47544f6b133f394f2fda329de60f733 *R/strgraphwrap.s
68cec90eb089d7d3334bcd9144965022 *R/string.break.line.s
b42e6d7953adef86625842fa4bdbe291 *R/strwrap.s
055ef617ac4b0362b6466e6436ba69fb *R/subplot.s
14e533b635192fae8b28ca81b9c86f5d *R/substi.s
4915c47bbe69b3a85821829581a95d5b *R/summary.formula.s
5fb3787a912c21acc68f3e444562ac3a *R/summaryM.s
fc25014d239f92e9d37936a723d9858e *R/summaryP.s
364b98873997e962346cecac9573080f *R/summaryRc.s
0dbb2bcf15563930bc6a18f124cb0dc4 *R/summaryS.s
0af65ac91b7c04b13aac7791bdf758af *R/symbol.freq.s
474107a89a3a998eb78bbee1ac2dfdbe *R/sys.s
0917e77a727d026b5b12511b7710b283 *R/t.test.cluster.s
eb87b4f621bfa787f568ceca584f9240 *R/tabulr.s
10578b4a45211889232005d4a67d49f2 *R/testCharDateTime.r
c1dba69a612fd6fb121adb946a77596e *R/tex.s
d2cde1711133822c7983dacf7325ccce *R/transace.s
93f895d99d674bc46b750b858c758686 *R/transcan.s
847daf7fa4e67d6641c763a7873df93c *R/translate.s
72baa8606c0d8a92dacdd1a638f70c0c *R/units.s
77c1963cc01ee8e706982a042b965522 *R/upData.s
ec3a0500a5d1afab5b53b052532dcc40 *R/upFirst.s
25b2e4d51ba3475a3e2bd22da07305ad *R/valueTags.s
c6b53e54357bba5bfa5437cc66eb9645 *R/varclus.s
f944b2ef6a72dde4e6441b75a51f4405 *R/wtd.stats.s
bd3a508012d473f9e0de02aa8519ebc2 *R/xYplot.s
80493b82ba05f6c43b10f09df59d83c7 *R/xtfrm.labelled.s
0bdfa14386b3d2bf55d4e37607d37a9e *R/xy.group.s
1a0242a28b648a6eb2414ba6c86fa8d6 *R/ynbind.s
4a9bc5cb2318131f4f6a177965555ef1 *R/zoom.s
97a468f9be30b70f02ac809ce2fe8e17 *README.md
357fee92fdddd451d253d7a50836b510 *inst/CHANGELOG
ba03163ea3c1f22c4a26a91477140822 *inst/THANKS
d41d8cd98f00b204e9800998ecf8427e *inst/WISHLIST
68166d17f3525cc6d5ea99fc6a8ba83c *inst/tests/Ecdf.r
0b3a69351a7c10b16eca69a9a713fa9e *inst/tests/ace.s
bf11d0979d077c25d7510b289aa20a37 *inst/tests/american-medical-association.csl
99a90bcb308d082f2b615822d477a47f *inst/tests/areg.s
b4ca43805f7aa45aae3d146ef8abf22e *inst/tests/aregImpute.r
d8de6324e50018859c5fa8ff55be5450 *inst/tests/aregImpute2.r
0a87728fe0b671998cd1a655d421ce96 *inst/tests/aregImpute3.r
2c2ca2a3c60ff347a101d41002c775e4 *inst/tests/aregImpute4.r
06304d5b59a91d1baad738f399f11b1d *inst/tests/aregImpute5.r
d480d9f9616fd71971912aa960edb075 *inst/tests/aregImpute6.r
3fb7b95cce65c35e087cac94acda6ace *inst/tests/bootkm.r
8c71471c170cd4b7f0bee73fca3055fe *inst/tests/consolidate.R
40313999a2d82fb8ffac5f8c2306ceaa *inst/tests/csv/FORMAT.csv
5fee907037836b5e42e0f163e608325f *inst/tests/csv/TEST.csv
0f4e7bbe74214f3fb0c6faca2ca4d423 *inst/tests/csv/_contents_.csv
aa63c7d2c13113604223850ab3fe03e3 *inst/tests/cut2.r
83ef2f75fdba346104cf289db2f49c88 *inst/tests/dataframeReduce.r
24eb3c278840a4685edec5b605303db5 *inst/tests/describe.Rmd
8b12f0007761ac5ddd36ae01a1d55516 *inst/tests/describe2.r
11a8e5cd6eab21d1d166eadce4e13d53 *inst/tests/dotchartpl.r
85183f4a6c723e165b9b5ebcb0bbf6ff *inst/tests/examples.Rmd
cfd5ed703e86daf89c13c7c144666c2f *inst/tests/fit.mult.impute.bootstrap.r
44047c6260fc023709d57056d73c99c6 *inst/tests/fit.mult.impute.r
a33d53cc60ed7ba477c3c42456e2efd1 *inst/tests/fitmult-lrt.r
dbda28f189aafc3af047a447ad931f86 *inst/tests/gbayes.r
15175f29e455ff23d155ef75a6a69696 *inst/tests/histSpike.r
e1cb5b0004896e62c59f030749502d93 *inst/tests/histSpikeg.r
e941595ceda17c900b77203c90797392 *inst/tests/hlab.r
4561d42b56f8d606f94c7a85083aefd9 *inst/tests/hoeff.r
fb5a32fba03003505f33815e5152dce6 *inst/tests/howto.html
ab7e1b44e7a9d4746fea7465df1316fb *inst/tests/html-summaryM.r
167427cf5e418668ba98a7f8e2273837 *inst/tests/html.data.frame.r
5f2b069b6eaa4976089078925e3f041e *inst/tests/html.summaryM.Rmd
ad6976712202d234e0234878be75d637 *inst/tests/inverseFunction.r
4dd4b3b76ef12c7e6dea7dc633f35029 *inst/tests/label.r
573465e16901912adb87c138b62056c6 *inst/tests/largest.empty.r
121746a15a8dbc8ad174c7f082990131 *inst/tests/latex-color.r
30c299061f543ca648113ab0269dbc74 *inst/tests/latex-html.Rmd
5aea9e63b75e5610cb0706d6485ba558 *inst/tests/latex.s
8f030d4ee7bc5ac946ca210bcfc53c7b *inst/tests/latex.summaryM.Rnw
a40f892874e68edf0c621971b200260d *inst/tests/latexTabular.r
86bb29e3df2c4cc1dab74549971ca150 *inst/tests/latexTherm.Rnw
7bd0cdcdfc94c92447484dd8d12166c9 *inst/tests/latexTherm.r
b0fc6d4900270c13a20eafd31293bc9c *inst/tests/latexpng.r
0c6b7314112171894df7511e12398f05 *inst/tests/mChoice.r
c0668a4323dcc6d83736668c00456d23 *inst/tests/minor.tick.r
54aed26f652e798340d9c1bf129d9d94 *inst/tests/ordGroupBoot.r
de29fe314e56e44256137d61652066e4 *inst/tests/pMedian.r
ca3aeb15387cea45fb60b35bdd1b572b *inst/tests/panelbp.r
39662c6cbeab67ca1544cb736ea64f70 *inst/tests/plot.summaryM.plotly.r
badcf6388bfae0071da3c10a2b77bac6 *inst/tests/procmeans.txt
65981ed4156c4a00de4287ec05b5d88e *inst/tests/rcspline.plot.r
4cec1c4f2df5dab4ab8118a0c5211e14 *inst/tests/redun.r
e6d67a2c1445037af2aeb21660c0c035 *inst/tests/simRegOrd.r
920c6f52812ea57eb3c602375e9a91cc *inst/tests/spark.qmd
c17621d7759320c355a948272a8f587d *inst/tests/summary.formula.r
93d6cc6040ff9451c6559b41b539a76e *inst/tests/summary.formula.response.stratify.r
c87bad32acd03337701d50ae7ec1bb13 *inst/tests/summaryD.r
b10dd45fb4fb16a077ef138bf0aef253 *inst/tests/summaryM-customtest.r
6c57e8c6b04912ba361b4ce9f8f067d4 *inst/tests/summaryP.r
3a8fd7a3a1d77d8e6741dff90df6ca1e *inst/tests/summaryP2.r
2439671aac7d86d52bfa22db2364b72a *inst/tests/summaryRc.r
b15ca6f668e69c55d1247eb8a3b05328 *inst/tests/summaryS.r
036347670fd51e7fbe173624410af2eb *inst/tests/summarySp.r
b36692d5996f652cf69c36533bda7848 *inst/tests/test.rda
16fb6f4d0b48d6ef354c2f493170239d *inst/tests/test.sas
be545aff97891d7a2d7a89060601d639 *inst/tests/test.xml
907f2fb5b395929aa67db43f7d3488de *inst/tests/test.xpt
f7ecdf35cbda3600541a5a24b2fef03c *inst/tests/test2.xpt
7d8799b50e0922e62f230d1d9d6f619e *inst/tests/testexportlib.r
52c79beb3d98d36025262007f39d5884 *inst/tests/testexportlib.sas
31d1d268acf0f7e75a700c660f4ac731 *inst/tests/upData.r
e1646179a6487ec47f043ba1125a5438 *inst/tests/wtd.r
93023f35f678f7674a067e4ac72c4cb1 *inst/tests/xYplotFilledBands.r
472d787fa8a7f0b5d1ac8dee9d866d22 *inst/todo
7e9e0d3131e1e1fc0e423359e1c829f6 *man/Cs.Rd
cec2ab865d8b920cb20e4157dc2a3faf *man/Ecdf.Rd
ce7c12a81542bed1269e3df56e654e1f *man/Fdebug.Rd
ac4fff37fde2e6d0feae41ceda669399 *man/GiniMd.Rd
a40b9629c493e0d79c8d615968b05b45 *man/Hmisc-internal.Rd
a8d3213ffdeb74c3f50bb2d54a8b9ab5 *man/Lag.Rd
352fb4122854747013fc8d3c96ddb360 *man/Merge.Rd
627dfba9db83428bfec0d8a6a81e0a1c *man/Misc.Rd
c51df7edc25fd92fdd603738b777b384 *man/Overview.Rd
e3273cfa3aa9e69c415b4ebcb10ada71 *man/R2Measures.Rd
71a62f91bcd26f7cbfa9a2755bed0830 *man/Save.Rd
79ed6280942247ec7b4580638c1cc885 *man/abs.error.pred.Rd
6ed0de6a1ac299e867fea5c42fd49024 *man/addMarginal.Rd
0307026cd58ecc62b3bae03fad3444a7 *man/addggLayers.Rd
77a208cf321e5241f5e11e4dbb6d101e *man/all.is.numeric.Rd
89bde30f0578686f86512ffe08e7e4be *man/approxExtrap.Rd
03eed851f558773c200c8e2e09c9fff3 *man/areg.Rd
dcc8a93cf111668be00d78e22f44f607 *man/aregImpute.Rd
b6f41ce5607b746fe5cf9f80601c363e *man/biVar.Rd
c6b61f5f8a001f8748635bde7ff92fb8 *man/binconf.Rd
0c75b817988c321cb06fefeb96fd4096 *man/bootkm.Rd
5b1778fdb848406994977afce5d5926c *man/bpower.Rd
973831b376ae049e13f0bff466ed37c1 *man/bpplot.Rd
629c6b8dd873c22e8608185abd67046f *man/bystats.Rd
5cba3f43942a908ce5fef1004cf2f857 *man/capitalize.Rd
8130fe8c287419fe064d47c079250aaf *man/ciapower.Rd
4202bd798ad52004eec93d91b0a8d945 *man/cnvrt.coords.Rd
4a9199d5a875797123e368ddeb4e7049 *man/combine.levels.Rd
59655083adf88a21c7eab791e972fd0b *man/combplotp.Rd
9b42140e35d81f61e7bd69b847da5d00 *man/completer.Rd
00db76a86189178fe8385d5aaeda346b *man/consolidate.Rd
60014abc0416dfe6cefa079829e55511 *man/contents.Rd
fb3224eb96702359c56fa180ab230562 *man/cpower.Rd
94dbc0d7bcea5846705ee54bedc888db *man/csv.get.Rd
68b879767da3e838f48b4dac36576fb7 *man/curveRep.Rd
6d51b6467a0fe925a33191219b1fbed1 *man/cut2.Rd
3eb4090f0c25a33dcf3452294ca80ec1 *man/data.frame.create.modify.check.Rd
30aa19bbc67a730bfe65d71ecd473bb4 *man/dataRep.Rd
1df7d66a5fcdc97254d4e70a8d487af4 *man/deff.Rd
6ec474869dfc2350b2d6b178751f8afc *man/describe.Rd
4c2942aab548faa49c5a389117b48b70 *man/discrete.Rd
94748a0c9bb7d0290a8b2dc7f4a0bb7d *man/dotchart2.Rd
67f28b3925d62e99e612e36532d3149f *man/dotchart3.Rd
97cf4979cb0a20a9a821fefb978c7b6e *man/dotchartpl.Rd
b6b8b3105a3bea6a000bd44a1290d428 *man/ebpcomp.Rd
0fc46345cb885371abecf611e9527eb8 *man/ecdfSteps.Rd
553f6215baa4cad0ac3da850a3cbc2d2 *man/epi.Rd
36c0c98e4e6fcc78979da741c8dfb386 *man/equalBins.Rd
04771822c60d38658c3720cd91bc2113 *man/errbar.Rd
3bc08c67bb196770cb9ff90bdfa15672 *man/escapeRegex.Rd
50ba2202ecd33f8244ac838560286b93 *man/estSeqMarkovOrd.Rd
a6b8bff2b02dda6ebe943bd8b2044d65 *man/estSeqSim.Rd
b8c947ebb9ea63a9ecf543e9248a512b *man/event.chart.Rd
6f9b356b12e05d93c091e033a7d3f4a8 *man/event.convert.Rd
1beee8ab5e6bde656f0daaf7dfe1cbc5 *man/event.history.Rd
6069148fb3eb0c915315040413058f8d *man/extractlabs.Rd
6ea8ba9c1a5c68fe620b2f626413c598 *man/fImport.Rd
d809990bc7096eeffb6b66912a30bb39 *man/find.matches.Rd
9e22ca4e987f9f93b87b832b7a95d503 *man/first.word.Rd
09eb3bb9c47c3c7862fbc437083aab7d *man/format.df.Rd
041f76e1a74ad1e2361aab071b496dac *man/format.pval.Rd
ebdd21800ffc4beea994eaac7d5b4668 *man/gbayes.Rd
7529bdf0199f61f7159e178a3e9477b4 *man/gbayesSeqSim.Rd
ab3a4686de48e83f00313b53a4242bea *man/geom_stepconfint.Rd
bd9c601a3778a3bc502d85fa39dcb058 *man/getHdata.Rd
2e3625a44c067d7004d304ca7940e9f6 *man/getRs.Rd
c10b701ac0c5fbf3a3a3654378694a1c *man/getZip.Rd
d8e25aba0248ee6ced344b4753915eb8 *man/getabd.Rd
b41ae9f8ef043065d48b384054a11247 *man/ggMisc.Rd
9568f14a303cf9a9fb4034f96149609e *man/ggfreqScatter.Rd
988163ff858c2a7b23ed89d50e056403 *man/ggplotlyr.Rd
244a65b5f7159129541e9a20dde0f4e5 *man/hashCheck.Rd
bd30d6597a6591b2d3b6202931502245 *man/hdquantile.Rd
9bde85cef5a8815e6a4adbc2163aa870 *man/hidingTOC.Rd
8256dd2a02e789ad86ec974fa90420cd *man/hist.data.frame.Rd
901875cc993a500f248fb22af00183d4 *man/histbackback.Rd
450305dcabbc06b9fb94f85d4dd4dbb0 *man/histboxp.Rd
39d49c90f6bc59160ec33f0060a7d7dd *man/hlab.Rd
2d935e2ba637e687ef797c110d925be9 *man/hlabs.Rd
d868cbfad7aa949619a278f5ec1bd96f *man/hoeffd.Rd
7320d2cd7258700d9fe0a0347bd2e055 *man/html.Rd
a674adfbf624682f6dd9ed3c4c49678f *man/htmltabv.Rd
d8856cc1665185e0a13d032e05a7ce67 *man/impute.Rd
b785dbffdb86727e8650cf7e19df0377 *man/intMarkovOrd.Rd
396c791f149027eeafaa9515e93d3d27 *man/knitrSet.Rd
4831b433c139db936cecf4dff76b9a05 *man/labcurve.Rd
fd6547343482e782269052b8ed6cc1c5 *man/label.Rd
d0d1b03b264e6d6456c4e03e5e793c3f *man/latestFile.Rd
99bcdc4206da2fedc3111efc1310bd97 *man/latex.Rd
9c1dfa5242426105b1ca4669b0977997 *man/latexCheckOptions.Rd
c4a248262270a156d9723212daba90ff *man/latexDotchart.Rd
32f49f9726f15c0cae64179b3df74f20 *man/latexTabular.Rd
26968e20d36b253fb7e88f2902f33ec5 *man/latexTherm.Rd
e110de27ad37decbe96987691beb7555 *man/legendfunctions.Rd
3e0817d289b96a7d59847a1144f5fa59 *man/list.tree.Rd
a327d909648492c3e5b524888e27c35a *man/mApply.Rd
316e0a9b1ae8773d36fac47286651eb2 *man/mChoice.Rd
c2c3d7fd7b6118a5480308fcacb57afa *man/makeNstr.Rd
1f4cb5e4cb669550dc4aca2745280286 *man/mdb.get.Rd
5c92f2660d2cd3da53775dddbf20ddf3 *man/meltData.Rd
ad5ca5884567e50edeba874e3f6e0566 *man/mgp.axis.Rd
9d0b72af8d419f9a78263abf388a339f *man/minor.tick.Rd
a647f8b2ac09ca4ddef385a031282e12 *man/movStats.Rd
7e82c634be90a9db0751e0b9ec78778a *man/mtitle.Rd
a6c96e43927dc9a895b515d69b8a4346 *man/multLines.Rd
7d0729fa837191e15480d69e4ed9b2a1 *man/nCoincident.Rd
e63a1b60bde7eda1bef5e0c07d88b9b3 *man/na.delete.Rd
bb9292b59a6e821c1ac815161d2e1196 *man/na.detail.response.Rd
8de167281758448213ff5b4f6052973c *man/na.keep.Rd
f9eb6868ac6e099103d654c1c1084b21 *man/nin.Rd
be66aaec2a64401335915c4671d50918 *man/nobsY.Rd
2f7e82883fd0be5a86a3bec4d4dc250a *man/nstr.Rd
62f5f11538766e90066e6c60293024b1 *man/num.intercepts.Rd
f55f76304f36da0a7d694432dd486981 *man/ordGroupBoot.Rd
ebeb13de209042339fc09649c67baabc *man/pMedian.Rd
76b1975307ab89cc2eb19f12040342ce *man/pairUpDiff.Rd
ab8edbb03663977e40ac2ffa3025d39f *man/panel.bpplot.Rd
b8247448b0c2c6d876ce693f23c9fe85 *man/partition.Rd
b34470017ff70767108ad8ee9e169f02 *man/pc1.Rd
a2d7a25650d5cac6eb0559ebb958f5bf *man/plot.princmp.Rd
fd3bfeefd15a79c301f02a7c4631f8d1 *man/plotCorrM.Rd
a3aed45d350c39121d61700f94e859cb *man/plotCorrPrecision.Rd
88d93c5117d432cbe4ba996c5c74d0d8 *man/plotlyM.Rd
3efc920d21cf40ee48ba846da51d2da5 *man/plsmo.Rd
6a480cdaeb1dbf34525224011cbe17b9 *man/popower.Rd
ddba6cfbe8781522fad083b7bc2c8b92 *man/princmp.Rd
52be83076b1f0442cb5ab5c15bb16a3f *man/print.char.list.Rd
0a25e1c6f349789159b7239ae0f0f620 *man/print.char.matrix.Rd
1104d753bf0c6c1922c62a5861fcefc0 *man/print.princmp.Rd
7bac2453cdb4b35e558e06e062d3697c *man/printL.Rd
daadd0938153ffa0d8697956b67bc11c *man/prnz.Rd
7cc6decd6cb488bf6ef52ea36a1a349f *man/prselect.Rd
260137fa18118deb3cf8f5e8cd09801f *man/pstamp.Rd
df90d6541f8e0c8d1a5444fe2274ab57 *man/qcrypt.Rd
629712fe13491fd516ebbda2c0b99e7c *man/qrxcenter.Rd
c89285950ff40f6516cabb5521cc41f0 *man/r2describe.Rd
6595b75b4809474bad712063e08bb0ff *man/rMultinom.Rd
fd0b9a7cf19a71fc1d5cbedf491600e8 *man/rcorr.Rd
f459db9aa8d263e3e37230490a0f3791 *man/rcorr.cens.Rd
c91c9b895bf92991301998a7e6f12d5b *man/rcorrp.cens.Rd
0e09013825fbf352d7b6492af66a2485 *man/rcspline.eval.Rd
e11508b0ff32c5fb382f41829d2de213 *man/rcspline.plot.Rd
6857f9694fede7b9cf3986f75e45428e *man/rcspline.restate.Rd
fa12a16c134db85d6d6afd1f95a70cb2 *man/reShape.Rd
d0079ed71417d0f8de2b29fca654e963 *man/redun.Rd
a379ec34cf93cb46195c4900c56b4b78 *man/rlegend.Rd
c123888600428367841f57d2d26e9080 *man/rm.boot.Rd
30fc230091ff18884d7fa64bbca5175d *man/rmClose.Rd
f7e532ece5295a38e93b64603db186c7 *man/runParallel.Rd
fa34dd6513e9995c87890ff22c320f6c *man/runifChanged.Rd
326928a75e512ba40912a48fc355934e *man/samplesize.bin.Rd
89c4eefbaac065e35e5becf8f6e5319d *man/sasxport.get.Rd
00f128485f2bd4c29252f13dc983e864 *man/scat1d.Rd
db05fb7a0b4ab6f2e3296104ab70dca1 *man/score.binary.Rd
08744258b9fd7ca14e1b32ba62ca245f *man/sedit.Rd
99b427e67d354d897fc74ff4b21d6621 *man/seqFreq.Rd
8dbcd26d8e34394832eaa6c2f32931a3 *man/show.pch.Rd
053929c241c0d69a15198bfe23c22ec5 *man/showPsfrag.Rd
89f7962d3a2998760a8ec9a1025a08a8 *man/simMarkovOrd.Rd
9a8ed340a60f8fde016acc6b361a2044 *man/simRegOrd.Rd
c63b29fcce92356968e66f1c66cd9bcd *man/simplifyDims.Rd
baf8dcee242a5c39c4e6cddfc11b71ad *man/smean.sd.Rd
32aeaf16b41f0b5cc1f9552b0e8946ff *man/solvet.Rd
ba32fcedcee35f8db3674fcb7fd5182b *man/somers2.Rd
7a28776c28e986b906cf041f07b4ad74 *man/soprobMarkovOrd.Rd
40c56a903db1c8731d357d519c2a7e6d *man/soprobMarkovOrdm.Rd
584eb5885df8361333458acfba0d8165 *man/spikecomp.Rd
9a8ed935151b63601f404fc7df637ae5 *man/spower.Rd
bcf38fffb93e2739eebe0e21ca00e070 *man/spss.get.Rd
d63d4d4bd78f7c6e545bc226dfd58481 *man/src.Rd
0cb5588f043d047c80ae3b90f7b3e6f9 *man/stat_plsmo.Rd
ac3599e852460b9a3f9c1c656b01c2f8 *man/stata.get.Rd
8c73fc252d781ce3fdaee10eade4126b *man/string.bounding.box.Rd
1bbde6f65443df855afb233c9705f423 *man/string.break.line.Rd
9a4cac30cfc22acf2afa062a30b62d9c *man/stringDims.Rd
bbd0b4524bd9a4e350d6a7ad4aca3a0a *man/subplot.Rd
6181f806d9bb937ccc0894d4e079378c *man/summarize.Rd
775133817c1e694038ac36713d1adf30 *man/summary.formula.Rd
f545fe56c301c81a23e81cc2034a4561 *man/summaryM.Rd
e1728a29ba3b7a4b55c861d87f814e01 *man/summaryP.Rd
ad31c9e52f4fc0991357792fb6395c60 *man/summaryRc.Rd
36c4f09fef215ced18fea72c6f240ee8 *man/summaryS.Rd
d9e66a07144bec4e7365da937119b5ea *man/symbol.freq.Rd
9f6e09f2d3a2ef434384999e071f4755 *man/sys.Rd
60faf1203ecb620797e8ce494ed1c0a6 *man/t.test.cluster.Rd
8fa160cbee216dbf5cb95959f4fe485e *man/tabulr.Rd
996241a9c13b901a3d5133de1624b9ed *man/testCharDateTime.Rd
ec1a45cd81fe0bb55b45eb5b0ead1e73 *man/tex.Rd
6a55f6a9b70721cec9c3b8830bceaa0e *man/transace.Rd
befd3ceb2cd8cec372ab9c079cbbac2a *man/transcan.Rd
7e9eae90e12d52f637f46ce78bfc0c53 *man/translate.Rd
76093ee37d83f9e13c9be04fbaaca9bd *man/trunc.POSIXt.Rd
f96c75ccd181f46905cf268ed970950e *man/units.Rd
4a2c2dedc27b7f7b12110666e8f1108f *man/unix/sas.get.Rd
520aafa2af57171547f9d174e976d903 *man/upData.Rd
ff471a5b0ea40672a9f38901fcc4129d *man/upFirst.Rd
26c8160a0663c12504eb2e33b6118c45 *man/valueTags.Rd
d37883cd97dfed83cf84e92aa94705bf *man/varclus.Rd
82dac06e1320048cccb93b0e5580ba3a *man/vlab.Rd
a7ee5852bd965cedb06b50e051df63b1 *man/windows/sas.get.Rd
583a620471e3172d5d4cc3838a65b957 *man/wtd.stats.Rd
5b1ee124701694b2e4e3df44103890fa *man/xYplot.Rd
13c3f8a542f16076e346f9d6cc7904c3 *man/xtfrm.labelled.Rd
f4201e2abbe8b745abb7fcc98668332f *man/xy.group.Rd
c63b3fa2193829f494e39d8cd557cd83 *man/yearDays.Rd
c87ff598535f37eaf4c78f044e722b74 *man/ynbind.Rd
8360e406703130bb09302a64a9638860 *src/Hmisc.c
feacb897c62776acb7e8ebffa7446262 *src/Hmisc.h
72742560872c7a863b1aa9ce21160d4a *src/cidxcn.f90
a519b7e05097490440cb744c57392de4 *src/cidxcp.f90
7e72cc8bd747b4f470f778f252818cc3 *src/cutgn.f90
1d61e62a27b2830ad58ac6645b11c5df *src/hlqest.f90
3e94078aa23d484e0bd94375b813855e *src/hoeffd.f90
ae1123fc79ca93bcaf9d83fcaccff889 *src/init.c
1f84fad7c8d73e5839030ce4d2cf1f85 *src/jacklins.f90
10538b0cf98c499976a5bb6d74a5195e *src/largrec.f
2d0aa4a90313d361f628383dd5fcf7fb *src/mChoice.c
9e3c72bd4e48c78170545e5e0c50d7be *src/maxempr.f90
69d3a08b2cd911161ba22af68f97f611 *src/nstr.c
5fd3cecbf580b3cb32f365f1f9036794 *src/ranksort.c
df53032c14b9c8a41bfe9529411c7729 *src/ratfor/cidxcn.r
ebb3e56a96c8678a2e3184d176af9926 *src/ratfor/cidxcp.r
4549893e527bae0e3140b29579c74720 *src/ratfor/hoeffd.r
eec5235344d56eaaaff52805e71fdf30 *src/ratfor/jacklins.r
daae316a69a5b013bdf334c4302725cd *src/ratfor/maxempr.r
29402d28cea92baf7631bc7ba15be2d2 *src/ratfor/rcorr.r
1e330cf030042a8aef488a254a6b1074 *src/ratfor/wclosest.r
876ea1d7925ee7c194d363a7214504f3 *src/rcorr.f90
32839fee8ce62db43ef9b527256a8737 *src/sas/exportlib.sas
6391fc48464e9b488b3626eaacbe967d *src/string_box.c
c6c9ebc7448e3c3fafdbac4c610344d4 *src/wclosest.f90
pc1 <- function(x, hi)
{
  ## First principal component of the columns of x.
  ## Rows containing any NA are removed, each column is standardized to
  ## mean 0 / SD 1, and PC1 scores are computed with prcomp().  The linear
  ## combination of the ORIGINAL (unstandardized) columns reproducing the
  ## scores is obtained with lsfit() and printed, and attached to the
  ## result as attribute "coef".
  ##
  ## x  : numeric matrix
  ## hi : if given, scores are oriented so that most raw-variable
  ##      coefficients are positive, then linearly rescaled to [0, hi]
  ##
  ## Returns the PC1 scores invisibly (prints diagnostics as a side effect).
  nc <- ncol(x)
  ## Matrix product with a vector of ones yields NA for any row with an NA
  complete <- !is.na(x %*% rep(1, nc))
  x  <- x[complete, ]
  raw <- x                         # keep unstandardized copy for lsfit
  for(j in seq_len(nc)) {
    v <- x[, j]
    x[, j] <- (v - mean(v)) / sqrt(var(v))
  }
  pca <- prcomp(x)
  cat("Fraction variance explained by PC1:",
      format(pca$sdev[1]^2 / sum(pca$sdev^2)), "\n\n")
  score <- pca$x[, 1]
  fit <- lsfit(raw, score)
  if(!missing(hi)) {
    ## Flip sign when at least half the coefficients are negative,
    ## then map the score range onto [0, hi] and refit
    if(sum(fit$coef[-1] < 0) >= nc / 2)
      score <- -score
    rng <- range(score)
    score <- hi * (score - rng[1]) / diff(rng)
    fit <- lsfit(raw, score)
  }
  cat("Coefficients to obtain PC1:\n\n")
  print(fit$coef)
  attr(score, "coef") <- fit$coef
  invisible(score)
}
find.matches <- function(x, y, tol=rep(0,ncol(y)), scale=tol,
                         maxmatch=10)
{
  ## For each row of x, find up to maxmatch rows of y that agree with it
  ## within a column-wise tolerance 'tol'.  Qualifying rows are ranked by
  ## a scaled sum of squared differences, dividing each column's absolute
  ## difference by the corresponding 'scale' entry (default: tol).
  ##
  ## x, y     : matrices (vectors are promoted) with equal column counts
  ## tol      : per-column maximum absolute difference allowed for a match
  ## scale    : per-column divisor used in the ranking distance
  ## maxmatch : maximum number of matches retained per row of x
  ##
  ## Returns a list of class 'find.matches':
  ##   matches  : y row names (or row numbers when y has none), best first
  ##   distance : corresponding scaled squared distances
  if(!is.matrix(x))
    x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  if(!is.matrix(y))
    y <- as.matrix(y)
  if(p != ncol(y))
    stop("number of columns of x and y must match")
  ny <- nrow(y)
  rown <- dimnames(x)[[1]]
  ry <- dimnames(y)[[1]]
  matches <- matrix(if(length(ry))
                      ""
                    else
                      0,
                    n, maxmatch,
                    dimnames=list(rown,
                                  paste("Match #",1:maxmatch,sep="")))
  distance <- matrix(NA, n, maxmatch,
                     dimnames=list(rown,
                                   paste("Distance #",1:maxmatch,sep="")))
  if(length(ry)==0)
    ry <- 1:ny
  ## Guard against division by zero.  Bug fix: nonzero entries must keep
  ## their own 'scale' value; previously the corresponding 'tol' value was
  ## substituted (ifelse(scale==0,1,tol)), silently ignoring a
  ## user-supplied 'scale'.
  scale <- ifelse(scale==0,1,scale)
  ones <- rep(1,p)
  mx <- 0
  for(i in 1:n) {
    ## |y - x[i,]| computed column-wise via element recycling
    dif <- abs(y - rep(x[i,], rep.int(ny,p)))
    toll <- rep(tol, rep.int(nrow(dif),p))
    ## rows of y where every column difference is within tolerance
    which <- (1:ny)[((dif > toll) %*% ones)==0]
    lw <- length(which)
    if(lw) {
      scaled <- dif[which,,drop=FALSE]/rep(scale, rep.int(lw,p))
      dist <- (scaled^2) %*% ones
      lw <- min(lw,maxmatch)
      mx <- max(mx,lw)
      d <- order(dist)[1:lw]       # keep the lw closest matches
      matches[i,1:lw] <- ry[which[d]]
      distance[i,1:lw] <- dist[d]
    }
  }
  ## Trim unused trailing columns (mx = most matches found for any row)
  structure(list(matches=matches[,1:mx], distance=distance[,1:mx]),
            class="find.matches")
}
print.find.matches <- function(x, digits=.Options$digits, ...)
{
  ## Print method for 'find.matches' objects: shows the matches matrix
  ## (unquoted) followed by the distance matrix rounded to 'digits'.
  show <- function(title, value, ...) {
    cat("\n", title, ":\n\n", sep="")
    print(value, ...)
  }
  show("Matches", x$matches, quote=FALSE)
  show("Distances", x$distance, digits=digits)
  invisible()
}
summary.find.matches <- function(object, ...)
{
  ## Summary method for 'find.matches' objects.  Reports, as printed
  ## output only: the frequency table of the number of matches per
  ## observation, the median best-match distance by number of matches,
  ## and any y rows that were the first (closest) match more than once.
  mat  <- object$matches
  dist <- object$distance
  cat("Frequency table of number of matches found per observation\n\n")
  m <- rowSums(!is.na(dist))       # matches found per observation
  print(table(m))
  cat("\nMedian minimum distance by number of matches\n\n")
  found <- m > 0
  print(tapply(dist[found, 1], m[found], median))
  ## First-choice matches used for more than one observation
  first <- table(mat[found, 1])
  first <- first[first > 1]
  if(length(first) > 0) {
    cat("\nObservations selected first more than once (with frequencies)\n\n")
    print(first)
  } else cat("\nNo observations selected first more than once\n\n")
  invisible()
}
## Match each case to up to 'maxmatch' controls whose matching variable
## xcontrol lies within 'tol' of the case's xcase value, stacking each case
## followed by its matched controls into one data frame.
##
## xcase, ycase       matching variable and response(s) for cases
## idcase             case identifiers (default: names of ycase)
## xcontrol, ycontrol matching variable and response(s) for candidate controls
## idcontrol          control identifiers
## tol                required; maximum |xcontrol - xcase| for a match
## maxobs             upper bound on total rows (cases + controls) in result
## maxmatch           maximum number of controls kept per case
## which              'closest' keeps the nearest controls when more than
##                    maxmatch qualify; 'random' samples among them
##
## Prints a frequency table of matched-control counts and returns a data
## frame with columns idcase, type ('case'/'control'), id, x, y.
matchCases <- function(xcase, ycase, idcase=names(ycase),
xcontrol, ycontrol, idcontrol=names(ycontrol),
tol=NULL,
maxobs=max(length(ycase),length(ycontrol))*10,
maxmatch=20, which=c('closest','random'))
{
if(!length(tol))
stop('must specify tol')
if((length(xcase)!=length(ycase)) || (length(xcontrol)!=length(ycontrol)))
stop('lengths of xcase, ycase and of xcontrol, ycontrol must be same')
which <- match.arg(which)
## Responses are handled as matrices so they may be multivariate
ycase <- as.matrix(ycase)
ycontrol <- as.matrix(ycontrol)
## Fall back to sequence numbers when no identifiers are available
if(!length(idcase))
idcase <- 1:length(ycase)
if(!length(idcontrol))
idcontrol <- 1:length(ycontrol)
idcase <- as.character(idcase)
idcontrol <- as.character(idcontrol)
## NOTE(review): length() of a matrix counts all elements, not rows; for a
## multi-column ycase/ycontrol the default ids above and ncase/ncontrol
## below would exceed the number of observations -- confirm single-column
## use is assumed here.
## Drop cases with a missing matching variable or any missing response
## (row sums via %*% are NA when any element of the row is NA)
j <- is.na(ycase %*% rep(1,ncol(ycase))) | is.na(xcase)
if(any(j)) {
warning(paste(sum(j),'cases removed due to NAs'))
ycase <- ycase[!j,,drop=FALSE]
xcase <- xcase[!j]
idcase <- idcase[!j]
}
## Same NA screening for the controls
j <- is.na(ycontrol %*% rep(1,ncol(ycontrol))) | is.na(xcontrol)
if(any(j)) {
warning(paste(sum(j),'controls removed due to NAs'))
ycontrol <- ycontrol[!j,,drop=FALSE]
xcontrol <- xcontrol[!j]
idcontrol <- idcontrol[!j]
}
## Preallocate output columns at the maximum permitted size; rows beyond
## 'end' are trimmed when the result is assembled
idCase <- id <- character(maxobs)
type <- factor(rep(NA,maxobs), c('case','control'))
x <- numeric(maxobs)
y <- matrix(NA, ncol=ncol(ycase), nrow=maxobs)
last <- 0
ncase <- length(ycase)
ncontrol <- length(ycontrol)
matches <- integer(ncase)
for(i in 1:ncase) {
## Logical mask of controls within tolerance of case i
s <- abs(xcontrol-xcase[i]) <= tol
nmatch <- sum(s)
if(nmatch > maxmatch) {
## Too many qualifying controls: thin to maxmatch
s <- (1:ncontrol)[s] ## next line was sample(j,...) 4jun02
if(which=="random")
s <- sample(s, maxmatch, replace=FALSE)
else {
## 'closest': keep the maxmatch controls nearest to the case
errors <- abs(xcontrol[s]-xcase[i])
serrors <- order(errors)
s <- (s[serrors])[1:maxmatch]
}
nmatch <- maxmatch
}
matches[i] <- nmatch
if(!nmatch)
next
## Append 1 case row + nmatch control rows to the output slots
end <- last + nmatch + 1
if(end > maxobs)
stop(paste('needed maxobs >',maxobs))
start <- last+1
last <- end
idCase[start:end] <- rep(idcase[i], nmatch+1)
type[start:end] <- c('case',rep('control',nmatch))
id[start:end] <- c(idcase[i], idcontrol[s])
x[start:end] <- c(xcase[i], xcontrol[s])
y[start:end,] <- rbind(ycase[i,,drop=FALSE], ycontrol[s,,drop=FALSE])
}
cat('\nFrequencies of Number of Matched Controls per Case:\n\n')
print(table(matches))
cat('\n')
## NOTE(review): if NO case had a match, 'end' is never assigned and the
## indexing below fails with an object-not-found error -- confirm whether
## an empty result should be returned instead.
structure(list(idcase=idCase[1:end], type=type[1:end],
id=id[1:end], x=x[1:end], y=drop(y[1:end,])),
row.names=as.character(1:end),
class='data.frame')
}
Hmisc/R/ciapower.s 0000644 0001762 0000144 00000005535 12243661443 013530 0 ustar ligges users ## tref time at which mortalities estimated
## n1 total sample size, stratum 1
## n2 total sample size, stratum 2
## m1c tref-year mortality, stratum 1 control
## m2c " " 2 "
## r1 % reduction in m1c by intervention, stratum 1
## r2 % reduction in m2c by intervention, stratum 2
## accrual duration of accrual period
## tmin minimum follow-up time
## alpha type I error
## pr set to T to print intermediate results
ciapower <- function(tref,
                     n1,
                     n2,
                     m1c,
                     m2c,
                     r1,
                     r2,
                     accrual,
                     tmin,
                     alpha=.05,
                     pr=TRUE)
{
  ## Power of the test for interaction between treatment and stratum in a
  ## two-stratum comparison with exponential survival.  Mortalities at
  ## tref years define the hazards; accrual is uniform over `accrual`
  ## years with minimum follow-up `tmin`.
  if(m1c>1 | m2c>1)
    stop("m1c and m2c must be fractions")
  ## Intervention-group mortalities after the stated percent reductions
  m1i <- (1-r1/100)*m1c
  m2i <- (1-r2/100)*m2c
  dn <- list(c("Stratum 1","Stratum 2"), c("Control","Intervention"))
  ## Helper: 2 x 2 table (stratum by treatment group) for printing
  grptab <- function(c1, c2, i1, i2)
    structure(cbind(c(c1,c2), c(i1,i2)), dimnames=dn)
  if(pr) {
    cat("\nAccrual duration:",accrual,"y Minimum follow-up:",tmin,"y\n")
    cat("\nSample size Stratum 1:",n1," Stratum 2:",n2,"\n")
    cat("\nAlpha=",alpha,"\n")
    cat("\n",tref,"-year Mortalities\n",sep="")
    print(grptab(m1c, m2c, m1i, m2i))
  }
  ## Exponential hazard implied by a tref-year mortality fraction
  hazard <- function(m) -log(1-m)/tref
  lam1c <- hazard(m1c)
  lam2c <- hazard(m2c)
  lam1i <- hazard(m1i)
  lam2i <- hazard(m2i)
  if(pr) {
    cat("\nHazard Rates\n")
    print(grptab(lam1c, lam2c, lam1i, lam2i))
  }
  ## Probability that a subject's event is observed during the study,
  ## with uniform accrual over [0, accrual] and total duration tmax
  tmax <- tmin+accrual
  pevent <- function(lam)
    1 - 1/accrual/lam*(exp(-tmin*lam) - exp(-tmax*lam))
  p1c <- pevent(lam1c)
  p2c <- pevent(lam2c)
  p1i <- pevent(lam1i)
  p2i <- pevent(lam2i)
  if(pr) {
    cat("\nProbabilities of an Event During Study\n")
    print(grptab(p1c, p2c, p1i, p2i))
  }
  ## Expected event counts, assuming 1:1 allocation within each stratum
  e1c <- p1c*n1/2
  e2c <- p2c*n2/2
  e1i <- p1i*n1/2
  e2i <- p2i*n2/2
  if(pr) {
    cat("\nExpected Number of Events\n")
    print(round(grptab(e1c, e2c, e1i, e2i), 1))
  }
  ## Expected observed log ratio of stratum-specific hazard ratios
  delta <- log((lam1i/lam1c)/(lam2i/lam2c))
  if(pr)
    cat("\nRatio of hazard ratios:",format(exp(delta)),"\n")
  ## Large-sample variance: sum of reciprocal expected event counts
  sd <- sqrt(1/e1c + 1/e2c + 1/e1i + 1/e2i)
  if(pr)
    cat("Standard deviation of log ratio of ratios:",format(sd),"\n")
  z <- -qnorm(alpha/2)
  ## Two-sided power for detecting the interaction
  c(Power = 1 - ( pnorm(z - abs(delta)/sd) - pnorm(-z - abs(delta)/sd) ) )
}
Hmisc/R/mtitle.s 0000644 0001762 0000144 00000001167 12243661443 013212 0 ustar ligges users ## Thanks for Rick Becker for suggestions
## Place overall titles on a page of plots.
## main  : main title, centered at the top
## ll    : lower-left title
## lc    : lower-centre title
## lr    : lower-right title; defaults to the current date, use '' to suppress
## cex.m : character expansion for the main title
## cex.l : character expansion for the lower titles
## ...   : passed on to mtext()/title()
## Each title is drawn only if its argument was supplied (tested via missing()).
mtitle <-
  function(main,ll,lc,
           lr=format(Sys.time(),'%d%b%y'),
           cex.m=1.75, cex.l=.5, ...)
{
  ## If an outer margin is active, write into it with mtext(outer=TRUE);
  ## otherwise fall back to title() on the current plot region
  out <- any(par()$oma!=0)
  g <-
    if(out) function(...) mtext(..., outer=TRUE)
    else function(z, adj, cex, side, ...)
      if(missing(side))
        title(z, adj=adj, cex=cex)
      else
        title(sub=z, adj=adj, cex=cex)
  if(!missing(main))
    g(main,cex=cex.m,adj=.5)
  if(!missing(lc))
    g(lc,side=1,adj=.5,cex=cex.l,...)
  if(!missing(ll))
    g(ll,side=1,adj=0,cex=cex.l,...)
  if(lr!="")
    g(lr,side=1,adj=1,cex=cex.l,...)
  invisible()
}
Hmisc/R/latestFile.r 0000644 0001762 0000144 00000001766 14374202406 014011 0 ustar ligges users ##' Find File With Latest Modification Time
##'
##' Subject to matching on `pattern` finds the last modified file, and if `verbose` is `TRUE` reports on how many total files matched `pattern`.
##' @title latestFile
##' @param pattern a regular expression; see [base::list.files()]
##' @param path full path, defaulting to current working directory
##' @param verbose set to `FALSE` to not report on total number of matching files
##' @return the name of the last modified file
##' @author Frank Harrell
##' @seealso [base::list.files()]
##' @md
latestFile <- function(pattern, path='.', verbose=TRUE) {
  ## Find the file in `path` matching `pattern` (a regular expression for
  ## list.files) with the most recent modification time.
  ##
  ## pattern : regular expression; see list.files()
  ## path    : directory to search, defaulting to the working directory
  ## verbose : if TRUE and more than one file matches, report the winner
  ##           and the number of matching files
  ## Returns the bare file name (no directory component); character(0)
  ## with a warning when nothing matches.
  f <- list.files(path=path, pattern=pattern)
  if(length(f) == 1) return(f)
  if(length(f) == 0) {
    warning(paste('no files matching', pattern, 'were found'))
    return(character(0))
  }
  ## Bug fix: list.files() returns names relative to `path`, so file.info
  ## must be given path-qualified names; previously any path != '.' yielded
  ## NA modification times and an arbitrary result
  i <- file.info(file.path(path, f), extra_cols=FALSE)
  mtime <- i$mtime
  j <- order(mtime, decreasing=TRUE)[1]
  if(verbose) cat('\nLast modified file: ', f[j],
                  ' (of ', length(f), ' files)\n\n', sep='')
  f[j]
}
Hmisc/R/movStats.r 0000644 0001762 0000144 00000042360 14576024177 013543 0 ustar ligges users ##' Moving Estimates Using Overlapping Windows
##'
##' Function to compute moving averages and other statistics as a function
##' of a continuous variable, possibly stratified by other variables.
##' Estimates are made by creating overlapping moving windows and
##' computing the statistics defined in the stat function for each window.
##' The default method, `space='n'` creates varying-width intervals each having a sample size of `2*eps +1`, and the smooth estimates are made every `xinc` observations. Outer intervals are not symmetric in sample size (but the mean x in those intervals will reflect that) unless `eps=nignore`, as outer intervals are centered at observations `nignore` and `n - nignore + 1` where the default for `nignore` is 10. The mean x-variable within each windows is taken to represent that window. If `trans` and `itrans` are given, x means are computed on the `trans(x)` scale and then `itrans`'d. For `space='x'`, by default estimates are made on to the `nignore` smallest to the `nignore` largest
##' observed values of the x variable to avoid extrapolation and to
##' help getting the moving statistics off on an adequate start for
##' the left tail. Also by default the moving estimates are smoothed using `supsmu`.
##' When `melt=TRUE` you can feed the result into `ggplot` like this:
##' `ggplot(w, aes(x=age, y=crea, col=Type)) + geom_line() +`
##' `facet_wrap(~ Statistic)`
##'
##' See [here](https://hbiostat.org/rflow/analysis.html#sec-analysis-assoc) for several examples.
##'
##' @title movStats
##' @author Frank Harrell
##' @md
##' @param formula a formula with the analysis variable on the left and the x-variable on the right, following by optional stratification variables
##' @param stat function of one argument that returns a named list of computed values. Defaults to computing mean and quartiles + N except when y is binary in which case it computes moving proportions. If y has two columns the default statistics are Kaplan-Meier estimates of cumulative incidence at a vector of `times`.
##' @param discrete set to `TRUE` if x-axis variable is discrete and no intervals should be created for windows
##' @param space defines whether intervals used fixed width or fixed sample size
##' @param eps tolerance for window (half width of window). For `space='x'` is in data units, otherwise is the sample size for half the window, not counting the middle target point.
##' @param varyeps applies to `space='n'` and causes a smaller `eps` to be used in strata having too few observations to yield at least three x points
##' @param nignore see description, default is to exclude `nignore=10` points on the left and right tails from estimation and plotting
##' @param xinc increment in x to evaluate stats, default is xlim range/100 for `space='x'`. For `space='n'` `xinc` defaults to m observations, where m = max(n/200, 1).
##' @param xlim 2-vector of limits to evaluate if `space='x'` (default is `nignore` smallest to `nignore` largest)
##' @param times vector of times for evaluating one minus Kaplan-Meier estimates
##' @param tunits time units when `times` is given
##' @param msmooth set to `'smoothed'` or `'both'` to compute `lowess`-smooth moving estimates. `msmooth='both'` will display both. `'raw'` will display only the moving statistics. `msmooth='smoothed'` (the default) will display only the smoothed moving estimates.
##' @param tsmooth defaults to the super-smoother `'supsmu'` for after-moving smoothing. Use `tsmooth='lowess'` to instead use `lowess`.
##' @param bass the `supsmu` `bass` parameter used to smooth the moving statistics if `tsmooth='supsmu'`. The default of 8 represents quite heavy smoothing.
##' @param span the `lowess` `span` used to smooth the moving statistics
##' @param maxdim passed to `hare`, default is 6
##' @param penalty passed to `hare`, default is to use BIC. Specify 2 to use AIC.
##' @param trans transformation to apply to x
##' @param itrans inverse transformation
##' @param loess set to TRUE to also compute loess estimates
##' @param ols set to TRUE to include rcspline estimate of mean using ols
##' @param qreg set to TRUE to include quantile regression estimates w rcspline
##' @param lrm set to TRUE to include logistic regression estimates w rcspline
##' @param orm set to TRUE to include ordinal logistic regression estimates w rcspline (mean + quantiles in `tau`)
##' @param hare set to TRUE to include hazard regression estimates of incidence at `times`, using the `polspline` package
##' @param lrm_args a `list` of optional arguments to pass to `lrm` when `lrm=TRUE`, e.g., `list(maxit=20)`
##' @param family link function for ordinal regression (see `rms::orm`)
##' @param k number of knots to use for ols and/or qreg rcspline
##' @param tau quantile numbers to estimate with quantile regression
##' @param melt set to TRUE to melt data table and derive Type and Statistic
##'
##' @param pr defaults to no printing of window information. Use `pr='plain'` to print in the ordinary way, `pr='kable` to convert the object to `knitr::kable` and print, or `pr='margin'` to convert to `kable` and place in the `Quarto` right margin. For the latter two `results='asis'` must be in the chunk header.
##' @param data data.table or data.frame, default is calling frame
##' @return a data table, with attribute `infon` which is a data frame with rows corresponding to strata and columns `N`, `Wmean`, `Wmin`, `Wmax` if `stat` computed `N`. These summarize the number of observations used in the windows. If `varyeps=TRUE` there is an additional column `eps` with the computed per-stratum `eps`. When `space='n'` and `xinc` is not given, the computed `xinc` also appears as a column. An additional attribute `info` is a `kable` object ready for printing to describe the window characteristics.
##'
movStats <- function(formula, stat=NULL, discrete=FALSE,
                     space=c('n', 'x'),
                     eps =if(space=='n') 15, varyeps=FALSE, nignore=10,
                     xinc=NULL, xlim=NULL,
                     times=NULL, tunits='year',
                     msmooth=c('smoothed', 'raw', 'both'),
                     tsmooth=c('supsmu', 'lowess'),
                     bass=8, span=1/4, maxdim=6, penalty=NULL,
                     trans=function(x) x, itrans=function(x) x,
                     loess=FALSE,
                     ols=FALSE, qreg=FALSE, lrm=FALSE,
                     orm=FALSE, hare=FALSE,
                     lrm_args=NULL, family='logistic',
                     k=5, tau=(1:3)/4, melt=FALSE,
                     data=environment(formula),
                     pr=c('none', 'kable', 'plain', 'margin')) {
  ## See the roxygen comments above for the full parameter documentation.
  ## Resolve enumerated-choice arguments up front
  space <- match.arg(space)
  msmooth <- match.arg(msmooth)
  movlab <- 'Moving '
  if(discrete) {
    ## Discrete x: no windows, no smoothing, and no 'Moving' label prefix
    msmooth <- 'raw'
    movlab <- ''
  }
  tsmooth <- match.arg(tsmooth)
  pr <- match.arg(pr)
  ## Optional-dependency checks; only the features requested are required
  if(! requireNamespace('data.table', quietly=TRUE))
    stop('data.table package must be installed')
  if(ols || qreg || lrm)
    if(! requireNamespace('rms', quietly=TRUE))
      stop('rms package must be installed if using ols, qreg, or lrm')
  if(pr %in% c('kable', 'margin'))
    if(! requireNamespace('kableExtra', quietly=TRUE))
      stop('must install kableExtra package if pr is kable or margin')
  ## Side effect: .knots. is placed in the global environment so that the
  ## rms::rcs(x, .knots.) formulas below can find it at fit time
  envi <- .GlobalEnv
  if(! discrete) assign('.knots.', k, envir=envi)
  ## Build the model frame: y ~ x (+ optional stratification variables)
  mf <- model.frame(formula, data=data)
  v <- names(mf)
  Y <- mf[[1]]
  ## sec=TRUE means y is a 2-column (time, event) survival outcome
  sec <- NCOL(Y) == 2
  if(sec && ! length(times))
    stop('when dependent variable has two columns you must specify times')
  if(sec && (loess || ols || qreg || lrm || orm))
    stop('loess, ols, qreg, lrm, orm do not apply when dependent variable has two columns')
  if(varyeps && space == 'x')
    stop('varyeps applies only to space="n"')
  if(sec) {
    if(hare) if(! requireNamespace('polspline', quietly=TRUE))
      stop('polspline package must be installed if hare=TRUE')
    Y2 <- Y[, 2]
    Y <- Y[, 1]
  } else Y2 <- rep(1, nrow(mf))
  ## Analyze x on the trans() scale; itrans() is applied at the end
  X <- trans(mf[[2]])
  bythere <- length(v) > 2
  ## Combine all stratification variables into one interaction factor
  By <- if(bythere)
    do.call(interaction, c(mf[3:length(mf)], list(sep='::')))
  else
    rep(1, length(X))
  ## Complete-case filter across x, y, (y2), and strata
  i <- is.na(X) | is.na(Y) | is.na(Y2) | is.na(By)
  if(any(i)) {
    i <- ! i
    X <- X [i]
    Y <- Y [i]
    Y2 <- Y2[i]
    By <- By[i]
  }
  ybin <- ! sec && all(Y %in% 0:1)
  ## Pretty labels for quantile levels used by orm/qreg estimates
  qformat <- function(x)
    fcase(x == 0.05, 'P5', x == 0.1, 'P10',
          x == 0.25, 'Q1', x == 0.5, 'Median', x == 0.75, 'Q3',
          x == 0.9, 'P90', x == 0.95, 'P95',
          default=as.character(x))
  ## Default windowed statistics: proportions for binary y, Kaplan-Meier
  ## incidence at `times` for survival y, mean + quartiles otherwise
  if(! length(stat))
    stat <- if(ybin) function(y)
      if(discrete) list(Proportion = mean(y), N= length(y)) else
      list('Moving Proportion' = mean(y), N = length(y))
    else if(sec)
      function(y, y2) {
        # km.quick is in Hmisc
        z <- c(1. - km.quick(survival::Surv(y, y2), times), length(y))
        names(z) <- c(paste0(movlab, times, '-', tunits), 'N')
        as.list(z)
      }
    else
      function(y) {
        if(! length(y)) return(list(Mean=NA, Median=NA, Q1=NA, Q3=NA))
        qu <- quantile(y, (1:3)/4)
        z <- list('Mean' = mean(y),
                  'Median' = qu[2],
                  'Q1' = qu[1],
                  'Q3' = qu[3],
                  N=length(y))
        names(z)[1:4] <- paste0(movlab, names(z)[1:4])
        z
      }
  ## Wrapper adding the window's mean x so space='n' windows can be
  ## plotted at a representative x value
  statx <- function(y, y2, x) {
    s <- if(sec) stat(y, y2) else stat(y)
    if(! discrete && space == 'n') s$.xmean. <- mean(x)
    s
  }
  R <- NULL
  Xinc <- xinc
  uby <- if(is.factor(By)) levels(By) else sort(unique(By))
  needxinc <- space == 'n' && ! length(Xinc)
  ## Per-stratum bookkeeping on sample sizes / window characteristics
  info <- if(discrete) matrix(NA, nrow=length(uby), ncol=1,
                              dimnames=list(uby, 'N'))
  else
    matrix(NA,
           nrow=length(uby),
           ncol=5 + needxinc,
           dimnames=list(uby, c('N', 'Wmean', 'Wmin', 'Wmax',
                                'eps', if(needxinc) 'xinc')))
  if(discrete) xlev <- if(is.factor(X)) levels(X) else sort(unique(X))
  ## Process each stratum separately
  for(by in uby) {
    j <- By == by
    if(sum(j) < nignore) {
      warning(paste('Stratum', by, 'has <', nignore, 'observations and is ignored'))
      next
    }
    x <- X [j]
    y <- Y [j]
    y2 <- Y2[j]
    n <- length(x)
    if(discrete) {
      xv <- x
      xseq <- xlev
    }
    else
      ## Define the target x values (xseq) and the matching variable (xv):
      ## data units for space='x', observation ranks for space='n'
      switch(space,
             x = {
               xl <- xlim
               if(! length(xl)) {
                 xs <- sort(x)
                 xl <- c(xs[nignore], xs[n - nignore + 1])
                 if(diff(xl) >= 25) xl <- round(xl)
               }
               xinc <- Xinc
               if(! length(xinc)) xinc <- diff(xl) / 100.
               xseq <- seq(xl[1], xl[2], by=xinc)
               xv <- x
             },
             n = {
               i <- order(x)
               x <- x [i]
               y <- y [i]
               y2 <- y2[i]
               xinc <- Xinc
               if(! length(xinc)) {
                 xinc <- max(floor(n / 200.), 1)
                 info[by, 'xinc'] <- xinc
               }
               xseq <- seq(nignore, n - nignore + 1, by=xinc)
               xv <- 1 : n
             } )
    ep <- eps
    if(space == 'n' && varyeps) {
      ## Requirement to have >= 3 x-points; new eps=h
      ## First estimate at observation min(xseq), last at max(xseq)
      ## First window goes to min(xseq) + h on right
      ## Last window goes to max(xseq) - h on left
      ## Need difference in these two to be >= 2*xinc
      lowesteps <- floor((max(xseq) - min(xseq) - 2 * xinc) / 2.)
      if(lowesteps < eps) ep <- lowesteps
      info[by, 'eps'] <- ep
    }
    ## Assign observations to overlapping windows via a non-equi join
    if(discrete) m <- data.table::data.table(x, y, y2, tx=xv)
    else {
      s <- data.table::data.table(x, y, y2, xv)
      a <- data.table::data.table(tx=xseq, key='tx')  # target xs for estimation
      a[, .q(lo, hi) := .(tx - ep, tx + ep)]  # define all windows
      m <- a[s, on=.(lo <= xv, hi >= xv)]  # non-equi join
    }
    data.table::setkey(m, tx)
    ## Non-equi join adds observations tx=NA
    m <- m[! is.na(tx), ]
    ## Compute the statistics within each window
    w <- m[, statx(y, y2, x), by=tx]
    info[by, 'N'] <- n
    if('N' %in% names(w)) {
      N <- w[, N]
      if(! discrete) info[by, 2:4] <- c(round(mean(N), 1), min(N), max(N))
    }
    if(! discrete && space == 'n') {
      ## Plot each window at its mean x rather than the observation rank
      w[, tx := .xmean.]
      w[, .xmean. := NULL ]
    }
    ## Optionally smooth the moving statistics across windows
    if(msmooth != 'raw') {
      computed <- setdiff(names(w), c('tx', 'N'))
      for(vv in computed) {
        smoothed <- switch(tsmooth,
                           lowess = lowess(w[, tx], w[[vv]], f =span),
                           supsmu = supsmu(w[, tx], w[[vv]], bass=bass))
        if(length(smoothed$x) < 2)
          stop(paste0('Only 1 x point for stratum ', by,
                      ' with ', n,
                      ' observations. Consider specifying varyeps=TRUE.'))
        smfun <- function(x) approx(smoothed, xout=x)$y
        switch(msmooth,
               smoothed = {
                 w[, (vv) := smfun(tx)]
               },
               both = {
                 newname <- sub('Moving', 'Moving-smoothed', vv)
                 w[, (newname) := smfun(tx)]
               }
        )
      }
    }
    ## Also compute loess estimates
    dat <- data.frame(x=w[, tx])
    if(loess) {
      np <- loess(y ~ x, data=s)
      pc <- predict(np, dat)
      if(ybin)
        w[, 'Loess Proportion' := pc]
      else
        w[, `Loess Mean` := pc]
    }
    ## Model-based estimates evaluated at the same target x values
    if(ols) {
      f <- rms::ols(y ~ rms::rcs(x, .knots.), data=s)
      pc <- predict(f, dat)
      w[, `OLS Mean` := pc]
    }
    if(lrm) {
      f <- if(length(lrm_args))
        do.call(rms::lrm, c(list(y ~ rms::rcs(x, .knots.), data=s), lrm_args)) else
        rms::lrm(y ~ rms::rcs(x, .knots.), data=s)
      pc <- predict(f, dat, type='fitted')
      w[, 'LR Proportion' := pc]
    }
    if(orm) {
      f <- rms::orm(y ~ rms::rcs(x, .knots.), data=s, family=family)
      pc <- predict(f, dat, type='mean')
      w[, 'ORM Mean' := pc]
      if(length(tau)) {
        pc <- predict(f, dat)
        qu <- Quantile(f)
        for(ta in tau) {
          w[, ormqest := qu(ta, pc)]
          cta <- qformat(ta)
          data.table::setnames(w, 'ormqest', paste('ORM', cta))
        }
      }
    }
    if(qreg)
      for(ta in tau) {
        f <- rms::Rq(y ~ rms::rcs(x, .knots.), tau=ta, data=s)
        pc <- predict(f, dat)
        w[, qrest := pc]
        cta <- qformat(ta)
        data.table::setnames(w, 'qrest', paste('QR', cta))
      }
    if(hare) {
      f <- if(length(penalty))
        polspline::hare(y, y2, x, maxdim=maxdim, penalty=penalty)
      else
        polspline::hare(y, y2, x, maxdim=maxdim)
      for(ti in times) {
        inc <- polspline::phare(ti, dat$x, f)
        newname <- paste0('HARE ', ti, '-', tunits)
        w[, (newname) := inc]
      }
    }
    w[, by := by]
    R <- rbind(R, w)
  }
  ## Back-transform x and restore user-facing variable names
  R[, tx := itrans(tx)]
  if(bythere) {
    if(length(v) == 3) data.table::setnames(R, c('tx', 'by'), v[2:3])
    else {
      ## Multiple strata: split the interaction factor back into columns
      splitby <- strsplit(R[, by], split='::')
      data.table::setnames(R, 'tx', v[2])
      data.table::set(R, j='by', value=NULL)  # remove 'by'
      for(iby in 3 : length(v))
        data.table::set(R, j=v[iby],
                        value=sapply(splitby, function(u) u[[iby - 2]]))
    }
  }
  else {
    R[, by := NULL]
    data.table::setnames(R, 'tx', v[2])
  }
  ## Optionally reshape to long format with Type and Statistic columns
  if(melt) {
    if(sec) v[1] <- 'incidence'
    # Exclude N if present or would mess up melt
    if('N' %in% names(R)) R[, N := NULL]
    R <- data.table::melt(R, id.vars=v[-1], variable.name='Statistic',
                          value.name=v[1])
    if(sec) {
      addlab <- function(x) {
        label(x) <- 'Cumulative Incidence'
        x
      }
      R[, incidence := addlab(incidence)]
    }
    R[, Type := sub (' .*', '', Statistic)]
    R[, Statistic := sub ('.* ', '', Statistic)]
    R[, Type := gsub('~', ' ', Type)]
    R[, Statistic := gsub('~', ' ', Statistic)]
  }
  ## Assemble the window-information attribute (and optionally print it)
  if('eps' %in% colnames(info) && all(is.na(info[, 'eps'])))
    info <- info[, setdiff(colnames(info), 'eps'), drop=FALSE]
  if(! bythere) row.names(info) <- NULL
  infon <- info
  if(pr %in% c('kable', 'margin')) {
    n <- colnames(info)
    nam <- .q(N=N, Wmean=Mean, Wmin=Min, Wmax=Max, eps=eps, xinc=xinc)
    colnames(info) <- nam[n]
    ## rownames returns "1" "2" "3" ... even if row names not really there
    ri <- rownames(info)
    rnp <- length(ri) && ! all(ri == as.character(1 : length(ri)))
    ghead <- c(if(rnp) c(' ' = 1),
               c(' ' = 1, 'Window Sample Sizes' = 3),
               if('eps' %in% n) c(' ' = 1),
               if('xinc' %in% n) c(' ' = 1))
    info <- if(discrete) knitr::kable(info)
    else
      info <- knitr::kable(info) |>
        kableExtra::add_header_above(ghead) |>
        kableExtra::kable_styling(font_size=9)
  }
  if(pr == 'margin' && ! requireNamespace('qreport', quietly=TRUE))
    stop('must install qreport package when specifying pr=margin')
  switch(pr,
         plain = print(info),
         kable = cat(info),
         margin = qreport::makecolmarg(info) )
  attr(R, 'infon') <- infon
  attr(R, 'info') <- info
  R
}
## Declare variables referenced via data.table non-standard evaluation in
## movStats() so that R CMD check does not flag them as undefined globals
utils::globalVariables(c('lo', 'hi', 'tx', '.xmean.', 'Loess Mean', 'OLS Mean',
                         'ormqest', 'qrest', 'incidence', 'Type', 'Statistic', 'Min', 'Max'))
Hmisc/R/format.pval.s 0000644 0001762 0000144 00000002313 12243661443 014137 0 ustar ligges users ## Use R function for S-Plus, just changed to .Options
format.pval <- function (x, pv=x, digits = max(1, .Options$digits - 2),
                         eps = .Machine$double.eps,
                         na.form = "NA", ...) {
  ## Format p-values for printing: values below `eps` render as "<eps",
  ## NAs render as `na.form`, moderate values use fixed notation, and very
  ## small (but >= eps) values use significant-digit (scientific) notation.
  ## Interface mirrors base R's format.pval with .Options used for the
  ## default digits.
  if ((has.na <- any(ina <- is.na(pv))))
    pv <- pv[!ina]
  r <- character(length(is0 <- pv < eps))
  if (any(!is0)) {
    rr <- pv <- pv[!is0]
    ## Decimal exponent decides fixed vs scientific formatting
    expo <- floor(log10(ifelse(pv > 0, pv, 1e-50)))
    fixp <- expo >= -3 | (expo == -4 & digits > 1)
    if (any(fixp))
      rr[fixp] <- format(round(pv[fixp], digits = digits),
                         ...)
    if (any(!fixp))
      ## Bug fix: this branch previously duplicated the fixed-notation
      ## round() call, collapsing small p-values (e.g. 1e-7) to "0".
      ## Use significant-digit formatting as base R's format.pval does.
      rr[!fixp] <- format(pv[!fixp], digits = digits,
                          ...)
    r[!is0] <- rr
  }
  if (any(is0)) {
    ## Values below eps print as "<eps"; shrink digits so the result
    ## lines up with the fixed-notation strings when both are present
    digits <- max(1, digits - 2)
    if (any(!is0)) {
      nc <- max(nchar(rr))
      if (digits > 1 && digits + 6 > nc)
        digits <- max(1, nc - 7)
      sep <- if (digits == 1 && nc <= 6)
        ""
      else " "
    }
    else sep <- if(digits == 1)
      ""
    else " "
    r[is0] <- paste("<", format(eps, digits = digits, ...), sep = sep)
  }
  if (has.na) {
    ## Re-expand to the original length, filling NA positions
    rok <- r
    r <- character(length(ina))
    r[!ina] <- rok
    r[ina] <- na.form
  }
  r
}
Hmisc/R/print.char.list.s 0000644 0001762 0000144 00000041574 12243661443 014744 0 ustar ligges users if(!exists("string.bounding.box")) {
string.bounding.box <- function(string, type=c("chars", "width")) {
thisfun <- function(x, type) {
height <- length(x)
# get rid of ':' on last string
x[height] <- substr(x[height], start=1, stop=nchar(x[height], type='chars') - 1)
c(height = height, width = max(nchar(x, type=type)))
}
mode(string) <- "character"
type <- match.arg(type)
## Add remove '\n' if it is ends the string and add a ':' so that string split
## functions the way I want it to.
string <- paste(string, ':', sep='')
ans <- sapply(strsplit(string, '\n', fixed=TRUE), FUN=thisfun, type=type, USE.NAMES=FALSE)
return(list(columns = ans[2,], rows = ans[1,]))
}
}
equalBins <- function(widths, subwidths) {
  ## For each total column width and its vector of sub-column widths,
  ## either keep the sub-widths (when they already fill the space) or
  ## distribute the available width as evenly as possible among them.
  if(length(widths) != length(subwidths)) {
    stop("width and subwidth must be of the same length")
  }
  ## Deduct the single-character spacers separating sub-columns
  avail <- widths - unlist(lapply(subwidths, length)) + 1
  distribute <- function(total, parts) {
    if(sum(parts) >= total)
      return(parts)
    base <- total %/% length(parts)
    extra <- total %% length(parts)
    ## The first `extra` sub-columns get one additional character
    c(rep.int(base + 1, extra), rep.int(base, length(parts) - extra))
  }
  unlist(Map(distribute, avail, subwidths))
}
stringDims <- function(string) {
  ## Compute the character height (line count) and width (longest line) of
  ## each element of `string`, preserving matrix dimensions when present.
  ## Returns list(height=, width=).
  if(is.null(string)) {
    ## Bug fix: the original used return(height = 0, width = 0), which is
    ## an illegal multi-argument return in R and errors at run time; the
    ## degenerate result must be wrapped in a list
    return(list(height = 0, width = 0))
  }
  dims <- dim(string)
  bbox <- string.bounding.box(string)
  height <- bbox$rows
  width <- bbox$columns
  if(any(dims)) {
    ## Reshape per-element results back to the input's matrix shape
    dim(height) <- dims
    dim(width) <- dims
  }
  list(height = height, width = width)
}
simplifyDims <- function(x) {
  ## Combine a list of results: row-bind when any element is a matrix,
  ## otherwise concatenate into a single vector.
  has.matrix <- any(vapply(x, is.matrix, logical(1)))
  if(has.matrix)
    do.call(rbind, x)
  else
    do.call(c, x)
}
partition.vector <- function(x, sep, ...) {
  ## Split vector x into consecutive chunks whose lengths are given by sep.
  ## Returns a list of sub-vectors named "1", "2", ... in chunk order.
  ##
  ## x   : vector to partition
  ## sep : numeric vector of chunk lengths; must sum to length(x)
  if(missing(sep)) {
    stop("sep is a required arg")
  }
  if(sum(sep) != length(x)) {
    ## Bug fix: message previously said "number of columns", which is
    ## meaningless for a vector
    stop("sep must sum to the length of x")
  }
  split(x, rep(seq(along.with=sep), times=sep))
}
partition.matrix <- function(x, rowsep, colsep, ...) {
  ## Partition matrix x into sub-matrices: rowsep gives the number of rows
  ## per row-chunk, colsep the number of columns per column-chunk.  With
  ## only rowsep, returns a list of row chunks; with only colsep, a list of
  ## column chunks; with both, a list (by row chunk) of lists (by column
  ## chunk).
  colmissing <- missing(colsep)
  rowmissing <- missing(rowsep)
  if(rowmissing && colmissing) {
    ## Bug fix: "Atleast" typo corrected
    stop("At least one of rowsep or colsep args must be specified")
  }
  ## If length of group is equal to length of x assume that this is a
  ## a vector of group numbers
  if(!rowmissing) {
    if(sum(rowsep) != NROW(x)) {
      ## Bug fix: message previously said "columns" though NROW is checked
      stop("rowsep must sum to the number of rows in x")
    }
    if(!is.numeric(rowsep)) {
      stop("the rowsep vector must be numeric")
    }
  }
  if(!colmissing) {
    if(sum(colsep) != NCOL(x)) {
      ## Bug fix: message previously said "rows" though NCOL is checked
      stop("colsep must sum to the number of columns in x")
    }
    if(!is.numeric(colsep)) {
      stop("the colsep vector must be numeric")
    }
  }
  ## Separate x into row chunks
  if(!rowmissing) {
    set <- lapply(split(seq(NROW(x)), rep(seq(along.with=rowsep), times=rowsep)), function(index) x[index,,drop=FALSE])
  } else {
    set <- NULL
  }
  ## Then split each row chunk (or the whole matrix) into column chunks
  if(!colmissing) {
    FUN <- function(x)
      lapply(split(seq(NCOL(x)), rep(seq(along.with=colsep), times=colsep)), function(index) x[,index,drop=FALSE])
    if(is.null(set)) {
      FUN(x)
    } else {
      lapply(set, FUN)
    }
  } else {
    set
  }
}
print.char.list <- function(x, ..., hsep = c("|"), vsep = c("-"), csep = c("+"),
print.it = TRUE, rowname.halign = c("left", "centre", "right"),
rowname.valign = c("top", "centre", "bottom"),
colname.halign = c("centre", "left", "right"),
colname.valign = c("centre", "top", "bottom"),
text.halign = c("right", "centre", "left"),
text.valign = c("top", "centre", "bottom"), rowname.width,
rowname.height, min.colwidth = .Options$digits, max.rowheight = NULL,
abbreviate.dimnames = TRUE, page.width = .Options$width,
colname.width, colname.height, prefix.width,
superprefix.width = prefix.width) {
vjustText <- function(char.matrix, fieldHeight, vjust = c("top", "center", "bottom")) {
if(!is.matrix(char.matrix))
stop("char.matrix must be of type matrix")
d <- dim(char.matrix)
vjust <- match.arg(vjust)
if(! is.character(char.matrix))
char.matrix <- as.character(char.matrix)
# split the matrix strings up into multi lines.
char.matrix <- ifelse(is.na(char.matrix), NA, string.break.line(char.matrix))
# determine veritcal differentials
vdiff <- fieldHeight - unlist(lapply(char.matrix, length))
ans <- mapply(function(element, vdiff, target) {
if(is.na(element) || vdiff == 0) {
return(element)
}
if(vdiff < 0) {
# Trim trailing extra lines
lines <- rev(seq(along.with=element)[element != ''])
if(lines[1] <= target) {
return(element[1:target])
}
length(element) <- lines[1]
element <- element[element != '']
if(length(element) == target)
return(element)
vdiff <- target - length(element)
}
switch(vjust,
top = c(element, character(vdiff)),
bottom = c(character(vdiff), element),
center = c(character(half <- vdiff%/%2), element, character(vdiff - half)))
}, char.matrix, vdiff, fieldHeight, USE.NAMES=FALSE)
matrix(unlist(ans), ncol=d[[2]])
}
hjustText <- function(char.matrix, fieldWidth, hjust=c("left","right","center")) {
if(!is.matrix(char.matrix))
stop("text must be of type matrix")
d <- dim(char.matrix)
hjust <- match.arg(hjust)
ans <- mapply(function(column, target) {
column <- unlist(column)
column <- ifelse(is.na(column), NA, format(column, justify=hjust, width=target))
column <- ifelse(is.na(column) || target - nchar(column, type="width"), column, strtrim(column, target))
}, split(char.matrix, col(char.matrix)), fieldWidth, USE.NAMES=FALSE)
# dim(ans) <- d
ans
}
justText <- function(char.matrix, fieldWidth, fieldHeight, hjust=c("left","right","centre"),
vjust = c("top", "centre", "bottom"), trim=FALSE) {
if(!is.matrix(char.matrix))
stop("text must be of type matrix")
## Get the original dims of the matrix
d <- dim(char.matrix)
## Determin the wanted justification.
hjust <- match.arg(hjust)
vjust <- match.arg(vjust)
## If this is a charater matrix then break in on the lines
if(is.character(char.matrix)) {
## split the matrix strings up into multi lines.
ans <- ifelse(is.na(char.matrix), NA, string.break.line(char.matrix))
} else {
ans <- char.matrix
}
## format the text horizontaly.
ans <- mapply(function(column, target) {
fun <- function(x) ifelse(is.na(x), NA, format(x, justify=hjust, width=target))
if(is.list(column)) {
lapply(column, fun)
} else {
fun(column)
}
}, split(ans, col(char.matrix)), fieldWidth, USE.NAMES=FALSE)
spacer <- makeNstr(' ', fieldWidth)
## Add extra rows to justify the text vericaly.
ans <- mapply(function(row, target) {
fun <- function(element, spacer) {
vdiff <- target - length(element)
if(is.na(element) || vdiff == 0) {
return(element)
}
if(vdiff < 0) {
## Trim trailing extra lines
lines <- rev(seq(along.with=element)[element != ''])
if(lines[1] <= target) {
return(element[1:target])
}
length(element) <- lines[1]
element <- element[element != '']
if(length(element) == target)
return(element)
vdiff <- target - length(element)
}
switch(vjust,
top = c(element, rep(spacer, vdiff)),
bottom = c(rep(spacer, vdiff), element),
centre = c(rep(spacer, half <- vdiff%/%2), element, rep(spacer, vdiff - half)))
}
mapply(fun, row, spacer, USE.NAMES=FALSE, SIMPLIFY=FALSE)
}, split(ans, row(char.matrix)), fieldHeight, USE.NAMES=FALSE, SIMPLIFY=FALSE)
matrix(unlist(unsplit(ans, row(char.matrix))), ncol=d[[2]])
}
printRow <- function(entries, widths, sep) {
if(length(entries) != length(widths))
stop("arguments must be the same length")
first <- TRUE
last <- TRUE
env <- environment()
nval <- ' '
sep <- hsep
out <- mapply(function(entry, width) {
if(is.na(entry)) {
if(is.null(last)) {
out <- c(nval, makeNstr(nval, width))
} else {
out <- c(sep, makeNstr(nval, width))
assign("last", NULL, envir=env)
}
}else{
if(is.null(last)) {
assign("last", TRUE, envir=env)
}
out <- c(sep, entry)
}
out
}, entries, widths)
paste(c(out, sep), collapse='')
}
printBars <- function(entries, blank, widths, hsep, csep, vsep) {
bars <- character(length(entries) + 1)
alt <- rep(c(1,2), length.out=length(widths))
# blank <- c(list(rep(TRUE, length(widths))), blank)
for(i in seq(along.with=entries)) {
len <- length(entries[[i]])
comp <- entries[[i]][-len]
comp.last <- entries[[i]][len]
bnk <- blank[[i]]
bnk.last <- bnk[length(bnk)]
dividers <- ifelse(comp & bnk, hsep, ifelse(bnk, ' ', ifelse(comp, csep, vsep)))
dividers <- c(dividers, ifelse(bnk.last, hsep, csep))
betweens <- c(makeNstr(ifelse(bnk, ' ', vsep), widths), '')
bars[i] <- paste(dividers, betweens, sep='', collapse='')
}
dividers <- ifelse(entries[[length(entries)]], csep, vsep)
betweens <- c(makeNstr(vsep, widths), '')
bars[length(bars)] <- paste(dividers, betweens, sep='', collapse='')
bars
}
rjustText <- function(text, fieldWidth, trim=FALSE) justText(text, fieldWidth, 'right', trim)
ljustText <- function(text, fieldWidth, trim=FALSE) justText(text, fieldWidth, 'left', trim)
centerText <- function(text, fieldWidth, trim=FALSE) justText(text, fieldWidth, 'center', trim)
colnames <- NULL
rownames <- NULL
colDims <- NULL
rowDims <- NULL
supercols <- NULL
superrows <- NULL
supercolDims <- NULL
superrowDims <- NULL
colsets <- NULL
rowsets <- NULL
if(is.list(x)) {
rownames <- lapply(x, function(x) {
if(is.null(rownames <- dimnames(x)[[1]])) {
rep(NA, NROW(x))
} else {
rownames
}
})
rowsets <- unlist(lapply(rownames, length), use.names=FALSE)
superrows <- names(rownames)
rownames <- matrix(unlist(rownames, use.names=FALSE), ncol=1)
if(all(is.na(rownames))) {
rownames <- NULL
}
colnames <- lapply(x[[1]][1,], names)
colsets <- unlist(lapply(colnames, length), use.names=FALSE)
supercols <- names(colnames)
colnames <- matrix(unlist(colnames, use.names=FALSE), nrow=1)
if(all(is.na(colnames))) {
colnames <- NULL
}
## Convert to a matrix
matrix <- do.call(rbind, x)
matrix <- do.call(rbind, tapply(matrix, row(matrix), FUN='unlist'))
} else {
rownames <- dimnames(x)[[1]]
colnames <- dimnames(x)[[2]]
matrix <- x
}
## get widths of each column in table.
listDims <- stringDims(matrix(sapply(matrix, format),ncol=ncol(matrix)))
## find the widths and heights of the row names and col names
## if any elements do not have rownames the set them equal to 0
if(length(colnames)) {
colDims <- stringDims(colnames)
} else {
colDims <- list(height = integer(nrow(matrix)), width = integer(nrow(matrix)))
}
if(length(rownames)) {
rowDims <- stringDims(rownames)
} else {
rowDims <- list(height = integer(ncol(matrix)), width = integer(ncol(matrix)))
}
## calculate the superdim info
## do it for the supercols
if(length(supercols)) {
supercolDims <- stringDims(supercols)
} else {
supercolDims <- list(height = 0, width = 0)
}
## do it for the superrows
if(length(superrows)) {
superrowDims <- stringDims(superrows)
} else {
superrowDims <- list(height = 0, width = 0)
}
## Calculate the max column width
if(!missing(colname.width)) {
colwidth <- pmin(colDims$width, colname.width)
supercolwidth <- pmin(supercolDims$width, colname.width*colsets)
} else {
colwidth <- colDims$width
supercolwidth <- supercolDims$width
}
## Calculate the max row hight
if(!missing(rowname.height)) {
rowheight <- pmin(rowDims$height, rowname.height)
superrowheight <- pmin(superrowDims$height, rowname.height*rowsets)
} else {
rowheight <- rowDims$height
superrowheight <- superrowDims$height
}
## Find the overall height of the matrix
height <- pmax(tapply(listDims$height, row(matrix), max), rowheight)
height <- equalBins(superrowheight, partition.vector(height, rowsets))
## Find the overall width of the matrix
width <- pmax(tapply(listDims$width, col(matrix), max), colwidth)
width <- equalBins(supercolwidth, partition.vector(width, colsets))
## Calculate actual supercol widths that is the sum of the subcol or total lenght to supercol
## which ever is greater
supercolwidth <- tapply(width, rep.int(seq(along.with=colsets), times=colsets), sum) + colsets - 1
supercolheight <- max(superrowDims$height)
colheight <- max(colDims$height)
superrowheight <- tapply(height, rep.int(seq(along.with=rowsets), times=rowsets), sum)
if(missing(prefix.width)) {
if(!is.null(rownames)) {
prefix.width <- max(max(na.rm = TRUE, as.integer(median(width)), max(rowDims$width)))
} else {
prefix.width <- 0
}
}
if(missing(superprefix.width)) {
if(!is.null(superrows)) {
superprefix.width <- max(na.rm = TRUE, as.integer(median(width)), max(superrowDims$width))
} else {
superprefix.width <- 0
}
}
header <- NULL
headerwidth <- NULL
rows <- NULL
entries <- list()
blanks <- list()
## Figure out the centering of the cells.
rowNameHalign <- match.arg(rowname.halign)
rowNameValign <- match.arg(rowname.valign)
colNameHalign <- match.arg(colname.halign)
colNameValign <- match.arg(colname.valign)
cellHalign <- match.arg(text.halign)
cellValign <- match.arg(text.valign)
## create the superrowname column
superrow <- if(!is.null(superrows)) {
superrows <- matrix(superrows, ncol=1)
header <- NA
headerwidth <- superprefix.width
## perform verical and horizontal centering.
justText(superrows, superprefix.width, superrowheight,
rowNameHalign, rowNameValign)
}
row <- if(!is.null(rownames)) {
header <- cbind(header, NA)
headerwidth <- c(headerwidth, prefix.width)
justText(rownames, prefix.width, height, rowNameHalign, rowNameValign)
}
body <- cbind(superrow, row,
justText(matrix, width, height, cellHalign, cellValign))
width <- c(headerwidth, width)
body <- split(body, row(body))
## Create the super column name row and the column name row
if(!is.null(supercols)) {
supercols <- matrix(supercols, nrow=1)
supercolwidth <- c(headerwidth, supercolwidth)
entry <- c(header, rep(seq(along.with=colsets), colsets), 0)
entries <- c(entries, list(ifelse(is.na(entry), FALSE, !duplicated(entry))))
blank <- ifelse(is.na(c(header, rep(supercols, colsets))), TRUE, FALSE)
blanks <- c(blanks, list(blank))
rows <- printRow(justText(cbind(header, supercols), supercolwidth, supercolheight,
colNameHalign, colNameValign), width=supercolwidth, sep=hsep)
}
if(!is.null(colnames)) {
entry <- c(header, rep(seq(along.with=colnames), 1), 0)
entries <- c(entries, list(ifelse(is.na(entry), FALSE, !duplicated(entry))))
blank <- ifelse(is.na(c(header, colnames)), TRUE, FALSE)
blanks <- c(blanks, list(blank))
rows <- c(rows,
printRow(justText(cbind(header, colnames), width, colheight,
colNameHalign, colNameValign), width=width, sep=hsep))
}
env <- environment()
rows <- c(rows, unlist(lapply(split(body, rep(seq(along.with=rowsets), rowsets)), function(set) {
index <- seq(along.with=set)
mapply(FUN = function(line, index) {
entry <- c(ifelse(is.na(line), NA, rep(seq(along.with=line), 1)), 0)
entry <- ifelse(is.na(entry), FALSE, !duplicated(entry))
assign('entries', c(entries, list(entry)), env)
blank <- ifelse(is.na(line), FALSE, FALSE)
if(index != 1) {
blank[1] <- TRUE
}
assign('blanks', c(blanks, list(blank)), env)
printRow(line, width=width, sep=hsep)
}, set, index)
}), use.names=FALSE))
blanks[[1]] <- logical(length(width))
entries <- lapply(entries, function(entry) {entry[1] <- TRUE; entry})
bars <- printBars(entries, blanks, width, hsep=hsep, vsep=vsep, csep=csep)
total <- paste(bars, c(rows, ""), sep='\n', collapse='\n')
if(print.it) {
cat(total)
invisible(x)
} else {
total
}
}
Hmisc/R/Misc.s 0000644 0001762 0000144 00000177273 14762626300 012624 0 ustar ligges users if(!exists("NROW", mode='function')) {
NROW <- function(x)
if (is.array(x) || is.data.frame(x)) nrow(x) else length(x)
}
## Backward-compatibility shim: define NCOL only when the running R
## does not already supply it (modern base R always does).
if(!exists("NCOL", mode='function')) {
  NCOL <- function(x)
  {
    ## Arrays with more than one dimension and data frames report their
    ## column count; anything else counts as a single column.
    if((is.array(x) && length(dim(x)) > 1) || is.data.frame(x))
      ncol(x)
    else
      as.integer(1)
  }
}
prn <- function(x, txt, file='', head=deparse(substitute(x), width.cutoff=500)[1])
{
  ## Print an object preceded by the expression that produced it (and an
  ## optional text label), optionally appending the output to 'file'.
  if(file != '') sink(file, append=TRUE)
  if(missing(txt)) {
    cat('\n', head, '\n\n', sep='')
  } else {
    ## If label + expression would overflow the line width, push the
    ## expression onto its own indented line; otherwise keep them together
    if(nchar(txt) + nchar(head) + 3 > .Options$width)
      head <- paste('\n\n ', head, sep='')
    else
      txt <- paste(txt, ' ', sep='')
    cat('\n', txt, head, '\n\n', sep='')
  }
  print(x)
  if(file != '') sink()
  invisible()
}
formatSep <- function(x, digits, ...)
{
  ## Format each element of x separately (element-wise format()), so that
  ## short values are not padded to a common width as a vectorized
  ## format() call would do.  Names are preserved.
  ## Fix: seq_along() instead of 1:length(x), which manufactured a
  ## spurious element for length-0 input (1:0 iterates over c(1, 0)).
  y <- character(length(x))
  for(i in seq_along(x))
    y[i] <- if(missing(digits)) format(x[i], ...)
            else format(x[i], digits=digits, ...)
  names(y) <- names(x)
  y
}
nomiss <- function(x)
{
  ## Drop observations containing NAs: complete rows are kept for data
  ## frames and matrices, non-NA elements for plain vectors.
  if(is.data.frame(x))
    na.exclude(x)
  else if(is.matrix(x)) {
    ## A row sum against a vector of ones is NA iff the row has any NA
    complete <- !is.na(x %*% rep(1, ncol(x)))
    x[complete, ]
  }
  else
    x[!is.na(x)]
}
fillin <- function(v, p)
{
  ## Replace NAs in v with p (a scalar or a vector of replacement values)
  ## and update the Hmisc label to record how many NAs were imputed.
  filled <- ifelse(is.na(v), p, v)
  nmiss  <- sum(is.na(v))
  label(filled) <-
    if(length(p) == 1)
      paste(label(v), "with", nmiss, "NAs replaced with", format(p))
    else
      paste(label(v), "with", nmiss, "NAs replaced")
  filled
}
spearman <- function(x, y)
{
  ## Spearman rank correlation computed on pairwise-complete
  ## observations; returns c(rho=NA) when fewer than 3 complete pairs
  ## exist.
  x <- as.numeric(x)
  y <- as.numeric(y)
  ok <- !is.na(x + y)          # pairs with neither value missing
  if(sum(ok) < 3)
    return(c(rho=NA))
  c(rho=cor(rank(x[ok]), rank(y[ok])))
}
## Plot, for each correlation in 'rho', the half-width ("precision") of
## the Fisher-z-based confidence interval for r as a function of sample
## size n.  Curves are drawn and labeled with Hmisc's labcurve().
plotCorrPrecision <- function(rho=c(0,0.5), n=seq(10,400,length.out=100),
                              conf.int=0.95, offset=.025, ...)
{
  ## Thanks to Xin Wang for computations
  curves <- vector('list', length(rho))
  names(curves) <- paste('r',format(rho),sep='=')
  zcrit <- qnorm(1-(1-conf.int)/2)
  for(i in 1:length(rho)) {
    r <- rho[i]
    ## Fisher z-transform; z is ~ normal with SD 1/sqrt(n-3)
    z <- .5*log((1+r)/(1-r))
    lo <- z - zcrit/sqrt(n-3)
    hi <- z + zcrit/sqrt(n-3)
    ## Back-transform the z confidence limits to the correlation scale
    rlo <- (exp(2*lo)-1)/(exp(2*lo)+1)
    rhi <- (exp(2*hi)-1)/(exp(2*hi)+1)
    ## Precision = wider half of the (asymmetric) interval around r
    precision <- pmax(rhi-r, r-rlo)
    curves[[i]] <- list(N=n, Precision=precision)
  }
  labcurve(curves, pl=TRUE, xrestrict=quantile(n,c(.25,1)), offset=offset, ...)
  invisible()
}
trap.rule <- function(x,y) sum(diff(x)*(y[-1]+y[-length(y)]))/2
uncbind <- function(x, prefix="", suffix="")
{
  ## Explode the columns of matrix x into individual variables named
  ## <prefix><colname><suffix> in the CALLER's frame.  Deliberately
  ## noisy: existing variables of the same names are silently
  ## overwritten, so a warning is always issued.
  nn <- dimnames(x)[[2]]
  ## (Trailing empty argument after immediate.=TRUE removed.)
  warning("You are using uncbind. That was a really bad idea. If you had any variables in the global environment named ",
          paste(prefix, nn, suffix, sep="", collapse=", "),
          " they are now over writen.\n\nYou are now warned.",
          immediate. = TRUE)
  ## Fix: assign into the caller's evaluation frame via envir=parent.frame().
  ## The original used pos=parent.env(), which errors at runtime because
  ## parent.env() requires an environment argument.
  for(i in seq_len(ncol(x)))
    assign(paste(prefix, nn[i], suffix, sep=""), x[, i], envir=parent.frame())
  invisible()
}
## Function to pick off ordinates of a step-function at user-chosen abscissas
stepfun.eval <- function(x, y, xout, type=c("left","right"))
{
  ## Evaluate the step function defined by (x, y) at the abscissas xout.
  ## type="left" carries each value forward from the left (f=0);
  ## type="right" carries values backward from the right (f=1).
  type <- match.arg(type)
  keep <- !is.na(x + y)        # ignore incomplete pairs
  f <- if(type == "left") 0 else 1
  approx(x[keep], y[keep], xout=xout, method="constant", f=f)$y
}
## Fast Kaplan-Meier (or Fleming-Harrington) estimation for a single
## stratum.  With neither 'times' nor 'q' given, returns the full step
## function as list(time, surv); with 'q' returns the first time at which
## survival drops to q or below; with 'times' returns (extrapolated)
## survival estimates at those times.  interval='>=' shifts estimates so
## S(t) is treated as left-continuous.  n.risk=TRUE attaches the number
## at risk as an attribute.
km.quick <- function(S, times, q, type=c('kaplan-meier', 'fleming-harrington', 'fh2'),
                     interval=c('>', '>='), method=c('constant', 'linear'), fapprox=0, n.risk=FALSE)
{
  sRequire('survival')
  type     <- match.arg(type)
  interval <- match.arg(interval)
  method   <- match.arg(method)
  S <- S[! is.na(S), ]                 # drop rows with missing survival info
  ## Single pseudo-stratum covering all observations
  stratvar <- factor(rep(1, nrow(S)))
  ## survfitKM handles right censoring directly; other censoring types go
  ## through the survfit formula interface
  f <- if(attr(S, 'type') == 'right')
         survival::survfitKM(stratvar, S, se.fit=FALSE, conf.type='none', type=type)
       else survival::survfit(S ~ stratvar, se.fit=FALSE, conf.type='none')
  nr <- if(n.risk) list(time=f$time, n.risk=f$n.risk)
  if(missing(times) & missing(q)) {
    time <- f$time[f$n.event > 1e-10]   # survfit.formula for left censoring
    surv <- f$surv[f$n.event > 1e-10]   # creates a tiny nonzero value for n.event
    if(interval == '>=') surv <- c(1e0, surv[-length(surv)])
    res <- list(time=time, surv=surv)
  } else {
    ## Prepend the (t=0, S=1) point so quantiles/interpolation can reach it
    tt <- c(0, f$time)
    ss <- c(1, f$surv)
    if(missing(times)) res <- min(tt[ss <= q])
    else {
      if(interval == '>=') {tt <- f$time; ss <- ss[-length(ss)]}
      res <- approxExtrap(tt, ss, xout=times, method=method, f=fapprox)$y
    }
  }
  if(n.risk) attr(res, 'n.risk') <- nr
  res
}
oPar <- function()
{
  ## Snapshot the current par() settings for later restoration by a
  ## high-level graphics function.  'fin' is dropped (restoring it would
  ## resize the figure region) and 'new' is forced to FALSE so the next
  ## plot starts on a fresh page.
  saved <- par()
  saved$fin <- NULL
  saved$new <- FALSE
  invisible(saved)
}
setParNro <- function(pars)
{
  ## Restore par() settings from a saved list, skipping the read-only
  ## parameters that par() refuses to set.
  readonly <- c('cin','cra','csi','cxy','din','xlog','ylog','gamma','page')
  settable <- !(names(pars) %in% readonly)
  invisible(par(pars[settable]))
}
## Get or set the axis-label distance (second element of mgp) used by
## mgp.axis().  Called without 'value', returns the stored setting in
## the form requested by 'type'; called with a value (or 'default'),
## stores it in options(mgp.axis.labels=).
mgp.axis.labels <- function(value, type=c('xy','x','y','x and y'))
{
  type <- match.arg(type)
  if(missing(value)) {
    value <- .Options$mgp.axis.labels
    pr <- par(c('mgp', 'las'))
    mgp <- pr$mgp
    if(! length(value)) value <- c(.7, .7)   # fallback default
    ## value[1] applies to the x axis, value[2] to the y axis
    return(switch(type,
                  xy = value,
                  x = c(mgp[1], value[1], mgp[3]),
                  y = c(mgp[1], value[2], mgp[3]),
                  'x and y' = list(x = c(mgp[1], value[1], mgp[3]),
                                   y = c(mgp[1], value[2], mgp[3]))))
  }
  if(value[1]=='default')
    value <- c(.7,.7)
  ##c(.6, if(par('las')==1) 1.3 else .6)
  options(mgp.axis.labels=value, TEMPORARY=FALSE)
  invisible()
}
## Version of axis() that uses appropriate mgp from mgp.axis.labels and
## gets around a bug in axis(2, ...) that causes it to assume las=1.
## mgp and tcl are scaled by the mfrow layout so ticks and labels stay
## readable in multi-panel plots; optionally draws the axis title too.
mgp.axis <-
  function(side, at=NULL, ...,
           mgp=mgp.axis.labels(type=if(side==1 | side==3)'x' else 'y'),
           axistitle=NULL, cex.axis=par('cex.axis'), cex.lab=par('cex.lab'))
{
  mfrow <- par('mfrow')
  tcl <- max(par('tcl'), -0.4)
  nr <- mfrow[1]; nc <- mfrow[2]
  ## Accumulate the argument list passed on to axis()
  w <- list(side=side)
  w <- c(w, list(...))
  w$cex.axis <- cex.axis
  if(length(at)) w$at <- at
  if(side == 1 || side == 3) {
    ## Horizontal axes: scale spacing by the number of plot rows
    w$mgp <- mgp / nr
    w$tcl <- tcl / nr
    if(side==1 && length(axistitle))
      title(xlab=axistitle, mgp = mgp / min(2.25, nr), cex.lab=cex.lab)
  } else {
    ## Vertical axes: scale spacing by the number of plot columns
    w$mgp <- mgp / nc
    w$tcl <- tcl / nc
    las <- par('las')
    ## Rotate tick labels unless las requests horizontal; adjust
    ## justification to match
    w$srt <- 90 * (las == 0)
    w$adj <- if(las == 0) 0.5
             else 1
    if(side == 2 && length(axistitle))
      title(ylab=axistitle, mgp=mgp / min(2.25, nc), cex.lab=cex.lab)
  }
  do.call('axis', w)
  invisible()
}
trellis.strip.blank <- function()
{
  ## Make lattice strip labels draw on a blank background by zeroing the
  ## fill colour of both strip components.
  for(component in c("strip.background", "strip.shingle")) {
    setting <- lattice::trellis.par.get(component)
    setting$col <- 0
    lattice::trellis.par.set(component, setting)
  }
  invisible()
}
lm.fit.qr.bare <- function(x, y,
                           tolerance = NULL,
                           intercept=TRUE, xpxi=FALSE,
                           singzero=FALSE)
{
  ## Bare-bones least squares via lm.fit: returns coefficients,
  ## residuals, R^2 and fitted values; optionally (X'X)^{-1} (xpxi=TRUE)
  ## and NA coefficients from singularities forced to zero
  ## (singzero=TRUE).
  if(!length(tolerance)) tolerance <- 1e-7
  x <- if(intercept) cbind(Intercept=1, x) else as.matrix(x)
  fit <- lm.fit(x, y, tol=tolerance)
  beta <- fit$coefficients
  if(singzero && any(bad <- is.na(beta))) beta[bad] <- 0.
  resid <- fit$residuals
  ans <- list(coefficients  = beta,
              residuals     = resid,
              rsquared      = 1 - sum(resid^2) / sum((y - mean(y))^2),
              fitted.values = fit$fitted.values)
  if(xpxi) {
    keep <- 1L : fit$rank
    ans$xpxi <- chol2inv(fit$qr$qr[keep, keep, drop=FALSE])
  }
  ans
}
all.is.numeric <- function(x, what=c('test','vector','nonnum'),
                           extras=c('.','NA'))
{
  ## Determine whether character vector x is numeric apart from blanks
  ## and the tokens in 'extras'.  what='test' returns TRUE/FALSE,
  ## 'vector' returns x converted to numeric when possible (else x
  ## unchanged), 'nonnum' returns the offending non-numeric elements.
  what <- match.arg(what)
  x <- sub('[[:space:]]+$', '', x)
  x <- sub('^[[:space:]]+', '', x)
  candidates <- x[!(x %in% c('', extras))]
  if(!length(candidates) || all(is.na(x)))
    return(switch(what, test = FALSE, vector = x, nonnum = x[0]))
  nonnum <- suppressWarnings(!is.na(candidates) & is.na(as.numeric(candidates)))
  ok <- !any(nonnum)
  switch(what,
         test   = ok,
         ## suppressWarnings: 'extras' still present in x would warn on coercion
         vector = if(ok) suppressWarnings(as.numeric(x)) else x,
         nonnum = candidates[nonnum])
}
Lag <- function(x, shift=1)
{
  ## Shift vector x by 'shift' positions (positive = lag, negative =
  ## lead), padding with NA (or "" for character vectors) and preserving
  ## all attributes of x; an Hmisc label, if present, records the shift.
  n <- length(x)
  if(shift == 0)
    return(x)
  ## character(n) yields ""; coercing that template to x's storage mode
  ## gives NA padding for non-character vectors and "" for character
  out <- as.vector(character(n), mode=storage.mode(x))
  atr <- attributes(x)
  if(length(atr$label))
    atr$label <- paste(atr$label, 'lagged', shift, 'observations')
  if(abs(shift) < n) {
    if(shift > 0)
      out[-(1:shift)] <- x[1:(n - shift)]
    else
      out[1:(n + shift)] <- x[(1 - shift):n]
  }
  attributes(out) <- atr
  out
}
xySortNoDupNoNA <- function(x, y)
{
  ## Return (x, y) sorted by x with NA pairs and duplicate x values
  ## removed; x may also be a two-component list holding both
  ## coordinates.
  if(is.list(x)) {
    y <- x[[2]]
    x <- x[[1]]
  }
  ok <- !is.na(x + y)
  if(any(ok)) {
    x <- x[ok]
    y <- y[ok]
  }
  ord <- order(x)
  x <- x[ord]
  y <- y[ord]
  keep <- !duplicated(x)       # first occurrence (in sort order) wins
  list(x=x[keep], y=y[keep])
}
## Write right-justified labels into the right outer margin at vertical
## positions y, via a tickless axis() call whose position is computed to
## lie at the right edge of the figure region (this survives device
## resizing, unlike the older text()-based versions below).
outerText <-
  function(string, y, cex=par('cex'), ...) {
    usr <- par('usr'); plt <- par('plt')
    ## x position of the right figure-region edge, in user coordinates
    pos <- usr[2] + (usr[2] - usr[1])/(plt[2] - plt[1]) * (1 - plt[2])
    axis(2, at=y, labels=string, tick=FALSE, las=1,
         pos=pos, cex.axis=cex, xpd=NA)
  }
## if(missing(space)) space <- max(nchar(string))*.5
## mtext(string, side=side, las=1, at=y, adj=adj, cex=cex, line=space)
# This method does not survive shrinking the graphics window
# Right justifies (if adj=1) a vector of strings against the right margin
# (side=4) or against the y-axis (side=2)
#outerText <-
# function(string, y, side=4, cex=par('cex'), adj=1, ...) {
# if(side %nin% c(2,4)) stop('only works for side=2 or 4')
# x <- if(side==4) grconvertX(1, from='nfc', to='user') else
# par('usr')[1]
# text(x, y, paste(string,''), cex=cex, adj=adj, xpd=NA)
#}
## Old method [dropped because does not scale upon resizing device]
## Use text() to put test strings in left or right margins
## Temporarily sets par(xpd=NA) if using R
## For adj=1 side=4, setAside is a character string used to determine
## the space to set aside for all strings
## space is the number of extra characters to leave to the left of
## the string(s) (adj=0) or to the right (adj=1)
## Disabled legacy implementation (if(FALSE)): kept for reference only.
## Placed strings in the right margin with text(); dropped because it
## does not rescale when the graphics device is resized.
if(FALSE) outerText <- function(string, y, setAside=string[1], side=4, space=1,
                                adj=1, cex=par('cex'))
{
  usr <- par('usr')
  xpd <- par('xpd')
  if(!is.na(xpd)) {
    on.exit(par(xpd=xpd))   # restore clipping on exit
    par(xpd=NA)             # allow drawing outside the plot region
  }
  ie <- is.expression(string) ## 1sep02
  if(ie)
    adj <- 0 ## adj=1 not work well for expressions in R
  if(side!=4)
    stop('only side=4 implemented')
  if(adj==0)
    text(usr[2], y,
         if(ie)
           string
         else
           paste(space,string,sep=''),
         adj=0)
  else {
    usr.space.needed <- strwidth(setAside, units='user', cex=cex)
    text(usr[2]+0.5*strwidth(space, units='user', cex=cex)+usr.space.needed,
         y, string, adj=1, cex=cex) # was usr[2]- 18jul02;added 0* 25jul02
    ## was 0*strwidth(space,...) 31jan03
  }
  invisible()
}
## Disabled (if(FALSE)): stub meant to expand the usr coordinates of the
## current plot to the entire figure region so out-of-range points could
## be drawn; never completed.
if(FALSE) {
  expandUsrCoord <- function()
  {
    ## Expands usr coordinates of current plot to entire figure region
    ## so that out of range plots may be plotted
    pr <- par()
    usr <- pr$usr
    p <- pr$plt
    invisible(pr)
  }
}
## Author: Patrick Connolly
## HortResearch
## Mt Albert
## Auckland, New Zealand
## Print a matrix or data frame as a boxed character table, stacking
## cell text that contains embedded newlines and lining row and column
## names up with their cells.  Output goes to the screen or to 'file'.
## Alignment arguments accept "left", "cen" or "right"; hsep/vsep/csep
## give the column separator, row separator and corner characters.
print.char.matrix <-
  function (x, file = "",
            col.name.align = "cen", col.txt.align = "right",
            cell.align = "cen", hsep = "|", vsep = "-", csep = "+",
            row.names = TRUE, col.names = FALSE,
            append = FALSE, top.border = TRUE, left.border = TRUE, ...)
{
  ### To print a data frame or matrix to a text file or screen
  ### and having names line up with stacked cells
  ###
  ### First, add row names as first column (might be removed later)
  ndimn <- names(dimnames(x)) ## FEH
  rownames <- dimnames(x)[[1]]
  x <- cbind(rownames, x)
  names(dimnames(x)) <- ndimn ## FEH
  cnam <- dimnames(x)[[2]] ## FEH
  if(length(ndimn))
    cnam[1] <- ndimn[1] ## FEH
  ##dimnames(x)[[1]] <- seq(nrow(x)) 25Mar02 for R FEH
  dimnames(x) <- list(as.character(seq(nrow(x))), cnam)
  names(dimnames(x)) <- ndimn ## 26Mar02 FEH
  ### Set up some padding functions:
  ###
  pad.left <- function(z, pads)
  {
    ## Pads spaces to left of text
    padding <- paste(rep(" ", pads), collapse = "")
    paste(padding, z, sep = "")
  }
  pad.mid <- function(z, pads)
  {
    ## Centres text in available space
    padding.right <- paste(rep(" ", pads%/%2), collapse = "")
    padding.left <- paste(rep(" ", pads - pads%/%2), collapse = "")
    paste(padding.left, z, padding.right, sep = "")
  }
  pad.right <- function(z, pads) {
    ## Pads spaces to right of text
    padding <- paste(rep(" ", pads), collapse = "")
    paste(z, padding, sep = "")
  }
  ## (Padding happens on the opposite side to alignment)
  pad.types <- c("left", "mid", "right")
  names(pad.types) <- c("right", "cen", "left")
  pad.name <- pad.types[col.name.align]
  pad.txt <- pad.types[col.txt.align]
  pad.cell <- pad.types[cell.align]
  ## Padding character columns
  ## Need columns with uniform number of characters
  pad.char.col.right <- function(y)
  {
    ## For aligning text to LHS of column
    col.width <- nchar(y)
    biggest <- max(col.width)
    smallest <- min(col.width)
    padding <- biggest - col.width
    out <- NULL
    for (i in seq(y))
      out[i] <- pad.right(y[i], pads = padding[i])
    out
  }
  pad.char.col.left <- function(y)
  {
    ## For aligning text to RHS of column
    col.width <- nchar(y)
    biggest <- max(col.width)
    smallest <- min(col.width)
    padding <- biggest - col.width
    out <- NULL
    for (i in seq(y))
      out[i] <- pad.left(y[i], pads = padding[i])
    out
  }
  pad.char.col.mid <- function(y) {
    ## For aligning text to centre of column
    col.width <- nchar(y)
    biggest <- max(col.width)
    smallest <- min(col.width)
    padding <- biggest - col.width
    out <- NULL
    for (i in seq(y))
      out[i] <- pad.mid(y[i], pads = padding[i])
    out
  }
  ## which functions to use this time.
  pad.name.fn <- get(paste("pad.", pad.name, sep = ""))
  pad.txt.fn <- get(paste("pad.char.col.", pad.txt, sep = ""))
  pad.cell.fn <- get(paste("pad.", pad.cell, sep = ""))
  ## Remove troublesome factors
  x <- as.data.frame(x)
  fac.col <- names(x)[sapply(x, is.factor)]
  for (i in fac.col)
    x[, i] <- I(as.character(x[, i]))
  ## ARE ANY LINE BREAKS IN ANY COLUMNS?
  break.list <- list()
  for (i in seq(nrow(x))) {
    x.i <- unlist(x[i, ])
    rows.i <- sapply(strsplit(unlist(x[i, ]), "\n"), length)
    rows.i[rows.i < 1] <- 1
    break.list[[i]] <- rows.i
  }
  break.row <- sapply(break.list, function(x) any(x > 1))
  names(break.row) <- seq(nrow(x))
  xx <- x
  if (any(break.row)) {
    ## add in extra row/s: each multi-line cell becomes a stack of rows,
    ## with shorter cells in the same row padded by blank lines
    xx <- NULL
    reprow <- lapply(break.list, unique)
    for (k in seq(nrow(x))) {
      x.k <- unlist(x[k, ])
      x.k[x.k == ""] <- " "
      if (break.row[k]) {
        l.k <- strsplit(x.k, "\n")
        add.blanks <- max(break.list[[k]]) - break.list[[k]]
        names(l.k) <- names(add.blanks) <- seq(length(l.k))
        if (any(add.blanks > 0)) {
          for (kk in names(add.blanks[add.blanks > 0]))
            l.k[[kk]] <- c(l.k[[kk]], rep(" ", add.blanks[kk]))
        }
        l.k.df <- as.data.frame(l.k)
        names(l.k.df) <- names(x)
        xx <- rbind(xx, as.matrix(l.k.df))
      }
      else xx <- rbind(xx, x.k)
    }
    row.names(xx) <- paste(rep(row.names(x), sapply(reprow,
                                                    max)),
                           unlist(reprow), sep = ".")
    ## Make an index for the rows to be printed
    rn <- row.names(xx)
    rnb <- strsplit(rn, "\\.")
    rpref <- as.numeric(factor(sapply(rnb, function(z) z[1])))
    ## was codes( ) 10oct03
  }
  else
    rpref <- seq(nrow(x))
  x <- as.data.frame(xx)
  ## Character columns need different treatment from numeric columns
  char.cols <- sapply(x, is.character)
  if (any(char.cols))
    x[char.cols] <- sapply(x[char.cols], pad.txt.fn)
  ## Change numeric columns into character
  if (any(!char.cols))
    x[!char.cols] <- sapply(x[!char.cols], format)
  ## now all character columns each of which is uniform element width
  ##
  ## Lining up names with their columns
  ## Sometimes the names of columns are wider than the columns they name,
  ## sometimes vice versa.
  names.width <- nchar(names(x))
  if (!col.names)
    names.width <- rep(0, length(names.width))
  cell.width <- sapply(x, function(y) max(nchar(as.character(y))))
  ## (the width of the characters in the cells as distinct
  ##  from their names)
  name.pads <- cell.width - names.width
  cell.pads <- -name.pads
  name.pads[name.pads < 0] <- 0
  cell.pads[cell.pads < 0] <- 0
  pad.names <- name.pads > 0
  pad.cells <- cell.pads > 0
  ## Pad out the column names if necessary:
  if (any(pad.names)) {
    stretch.names <- names(x)[pad.names]
    for (i in stretch.names) {
      names(x)[names(x) == i] <- pad.name.fn(i, name.pads[i])
    }
  }
  ## likewise for the cells and columns
  if (any(pad.cells)) {
    stretch.cells <- names(x)[pad.cells]
    for (j in stretch.cells) x[, j] <- pad.cell.fn(x[, j],
                                                   cell.pads[j])
  }
  ## Remove row names if not required
  if (!row.names)
    x <- x[-1]
  ## Put the column names on top of matrix
  if (col.names)
    mat2 <- rbind(names(x), as.matrix(x))
  else
    mat2 <- as.matrix(x)
  mat.names.width <- nchar(mat2[1, ])
  ## character string to separate rows
  space.h <- ""
  for (k in seq(along=mat.names.width)) { ## added along= FEH 26Mar02
    space.h <- c(space.h, rep(vsep, mat.names.width[k]), csep)
  }
  line.sep <- paste(c(ifelse(left.border, csep, ""), space.h),
                    collapse = "")
  ## 0 sentinels mark table boundaries so separators are drawn there
  if (col.names)
    rpref <- c(0, rpref, 0)
  else
    rpref <- c(rpref, 0)
  ## print to screen or file
  if(top.border && line.sep !='') {
    write(line.sep, file = file, append = append)
    append <- TRUE
  }
  for (i in 1:nrow(mat2)) {
    if (left.border)
      write(paste(paste(c("", mat2[i, ]), collapse = hsep),
                  hsep, sep = ""), file = file, append = append)
    else
      write(paste(paste(mat2[i, ], collapse = hsep), hsep,
                  sep = ""), file = file, append = append)
    append <- TRUE
    ## print separator if row prefix is not same as next one
    if (rpref[i] != rpref[i + 1] && line.sep != '')
      write(line.sep, file = file, append = TRUE)
  }
}
unPaste <- function(str, sep='/')
{
  ## Inverse of paste(): split each element of str on sep and return a
  ## list whose j-th component holds the j-th field of every element.
  ## Assumes every element has the same number of fields.
  pieces <- strsplit(str, sep)
  fields <- matrix(unlist(pieces), ncol=length(str))
  lapply(seq_len(nrow(fields)), function(j) fields[j, ])
}
get2rowHeads <- function(str) {
  ## Split two-line column headings written as "top\nbottom" into a list
  ## of (top labels, bottom labels); a missing piece becomes "".
  parts <- strsplit(str, '\n')
  ## strsplit maps "" to character(0), hence the length guards
  top    <- vapply(parts, function(p) if(length(p)) p[[1]] else '', character(1))
  bottom <- vapply(parts, function(p) if(length(p) > 1) p[[2]] else '', character(1))
  list(top, bottom)
}
## Note: can't say f[vector of names] <- list(...) to update args
## In R you have to put ALL arguments in list(...) so sometimes we set
## unneeded ones to NULL. Ignore this assignment in S<
## Two lists of functions, one for primitives for S+ or R (either Trellis
## or low-level), one for R grid
## Note: rect is only defined in R, not S+
## Return a list of low-level drawing primitives (lines, points, text,
## segments, arrows, rect, polygon, abline, unit, axis) appropriate to
## the active graphics system: base graphics when grid=FALSE, or
## grid/lattice equivalents when grid=TRUE.  Higher-level Hmisc code can
## then draw without caring which system is in use.
ordGridFun <- function(grid)
{
  if(!grid)
    ## Base-graphics implementations: mostly thin pass-throughs
    list(lines = function(...) lines(...),
         points = function(..., size=NULL)
         {
           if(length(size))
             warning('size not implemented yet')
           points(...)
         },
         text = function(...) text(...),
         segments = function(...) segments(...),
         arrows = function(..., open, size) arrows(..., length=size*.8),
         rect = function(...) rect(...),
         polygon = function(x, y=NULL, ..., type=c('l','s'))
         {
           type <- match.arg(type)
           if(!length(y))
           {
             y <- x$y
             x <- x$x
           }
           j <- !is.na(x+y)
           x <- x[j]
           y <- y[j]
           ## type='s' fills the region under a step-function outline
           if(type=='s') polygon(makeSteps(x, y), ..., border=NA)
           else polygon(x, y, ..., border=NA)
         },
         abline = function(...) abline(...),
         unit = function(x, units='native')
         {
           if(units!='native')
             stop('units="native" is only units implemented outside of grid')
           x
         },
         axis = function(...) axis(...))
  else {
    sRequire('lattice')
    ## Grid/lattice implementations; grid unit objects are converted to
    ## native coordinates before being handed to the lattice primitives
    list(lines = function(x, y, ...)
         {
           if(is.list(x)) {
             y <- x[[2]]; x <- x[[1]]
           }
           lattice::llines(if(is.unit(x))
                             convertX(x, 'native', valueOnly=TRUE)
                           else x,
                           if(is.unit(y))
                             convertY(y, 'native', valueOnly=TRUE)
                           else y,
                           ...)
         },
         points = function(x, y, ...)
         {
           if(is.list(x)) {
             y <- x[[2]]; x <- x[[1]]
           }
           lattice::lpoints(if(is.unit(x))
                              convertX(x, 'native', valueOnly=TRUE)
                            else x,
                            if(is.unit(y))
                              convertY(y, 'native', valueOnly=TRUE)
                            else y,
                            ...)
         },
         text = function(x, y, ...)
         {
           if(is.list(x)) {
             y <- x[[2]]; x <- x[[1]]
           }
           lattice::ltext(if(is.unit(x))
                            convertX(x, 'native', valueOnly=TRUE)
                          else x,
                          if(is.unit(y))
                            convertY(y, 'native', valueOnly=TRUE)
                          else y,
                          ...)
         },
         segments = function(x0, y0, x1, y1, ...)
         {
           grid.segments(x0, y0, x1, y1, default.units='native',
                         gp=gpar(...))
         },
         arrows = function(...) lattice::larrows(...),
         rect = function(xleft, ybottom, xright, ytop, density, angle,
                         border, xpd, ...)
         {
           ## density/angle/border/xpd are swallowed: grid.rect has no
           ## equivalents for these base-graphics arguments
           grid.rect(xleft, ybottom, width=xright-xleft,
                     height=ytop-ybottom, just='left',
                     default.units='native', gp=gpar(...))
         },
         polygon = function(x, y=NULL, col=par('col'), type=c('l','s'), ...)
         {
           type <- match.arg(type)
           if(!length(y))
           {
             y <- x$y
             x <- x$x
           }
           j <- !is.na(x+y)
           x <- x[j]
           y <- y[j]
           if(type=='s') grid.polygon(makeSteps(x, y),
                                      default.units='native',
                                      gp=gpar(fill=col, col='transparent', ...))
           else grid.polygon(x, y, default.units='native',
                             gp=gpar(fill=col,col='transparent',...))
         },
         abline=function(...) lattice::panel.abline(...),
         unit = function(x, units='native', ...) unit(x, units=units, ...),
         axis = function(side=1, at=NULL, labels, ticks=TRUE,
                         distn, line, pos, outer, ...)
         {
           if(!length(at))stop('not implemented for at= unspecified')
           if(side > 2) stop('not implemented for side=3 or 4')
           ## ticks=ticks removed from grid.?axis FEH 30Aug09
           if(side==1) grid.xaxis(at=at, label=labels, gp=gpar(...))
           if(side==2) grid.yaxis(at=at, label=labels, gp=gpar(...))
         })
  }
}
## Return a list of the graphics parameters (usr, pin, uin, cin, cex,
## lwd) needed by Hmisc drawing code, computed either from base par()
## or, when grid=TRUE, from the current grid viewport via unit
## conversions.
parGrid <- function(grid=FALSE)
{
  pr <- par()
  cin <- pr$cin
  cex <- pr$cex
  lwd <- pr$lwd
  if(grid) {
    ## cvp <- current.viewport()
    ## usr <- c(cvp$xscale, cvp$yscale)
    ## User-coordinate limits of the current viewport
    usr <- c(convertX(unit(0:1, "npc"), "native", valueOnly=TRUE),
             convertY(unit(0:1, "npc"), "native", valueOnly=TRUE))
    ## Viewport size in inches
    pin <-
      c(convertWidth(unit(1, "npc"), "inches", valueOnly=TRUE),
        convertHeight(unit(1, "npc"), "inches", valueOnly=TRUE))
    ## Inches per user-coordinate unit
    uin <-
      c(convertWidth(unit(1, "native"), "inches", valueOnly=TRUE),
        convertHeight(unit(1, "native"), "inches", valueOnly=TRUE))
  }
  else {
    usr <- pr$usr
    pin <- pr$pin
    uin <- c(pin[1]/(usr[2]-usr[1]), pin[2]/(usr[4]-usr[3]))
    ## 22Mar01 - R does not have par(uin)
  }
  list(usr=usr, pin=pin, uin=uin, cin=cin, cex=cex, lwd=lwd)
}
## Replaces R's xinch, yinch, extending them to grid
## These convert inches to data units
xInch <- function(x=1, warn.log=!grid, grid=FALSE)
{
  ## Convert a horizontal length in inches to user/data units, for base
  ## graphics or grid.  Meaningless on a log x scale, hence the warning.
  if (warn.log && par("xlog"))
    warning("x log scale: xInch() is nonsense")
  p <- parGrid(grid)
  x * diff(p$usr[1:2]) / p$pin[1]
}
yInch <- function (y = 1, warn.log=!grid, grid=FALSE)
{
  ## Convert a vertical length in inches to user/data units, for base
  ## graphics or grid.  Meaningless on a log y scale, hence the warning.
  if (warn.log && par("ylog"))
    warning("y log scale: yInch is nonsense")
  p <- parGrid(grid)
  y * diff(p$usr[3:4]) / p$pin[2]
}
na.include <- function(obj) {
  ## Recode NA as an explicit factor level; applied recursively to every
  ## column of a data frame.  Non-factor vectors are returned unchanged.
  if(inherits(obj, 'data.frame')) {
    for(j in seq(along=obj))
      obj[[j]] <- na.include(obj[[j]])
    obj
  } else {
    if(length(levels(obj)) && any(is.na(obj)))
      factor(obj, exclude=NULL)   # exclude=NULL keeps NA as a level
    else
      obj
  }
}
## Disabled (if(FALSE)): original .C("bincode")-based implementation of
## whichClosest(), superseded by the Fortran version below.
if(FALSE) {
  whichClosest <- function(x, w)
  {
    ## x: vector of reference values
    ## w: vector of values to find closest matches in x
    ## Returns: subscripts in x corresponding to w
    i <- order(x)
    x <- x[i]
    n <- length(x)
    ## Bin boundaries placed halfway between successive sorted x values
    br <- c(-1e30, x[-n]+diff(x)/2,1e30)
    m <- length(w)
    i[.C("bincode", as.double(w), m, as.double(br),
         length(br), code = integer(m), right = TRUE,
         include = FALSE, NAOK = TRUE, DUP = FALSE,
         PACKAGE = "base")$code]
  }
  NULL
}
## Just as good, ties shuffled to end
## function(x, w) round(approx(x,1:length(x),xout=w,rule=2,ties='ordered')$y)
## Remove ties= for S-Plus. Note: does not work when 2nd arg to
## approx is not uniformly spaced
## NO! ties='ordered' bombs in x not ordered
## Try
## approx(c(1,3,5,2,4,2,4),1:7,xout=c(1,3,5,2,4,2,4),rule=2,ties=function(x)x[1])
## NO: only works in general if both x and y are already ordered
## The following runs the same speed as the previous S version (in R anyway)
whichClosest <- function(x, w)
{
  ## x: vector of reference values
  ## w: vector of values for which to lookup closest matches in x
  ## Returns: subscripts in x corresponding to w
  ## Assumes no NAs in x or w
  ## Delegates to the package's compiled Fortran routine wclosest
  .Fortran(F_wclosest,as.double(w),as.double(x),
           length(w),length(x),
           j=integer(length(w)))$j
}
## Probabilistically weighted close match: like whichClosest() but picks
## among near matches at random (via the runif() draws passed down),
## with weighting controlled by f — smaller f concentrates the choice on
## the nearest values.  Implemented in the compiled Fortran routine
## wclosepw.
whichClosePW <- function(x, w, f=0.2) {
  lx <- length(x)
  lw <- length(w)
  .Fortran(F_wclosepw,as.double(w),as.double(x),
           as.double(runif(lw)),as.double(f),
           lw, lx, double(lx), j=integer(lw))$j
}
whichClosek <- function(x, w, k) {
  ## For each element of w, find the k nearest values in x (after
  ## jittering x by a tiny amount to break ties) and randomly pick one
  ## of them; with k=1 the single closest match is returned.
  ## Assumes no NAs in x or w.
  gaps <- diff(sort(x))
  eps <- if(all(gaps == 0)) 1 else min(gaps[gaps > 0])
  x <- x + runif(length(x), -eps/100, eps/100)
  dist <- abs(outer(w, x, "-"))
  nearest <- apply(dist, 1, function(d) order(d)[1:k])
  if(k == 1)
    return(nearest)
  apply(nearest, 2, function(cand) sample(cand, 1))
}
## Disabled (if(FALSE)): distance-weighted sampling of matches, kept for
## reference.  Would return, for each w, a random subscript of x drawn
## with probability decreasing in |x - w| via a tricube weight.
if(FALSE) {
  sampWtdDist <- function(x, w)
  {
    ## x: vector of reference values
    ## w: vector of values to find closest matches in x
    ## Returns: subscripts in x corresponding to w
    ## 25% slower but simpler method:
    ## z <- abs(outer(w, x, "-"))
    ## s <- apply(z, 1, max)
    ## z <- (1 - sweep(z, 1, s, FUN='/')^3)^3
    ## sums <- apply(z, 1, sum)
    ## z <- sweep(z, 1, sums, FUN='/')
    lx <- length(x)
    lw <- length(w)
    z <- matrix(abs( rep( x , lw ) - rep( w, each = lx ) ),
                nrow=lw, ncol=lx, byrow=TRUE) ## Thanks: Chuck Berry
    ## s <- pmax( abs( w - min(x) ), abs( w - max(x) ) ) # to use max dist
    s <- rowSums(z)/lx/3 # use 1/3 mean dist for each row
    tricube <- function(u) (1 - pmin(u,1)^3)^3
    ## z <- (1 - (z/rep(s,length.out=lx*lw))^3)^3 # Thanks: Tim Hesterberg
    z <- tricube(z/s) # Thanks: Tim Hesterberg
    sums <- rowSums(z)
    z <- z/sums
    as.vector(rMultinom(z, 1))
  }
  NULL
}
approxExtrap <- function(x, y, xout, method='linear', n=50, rule=2,
                         f=0, ties='ordered', na.rm=FALSE)
{
  ## approx() with linear extrapolation beyond the range of the data,
  ## using the slope of the first (last) two points on the left (right).
  ## x may also be a two-component list holding both coordinates.
  ## NOTE: the interior approx() call intentionally passes rule=2 so
  ## interior lookups never yield NA; the 'rule' argument is retained
  ## only for call compatibility.
  if(is.list(x)) {
    y <- x[[2]]; x <- x[[1]]
  }
  if(na.rm) {
    keep <- !is.na(x + y)
    x <- x[keep]; y <- y[keep]
  }
  x <- as.numeric(x)   # handles dates etc.
  y <- as.numeric(y)
  ## De-duplicate then order x so the endpoint slopes are well defined
  keep <- !duplicated(x)
  x <- x[keep]; y <- y[keep]
  ord <- order(x)
  x <- x[ord]; y <- y[ord]
  yout <- approx(x, y, xout=xout, method=method, n=n,
                 rule=2, f=f, ties=ties)$y
  below <- xout < x[1]
  if(any(is.na(below)))
    stop('NAs not allowed in xout')
  if(any(below))
    yout[below] <- (y[2] - y[1]) / (x[2] - x[1]) * (xout[below] - x[1]) + y[1]
  above <- xout > x[length(x)]
  m <- length(y)
  if(any(above))
    yout[above] <- (y[m] - y[m-1]) / (x[m] - x[m-1]) * (xout[above] - x[m-1]) + y[m-1]
  list(x=xout, y=yout)
}
## Build and return a function that numerically inverts the (possibly
## non-monotonic) function y(x) given on a grid.  For monotonic y a
## simple interpolating inverse is returned.  Otherwise the curve is cut
## at its turning points into monotonic segments; the returned function
## gives, for each requested y, the x solution in every segment
## (what='all', a matrix) or one randomly sampled solution
## (what='sample').
inverseFunction <- function(x, y) {
  d <- diff(y)
  xd <- x[-1]
  dl <- c(NA, d[-length(d)])
  ## Indices where the slope changes sign: the turning points
  ic <- which(d>=0 & dl<0 | d>0 & dl<=0 | d<=0 & dl>0 | d<0 & dl>=0)
  nt <- length(ic)
  k <- nt + 1
  if(k==1) {
    ## Monotonic case: the inverse is a single interpolation.
    ## Data are baked into the formals so the result is self-contained.
    h <- function(y, xx, yy, turns, what, coef)
      approx(yy, xx, xout=y, rule=2)$y
    formals(h) <- list(y=numeric(0), xx=x, yy=y, turns=numeric(0),
                       what=character(0), coef=numeric(0))
    return(h)
  }
  turns <- x[ic]
  turnse <- c(-Inf, turns, Inf)
  ## x- and y-extents of each monotonic segment
  xrange <- yrange <- matrix(NA, nrow=k, ncol=2)
  for(j in 1:k) {
    l <- which(x >= turnse[j] & x <= turnse[j+1])
    xrange[j,] <- x[l[c(1,length(l))]]
    yrange[j,] <- y[l[c(1,length(l))]]
  }
  ## Refine each turning point by locating the zero of the slope
  for(j in 1:length(ic)) {
    l <- (ic[j]-1):(ic[j]+1)
    turns[j] <- approxExtrap(d[l], xd[l], xout=0, na.rm=TRUE)$y
  }
  hh <- function(y, xx, yy, turns, xrange, yrange, what, coef) {
    what <- match.arg(what)
    ## Find number of monotonic intervals containing a given y value
    ylo <- pmin(yrange[,1],yrange[,2])
    yhi <- pmax(yrange[,1],yrange[,2])
    n <- outer(y, ylo, function(a,b) a >= b) &
      outer(y, yhi, function(a,b) a <= b)
    ## Columns of n indicate whether or not y interval applies
    ni <- nrow(yrange)
    fi <- matrix(NA, nrow=length(y), ncol=ni)
    turnse <- c(-Inf, turns, Inf)
    for(i in 1:ni) {
      w <- n[,i]
      if(any(w)) {
        ## Invert within this monotonic segment only
        l <- xx >= turnse[i] & xx <= turnse[i+1]
        fi[w,i] <- approx(yy[l], xx[l], xout=y[w])$y
      }
    }
    noint <- !apply(n, 1, any)
    if(any(noint)) {
      ## Determine if y is closer to yy at extreme left or extreme right
      ## of an interval
      m <- length(yy)
      yl <- as.vector(yrange); xl <- as.vector(xrange)
      fi[noint,1] <- xl[whichClosest(yl, y[noint])]
    }
    if(what=='sample')
      apply(fi, 1, function(x) {
        z <- x[!is.na(x)]
        if(length(z)==1) z else if(length(z)==0) NA else sample(z, size=1)
      }) else fi
  }
  formals(hh) <- list(y=numeric(0), xx=x, yy=y, turns=turns,
                      xrange=xrange, yrange=yrange,
                      what=c('all', 'sample'), coef=numeric(0))
  ## coef is there for compatibility with areg use
  hh
}
Names2names <- function(x)
{
  ## Repair objects from old S versions in which the names attribute was
  ## stored as '.Names'; lists are returned untouched.
  if(!is.list(x)) {
    anames <- names(attributes(x))
    if(any(anames == '.Names'))
      names(attributes(x)) <- ifelse(anames == '.Names', 'names', anames)
  }
  x
}
##xedit <- function(file, header, title, delete.file=FALSE) {
## In R, use e.g. options(pager=xedit); page(x,'p')
## sys(paste('xedit -title "', title, '" ', file, ' &',
## sep=''))
## invisible()
##}
## Disabled (if(FALSE)): print an object into a temporary file and view
## it with the external 'gless' pager; Unix-specific and unused.
if(FALSE) {
  gless <- function(x, ...)
  {
    ## Usage: gless(x) - uses print method for x, puts in window with
    ## gless using name of x as file name prefixed by ~, leaves window open
    nam <- substring(deparse(substitute(x)), 1, 40)
    file <- paste('/tmp/',nam,sep='~') #tempfile('Rpage.')
    sink(file)
    ## cat(nam,'\n' )
    ## if(length(attr(x,'label')) && !inherits(x,'labelled'))
    ##   cat(attr(x,'label'),'\n')
    ## cat('\n')
    print(x, ...)
    sink()
    sys(paste('gless --geometry=600x400 "',file,'" &',sep=''))
    ## gless does not have a title option
    invisible()
  }
  NULL
}
## Display any object in a persistent text window.  The object is printed
## to a temporary file, which is then shown with the external 'xless'
## utility, or opened in TextEdit when running on macOS (Darwin).
## title defaults to an abbreviation of the expression passed as x.
xless <-
  function(x, ..., title=substring(deparse(substitute(x)),1,40))
{
  tmp <- tempfile()
  sink(tmp)
  print(x, ...)
  sink()
  cmd <-
    if(Sys.info()['sysname'] == 'Darwin')
      paste('open -a TextEdit', tmp)
    else
      paste('xless -title "', title, '" -geometry "90x40" "',
            tmp, '" &', sep='')
  system(cmd)
  invisible()
}
## Paste as many elements of character vector x as will fit on a line of
## width 'width', starting a new line when the next element (plus its
## separator) would overflow.  Returns a character vector of pasted lines.
##
## x     : character vector of tokens to combine
## sep   : separator placed between tokens sharing a line
## width : maximum line width in characters
##
## Fixes: scalar condition now uses || semantics (the original used the
## vectorized |); the separator's width is counted when testing fit; and
## seq_along() avoids the 1:length(x) failure on empty input.
pasteFit <- function(x, sep=',', width=.Options$width)
{
  widths <- nchar(x)
  out <- character(0)
  cur <- ''
  for(i in seq_along(x)) {
    if(cur == '')
      cur <- x[i]      # first token on a line is always placed
    else if(widths[i] + nchar(sep) + nchar(cur) <= width)
      cur <- paste(cur, x[i], sep=sep)
    else {             # token will not fit; start a new line
      out <- c(out, cur)
      cur <- x[i]
    }
  }
  if(cur != '') out <- c(out, cur)
  out
}
## Determine if variable is a date, time, or date/time variable in R.
## The following 2 functions are used by describe.vector
## timeUsed assumes is date/time combination variable and has no NAs
## Test whether x is a date, time, or date/time variable.
## what='either'     : TRUE if x carries any date or time class
## what='both'       : TRUE only for combined date/time classes
## what='timeVaries' : TRUE if the time-of-day portion actually varies
##                     (assumes a date/time variable with no NAs)
testDateTime <- function(x, what=c('either','both','timeVaries'))
{
  what <- match.arg(what)
  cls <- class(x)
  if(! length(cls)) return(FALSE)
  dateClasses     <- c('Date', 'POSIXt','POSIXct','dates','times','chron')
  dateTimeClasses <- c('POSIXt','POSIXct','chron')
  if(what == 'either') return(any(cls %in% dateClasses))
  if(what == 'both')   return(any(cls %in% dateTimeClasses))
  ## what == 'timeVaries'
  if('chron' %in% cls || 'Date' %in% cls) {
    ## chron or S+ timeDate: the fractional part of the numeric value
    ## carries the time of day
    z <- as.numeric(x)
    length(unique(round(z - floor(z), 13))) > 1
  }
  else length(unique(format(x, '%H%M%S'))) > 1
}
## Format a date/time value derived from another variable.
## x  : numeric summary (e.g., mean) of the original date/time variable
## at : attributes of the original variable, reapplied to x to restore
##      its class before formatting
## roundDay : if TRUE, round to whole days before formatting
formatDateTime <- function(x, at, roundDay=FALSE)
{
  cls <- at$class
  isChron <- any(cls %in% c('chron','dates','times'))
  attributes(x) <- at
  w <-
    if(isChron) {
      if(roundDay) {
        if (!requireNamespace("chron", quietly = TRUE))
          stop("'roundDay = TRUE' requires the 'chron' package.")
        fmt <- at$format
        ## A length-2 character format is c(date format, time format);
        ## only the date part applies once rounded to days
        if(length(fmt) == 2 && is.character(fmt))
          format(chron::dates(x), fmt[1])
        else
          format(chron::dates(x))
      }
      else x
    }
    else if(roundDay && 'Date' %nin% at$class)
      as.POSIXct(round(x, 'days'))
    else x
  format(w)
}
## Try to guess whether a factor or character variable is a date, and
## allow for partial dates of the form YYYY and mm/YYYY, the former
## only used if at least one observation has a month in it. If a minority
## fraction of observations fracnn or less is not convertible to a date,
## set those observations to NA
## Allows for mixing of common date forms across observations
##
## Example:
##
## x <- convertPdate(c(rep('2019-03-04',7), '2018', '03/2018', 'junk', '3/11/17','2017-01-01','2017-01-01', NA, ''))
## x
## describe(x)
convertPdate <- function(x, fracnn=0.3, considerNA=NULL) {
  ## Convert a character/factor variable to Date when enough of its values
  ## look like dates.  Partial dates are imputed: YYYY -> July 3 of that
  ## year; mm/YYYY -> the 15th of that month.  Non-date values are set to
  ## NA and recorded in attribute 'special.miss'; imputed observations are
  ## recorded in attribute 'imputed'.
  ## fracnn     : maximum tolerated fraction of non-date, non-missing values
  ## considerNA : extra strings counted as missing for the threshold
  xo <- x                        # original input; returned unchanged on failure
  if(is.factor(x)) x <- as.character(x)
  if(! is.character(x)) return(xo)
  x <- trimws(x)
  if(all(is.na(x)) || all(x =='')) return(xo)
  ## Classify each value by the date pattern it matches
  ymd <- grepl('^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}$', x)   # YYYY-m-d
  mdy <- grepl('^[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}$', x)   # m/d/YYYY
  y <- grepl('^[0-9]{4}$', x)                           # bare YYYY
  my <- grepl('^[0-9]{1,2}/[0-9]{4}$', x)               # m/YYYY
  ## Minimum number of date-like values needed, from the count of
  ## non-missing, non-ignored observations
  m <- sum(x %nin% c('', considerNA) & ! is.na(x)) * (1 - fracnn)
  dny <- ymd | mdy | my # date other than just YYYY
  ## Bare years only count as dates if some fuller date form is present
  d <- dny | (y & any(dny))
  ## The variable is a date if at least m values are dates
  if(sum(d) < m) return(xo)
  special <- obs <- NULL
  ndnm <- ! d & ! is.na(x) # not date and not missing
  if(any(ndnm)) {
    special <- setdiff(x[ndnm], c('', ' '))
    obs <- x %in% special
    x[ndnm] <- NA # values such as text comments
  }
  ## Normalize every recognized form to YYYY-m-d text before as.Date
  x <- ifelse(y, paste0(x, '-07-03'),
       ifelse(my, gsub('^([0-9]{1,2})/([0-9]{4})$', '\\2-\\1-15', x),
       ifelse(ymd, x,
       ifelse(mdy, gsub('^([0-9]{1,2})/([0-9]{1,2})/([0-9]{4})$',
                        '\\3-\\1-\\2', x), NA))))
  x <- as.Date(x)
  if(length(special))
    attr(x, 'special.miss') <- list(codes=special, obs=obs)
  if(any(y | my)) attr(x, 'imputed') <- which(y | my)
  x
}
getHdata <-
  function(file, what=c('data','contents','description','all'),
           where='https://hbiostat.org/data/repo') {
    ## Fetch a dataset (.rda or .sav) from the hbiostat.org data repository
    ## into .GlobalEnv, or open its contents/description page in a browser.
    ## Called with no file argument, returns the available dataset names.
    ## Setting options(localHfiles=TRUE) uses a local copy of the repo.
    what <- match.arg(what)
    fn <- as.character(substitute(file))   # dataset name, unevaluated
    localrepo <- .Options$localHfiles
    localrepo <- length(localrepo) && is.logical(localrepo) && localrepo
    if(localrepo) where <- '~/web/data/repo'
    ads <- readLines(paste0(where, '/Rcontents.txt'))
    ## Strip extensions to get dataset names
    ## NOTE(review): the dots in '.sav|.rda' are unescaped regex dots, so
    ## any character before 'sav'/'rda' matches -- harmless for typical
    ## repo listings but worth confirming
    a <- unlist(strsplit(ads,'.sav|.rda'))
    if(missing(file)) return(a)
    wds <- paste(substitute(file), c('rda','sav'), sep='.')
    if(!any(wds %in% ads))
      stop(paste(paste(wds, collapse=','),
                 'are not on the web site.\nAvailable datasets:\n',
                 paste(a, collapse=' ')))
    wds <- wds[wds %in% ads]
    if(what %in% c('contents','all')) {
      ## Contents pages are named C<dataset>.html, except for nhgh
      w <- paste(if(fn=='nhgh')'' else 'C',fn,'.html',sep='')
      browseURL(paste(where, w, sep='/'))
    }
    if(what %in% c('description','all')) {
      ades <- scan(paste(where,'Dcontents.txt',sep='/'),list(''),
                   quiet=TRUE)[[1]]
      i <- grep(paste(fn,'\\.',sep=''),ades)
      if(!length(i))
        warning(paste('No description file available for',fn))
      else {
        w <- ades[i[1]]
        browseURL(paste(where, w, sep='/'))
      }
    }
    if(what %nin% c('data','all'))
      return(invisible())
    f <- paste(where, wds, sep='/')
    ## NOTE(review): despite the warning text, f is not subset to f[1]
    ## below, so a length-2 f would reach download.file/load unchanged --
    ## TODO confirm intended behavior
    if(length(f) > 1)
      warning(paste('More than one file matched; using the first:',
                    paste(f, collapse=', ')))
    if(localrepo) return(invisible(load(f, .GlobalEnv)))
    tf <- tempfile()
    download.file(f, tf, mode='wb', quiet=TRUE)
    load(tf, .GlobalEnv)
    invisible()
  }
hdquantile <- function(x, probs=seq(0, 1, 0.25), se=FALSE,
                       na.rm=FALSE, names=TRUE, weights=FALSE)
{
  ## Harrell-Davis quantile estimator: each quantile is a weighted mean of
  ## all order statistics, with weights given by increments of a Beta
  ## distribution function.
  ## probs   : quantiles to estimate; 0 and 1 map to the sample extremes
  ## se      : if TRUE, attach attribute 'se' with jackknife standard errors
  ## names   : name the result with format(probs)
  ## weights : if TRUE, attach the matrix of order-statistic weights
  if(na.rm) {
    na <- is.na(x)
    if(any(na))
      x <- x[!na]
  }
  x <- sort(x, na.last=TRUE)
  n <- length(x)
  if(n < 2)
    return(rep(NA, length(probs)))
  m <- n + 1
  ## Interior probabilities; 0 and 1 are handled separately below
  ps <- probs[probs > 0 & probs < 1]
  qs <- 1 - ps   # NOTE(review): qs appears to be unused
  ## Weight for order statistic i at probability p is the increment of the
  ## Beta(p*m, (1-p)*m) CDF over ((i-1)/n, i/n]
  a <- outer((0:n)/n, ps,
             function(x,p,m) pbeta(x, p*m, (1-p)*m), m=m)
  w <- a[-1,,drop=FALSE] - a[-m,,drop=FALSE]
  r <- drop(x %*% w)
  rp <- range(probs)
  pp <- ps
  if(rp[1]==0) {
    r <- c(x[1], r); pp <- c(0,pp)   # P=0 -> sample minimum
  }
  if(rp[2]==1) {
    r <- c(r, x[n]); pp <- c(pp,1)   # P=1 -> sample maximum
  }
  r <- r[match(pp, probs)]           # restore requested order
  if(names) names(r) <- format(probs)
  if(weights)
    attr(r,'weights') <- structure(w, dimnames=list(NULL,format(ps)))
  if(!se)
    return(r)
  if(n < 3)
    stop('must have n >= 3 to get standard errors')
  ## Jackknife SEs: recompute weights for samples of size n-1
  l <- n - 1
  a <- outer((0:l)/l, ps,
             function(x,p,m) pbeta(x, p*m, (1-p)*m), m=m)
  w <- a[-1,,drop=FALSE] - a[-n,,drop=FALSE]
  storage.mode(x) <- 'double'
  storage.mode(w) <- 'double'
  nq <- length(ps)
  ## Get all n leave-out-one quantile estimates (compiled Fortran routine)
  S <- matrix(.Fortran(F_jacklins, x, w, as.integer(n), as.integer(nq),
                       res=double(n*nq))$res, ncol=nq)
  se <- l * sqrt(diag(var(S))/n)     # jackknife variance estimate
  if(rp[1]==0)
    se <- c(NA, se)                  # no SE for the sample extremes
  if(rp[2]==1)
    se <- c(se, NA)
  se <- se[match(pp,probs)]
  if(names)
    names(se) <- names(r)
  attr(r, 'se') <- se
  r
}
sepUnitsTrans <- function(x,
                          conversion=c(day=1, month=365.25/12, year=365.25, week=7),
                          round=FALSE, digits=0)
{
  ## Convert a character variable mixing numbers with time units
  ## (e.g., '3 days', '2 weeks') to a numeric variable expressed in the
  ## target unit, i.e., the element of 'conversion' whose factor is 1.
  ## Values containing unrecognized unit words trigger a warning and
  ## become NA.  Uses Hmisc helpers is.present(), units<-() and label<-().
  if(!any(is.present(x)))
    return(x)
  target <- names(conversion[conversion==1])
  if(!length(target))
    stop('must specify a target unit with conversion factor=1')
  lab <- attr(x,'label')      # preserve the label attribute across conversion
  x <- ifelse(is.present(x),casefold(as.character(x)),'')
  for(w in names(conversion)) {
    ## Strip the unit word (optional plural 's') and scale the number
    i <- grep(w, x)
    if(length(i)) x[i] <-
      as.character(as.numeric(gsub(paste(w,'s*',sep=''), '', x[i]))*
                   conversion[w])
  }
  i <- grep('[a-z]', x)       # any unconverted unit words remaining?
  if(any(i))
    warning(paste('variable contains units of measurement not in',
                  paste(names(conversion), collapse=','),':',
                  paste(unique(x[i]),collapse=' ')))
  x <- as.numeric(x)
  if(round)
    x <- round(x, digits)
  units(x) <- target
  if(length(lab))
    label(x) <- lab
  x
}
## Run make.names(), optionally preserving characters listed in 'allow'.
## By default underscores are converted to periods, matching S-Plus and
## pre-1.9 versions of R; pass allow='_' to keep underscores.
makeNames <- function(names, unique=FALSE, allow=NULL)
{
  result <- make.names(names, unique)
  if(! length(allow)) result <- gsub('_', '.', result)
  result
}
## Load an object saved by Save(), honoring options(LoadPath=) as the
## source directory.  The .rda file named after the (unevaluated)
## argument is loaded into the global environment.
Load <- function(object)
{
  objName <- deparse(substitute(object))
  loadPath <- .Options$LoadPath
  prefix <- if(length(loadPath)) paste(loadPath, '/', sep='') else ''
  load(paste(prefix, objName, '.rda', sep=''), .GlobalEnv)
}
## Save a single object to <name>.rda, honoring options(LoadPath=) as the
## target directory (the counterpart of Load()).
## name     : file/object name; defaults to the expression passed as object
## compress : TRUE (gzip), FALSE, or a compression type accepted by save()
##
## Fixes: the original built a save() call with eval(parse(text=...)),
## which also broke compress=FALSE (it became the invalid string "FALSE").
## save(list=, envir=) does the same job directly and safely.
Save <- function(object, name=deparse(substitute(object)), compress=TRUE)
{
  path <- .Options$LoadPath
  if(length(path))
    path <- paste(path, '/', sep='')
  .FileName <- paste(path, name, '.rda', sep='')
  ## Store the object under the requested name in this function's frame
  assign(name, object)
  if(is.logical(compress) && compress) compress <- 'gzip'
  ## envir= must point at this frame, where the renamed object lives
  save(list=name, file=.FileName, compress=compress, envir=environment())
}
## Return an unopened pipe() connection streaming the single file inside a
## .zip archive.  url may be an http(s) address (downloaded to a temporary
## file first) or a local file path.  If the archive is password
## protected, supply password (passed to unzip -P); otherwise it would be
## requested interactively by unzip.
## Note: to make password-protected zip file z.zip, do zip -e z myfile
getZip <- function(url, password=NULL) {
  archive <-
    if(grepl("^https?://", tolower(url))) {
      tmp <- tempfile()
      download.file(url, tmp)
      tmp
    }
    else url
  unzipCmd <- if(length(password)) paste('unzip -p -P', password)
              else 'unzip -p'
  pipe(paste(unzipCmd, archive))
}
## Fetch and source() the latest development source files for a package
## from github.com/harrelfe, using the file list at hbiostat.org.
## x      : character vector of file names to fetch, or 'all'
## recent : if given, fetch the 'recent' most recently changed files
## avail  : if TRUE, return a data frame of available files and dates
##          instead of sourcing anything
##
## Fix: removed a dead assignment to 'url' (a github commits/tree URL)
## that was unconditionally overwritten inside the loop before any use.
getLatestSource <- function(x=NULL, package='Hmisc',
                            recent=NULL, avail=FALSE) {
  urlf <- paste0('https://hbiostat.org/R/', package, '/dir.txt')
  fs <- scan(urlf, what=list('', ''), sep=' ', quiet=TRUE)
  dates <- fs[[1]]
  files <- fs[[2]]
  if(avail) return(data.frame(file=files, date=as.Date(dates)))
  if(length(recent)) x <- files[1:recent]
  if(length(x)==1 && x=='all') x <- files
  for(fun in x) {
    i <- which(files==fun)
    if(!length(i)) stop(paste('no file ', fun,' in ',package, sep=''))
    cat('Fetching', fun, dates[i],'\n')
    url <- paste0('https://raw.githubusercontent.com/harrelfe/', package,
                  '/master/R/', fun)
    source(url)
  }
}
## lowess() wrapper working around an occasional lowess bug producing wild
## fitted values when iter > 0: if any smoothed value falls outside the
## observed range of the response, refit with iter=0.
clowess <- function(x, y=NULL, iter=3, ...) {
  yrange <- range(if(length(y)) y else x$y)
  fit <- lowess(x, y, iter=iter, ...)
  outside <- fit$y < yrange[1] | fit$y > yrange[2]
  if(iter != 0 && any(outside)) fit <- lowess(x, y, iter=0)
  fit
}
prselect <- function(x, start=NULL, stop=NULL, i=0, j=0, pr=TRUE)
{
  ## Remove a span of lines from character vector x, replacing it with
  ## '...'.  The span runs from the first line matching 'start' (offset by
  ## i) through the first subsequent line matching 'stop' (offset by j);
  ## with no 'stop' it runs to the end.  With no 'start', removal begins
  ## at line 1.  Useful for suppressing portions of captured output.
  ## pr : if TRUE, also print the result with cat
  f <- function(pattern, x)
  {
    ## Index of the first line matching pattern (integer(0) if none)
    y <- grep(pattern, x)
    if(length(y) > 1) y <- y[1]
    y
  }
  lx <- length(x)
  k <- if(length(start)) f(start, x) else 1
  if(length(k))
  {
    k <- k + i
    ## m = last line to remove; -1 flags a 'stop' pattern that never matched
    m <- if(length(stop))
    {
      w <- f(stop, x[k:lx])
      if(length(w)) w + k - 1 + j else -1
    }
    else lx
    if(m > 0) x <- if(k==1) (if(m==lx) '...' else c('...', x[-(k:m)]))
    else
    {
      if(m==lx) c(x[-(k:m)], '...')
      else c(x[1:(k-1)], '...', x[(m+1):lx])
    }
  }
  else # start pattern given but not found; remove through the stop match
  {
    m <- f(stop, x)
    if(length(m) > 0)
    {
      ## NOTE(review): the 'else lx' below is unreachable, since
      ## length(m) > 0 was just established
      m <- if(length(m)) m + j - 1 else lx
      x <- if(m==lx) '...' else c('...', x[-(1:m)])
    }
  }
  if(pr) cat(x, sep='\n')
  invisible(x)
}
## Convert points (x, y) into coordinates for drawing a step function in
## the style of survival curves: the curve moves right, then down, at each
## step.  Taken from the internal dostep function of
## survival:::plot.survfit, with the removal of duplicate y values omitted.
makeSteps <- function(x, y)
{
  ## Drop a leading missing point
  if (is.na(x[1] + y[1])) {
    x <- x[-1]
    y <- y[-1]
  }
  n <- length(x)
  if (n > 2) {
    ## Duplicate interior x values, and y values shifted by one position
    list(x = rep(x, c(1, rep(2, n - 1))),
         y = rep(y, c(rep(2, n - 1), 1)))
  }
  else if (n == 1)
    list(x = x, y = y)
  else
    list(x = x[c(1, 2, 2)], y = y[c(1, 1, 2)])
}
## Build a LaTeX expression from (text, closure) argument pairs.  Odd
## arguments supply text fragments; each following even argument names
## what must eventually be closed: '(', '{', '[' or a LaTeX environment
## name ('' means nothing to close).  The pasted text is returned with
## attribute 'close' holding the closing text in reverse order of opening.
## insert : optional list of 3-element lists (name, 'before'|'after',
##          text) adding text around specific closures.
latexBuild <- function(..., insert=NULL, sep='') {
  args <- list(...)
  n <- length(args)
  if(n %% 2 != 0) stop('# arguments must be multiple of 2')
  pieces <- character(0)
  open <- character(0)
  for(j in seq(1, n, by=2)) {
    piece <- args[[j]]
    if(length(piece)) {
      pieces <- c(pieces, piece)
      if(args[[j + 1]] != '') open <- c(open, args[[j + 1]])
    }
  }
  txt <- paste(pieces, collapse=sep)
  close <-
    if(length(open)) {
      parts <- character(0)
      for(y in rev(open)) {
        if(length(insert))
          for(ins in insert)
            if(length(ins) && ins[[1]] == y && ins[[2]] == 'before')
              parts <- c(parts, '\n', ins[[3]])
        parts <- c(parts,
                   switch(y,
                          '(' = ')',
                          '{' = '}',
                          '[' = ']',
                          sprintf('\\end{%s}', y)))
        if(length(insert))
          for(ins in insert)
            if(length(ins) && ins[[1]] == y && ins[[2]] == 'after')
              parts <- c(parts, '\n', ins[[3]])
      }
      paste(parts, collapse=sep)
    }
  structure(txt, close=close)
}
## Download, source, edit, or list R scripts from a github repository
## (by default harrelfe/rscripts).  With no file argument, shows the
## repository's contents.md catalog (locally or in a browser), or with
## cats= lists/filters the major/minor category index.
## put='source' runs the script; put='rstudio' opens it for editing.
## Setting options(localHfiles=TRUE) uses a local clone of the repo.
##
## Fix: file.copy() path was built with the misspelled argument sel='/';
## since 'sel' does not partially match paste()'s 'sep', the '/' was
## pasted as data with the default sep=' ', producing a bogus path.
getRs <- function(file=NULL,
                  guser='harrelfe', grepo='rscripts',
                  gdir='raw/master', dir=NULL,
                  browse=c('local', 'browser'), cats=FALSE,
                  put=c('source', 'rstudio')) {
  browse <- match.arg(browse)
  put <- match.arg(put)
  localrepo <- .Options$localHfiles
  localrepo <- length(localrepo) && is.logical(localrepo) && localrepo
  if(localrepo) where <- '~/R/rscripts'
  else {
    where <- paste('https://github.com', guser, grepo, gdir, sep='/')
    if(length(dir)) where <- paste(where, dir, sep='/')
  }
  ## Strip leading/trailing whitespace
  trim <- function(x) sub('^[[:space:]]+','',sub('[[:space:]]+$','', x))
  ## Pretty-print the catalog with wrapped columns
  pc <- function(s) {
    wr <- function(x) {
      n <- length(x)
      z <- character(n)
      for(i in 1 : n) z[i] <- paste(strwrap(x[i], width=15), collapse='\n')
      z
    }
    s <- with(s, cbind(Major = wr(Major),
                       Minor = wr(Minor),
                       File = wr(File),
                       Type = wr(Type),
                       Description = wr(Description)))
    print.char.matrix(s, col.names=TRUE)
  }
  ## read.table with a friendlier message for old R lacking https support
  read.table.HTTPS <- function(url) {
    res <- tryCatch(read.table(url,
                               sep='|', quote='', header=TRUE, as.is=TRUE),
                    error=function(e) e)
    if(inherits(res, "simpleError")) {
      if(res$message == "https:// URLs are not supported") {
        res$message <- paste(res$message, "Try installing R version >= 3.2.0", sep="\n\n")
      }
      stop(res)
    }
    res
  }
  ## download.file wrapper falling back to wget where libcurl is missing
  download.file.HTTPS <- function(url, file, method='libcurl',
                                  quiet=TRUE, extra='--no-check-certificate') {
    res <- tryCatch(download.file(url, file, method, quiet=quiet, extra=extra),
                    error=function(e) e)
    if(inherits(res, "simpleError")) {
      if(res$message == "download.file(method = \"libcurl\") is not supported on this platform") {
        warning(paste(res$message, "Try installing R version >= 3.2.0", "Attempting method=\"wget\"", sep="\n\n"))
        return(download.file.HTTPS(url, file, method='wget'))
      }
      if(res$message == "https:// URLs are not supported") {
        res$message <- paste(res$message, "Try installing R version >= 3.2.0", sep="\n\n")
      }
      stop(res)
    }
    invisible(res)
  }
  if(! length(file)) {
    s <- read.table.HTTPS(paste(where, 'contents.md', sep='/'))
    s <- s[-1,]                # drop the markdown header separator row
    names(s) <- c('Major', 'Minor', 'File', 'Type', 'Description')
    sd <- s; n <- nrow(s) # sd = s with dittoed items duplicated
    for(x in c('Major', 'Minor')) {
      u <- v <- gsub('\\*\\*', '', trim(s[[x]]))
      ## '"' is a ditto mark: repeat the value from the row above
      for(i in 2 : n) if(u[i] == '"') u[i] <- u[i - 1]
      v <- gsub('"', '', v)
      s[[x]] <- v; sd[[x]] <- u
    }
    ## Reduce markdown links [name](url) to plain text
    s$File <- trim(gsub('\\[(.*)\\].*', '\\1', s$File))
    d <- trim(gsub('\\[.*\\]\\(.*\\)', '', s$Description))
    s$Description <- gsub('\\[report\\].*', '', d)
    if(is.logical(cats)) {
      if(cats) {
        ## List all major and minor categories
        maj <- sort(unique(sd$Major))
        min <- setdiff(sort(unique(sd$Minor)), '')
        cat('\nMajor categories:\n', maj,
            '\nMinor categories:\n', min, '', sep='\n')
        return(invisible(list(Major=maj, Minor=min)))
      }
    } else { ## list all scripts whose "first hit" major category contains cats
      i <- grepl(tolower(cats), tolower(sd$Major))
      if(! any(i)) cat('No scripts with', cats, 'in major category\n')
      else pc(s[i, ])
      return(invisible(s[i, ]))
    }
    if(browse == 'local') pc(s)
    else
      browseURL(if(localrepo) '~/R/rscripts/contents.md'
                else
                  'https://github.com/harrelfe/rscripts/blob/master/contents.md')
    return(invisible(s))
  }
  if(put == 'source')
    return(invisible(source(paste(where, file, sep='/'))))
  if(localrepo) file.copy(paste(where, file, sep='/'), file)
  else download.file.HTTPS(paste(where, file, sep='/'), file)
  if(requireNamespace('rstudioapi', quietly=TRUE) &&
     rstudioapi::isAvailable()) rstudioapi::navigateToFile(file)
  else file.edit(file)
  invisible()
}
## Set up knitr defaults for Hmisc-style reports: chunk options, graphics
## devices, par() defaults (via the 'par' hook), warning/message
## redirection, figure caption collection (capfile), and chunk aliases,
## for latex, markdown, blogdown, or quarto documents.
## basename prefixes figure file names; w/h set figure dimensions.
knitrSet <-
function(basename = NULL,
w=if(! bd) 4,
h=if(! bd) 3,
wo=NULL, ho=NULL,
fig.path = if(length(basename)) basename else '',
fig.align = if(! bd) 'center',
fig.show = 'hold',
fig.pos = if(! bd) 'htbp',
fig.lp = if(! bd) paste('fig', basename, sep=':'),
dev = switch(lang,
latex='pdf', markdown='png',
blogdown=NULL, quarto=NULL),
tidy=FALSE, error=FALSE,
messages=c('messages.txt', 'console'),
width=61, decinline=5, size=NULL, cache=FALSE,
echo=TRUE, results='markup', capfile=NULL,
lang=c('latex','markdown','blogdown','quarto')) {
if(! requireNamespace('knitr')) stop('knitr package not available')
messages <- match.arg(messages)
lang <- match.arg(lang)
options(knitrSet.lang = lang)
## bd = blogdown/quarto: these engines manage figure layout themselves
bd <- lang %in% c('blogdown', 'quarto')
## Specify e.g. dev=c('pdf','png') or dev=c('pdf','postscript')
## to produce two graphics files for each plot
## But: dev='CairoPNG' is preferred for png
if(length(basename)) basename <- paste(basename, '-', sep='')
## Default width fills Sweavel boxes when font size is \small and svmono.cls
## is in effect (use 65 without svmono)
if(lang == 'latex') knitr::render_listings()
if(messages != 'console') {
unlink(messages) # Start fresh with each run
hook_log = function(x, options) cat(x, file=messages, append=TRUE)
knitr::knit_hooks$set(warning = hook_log, message = hook_log)
}
else
knitr::opts_chunk$set(message=FALSE, warning=FALSE)
if(length(size)) knitr::opts_chunk$set(size = size)
## For htmlcap see http://stackoverflow.com/questions/15010732
## Causes collisions in html and plotly output; Original (no better)
## enclosed in
## NOTE(review): the next two lines look like remnants of a commented-out
## htmlcap hook whose markup was lost; the stray '"))' opens a string
## literal -- confirm against upstream Hmisc source before editing below
"))
# })
if(length(decinline)) {
rnd <- function(x, dec) if(!is.numeric(x)) x else round(x, dec)
formals(rnd) <- list(x=NULL, dec=decinline)
knitr::knit_hooks$set(inline = rnd)
}
knitr::knit_hooks$set(par=function(before, options, envir)
if(before && options$fig.show != 'none') {
p <- c('bty','mfrow','ps','bot','top','left','rt','lwd',
'mgp','las','tcl','axes','xpd')
pars <- knitr::opts_current$get(p)
pars <- pars[! is.na(names(pars))]
## knitr 1.6 started returning NULLs for unspecified pars
i <- sapply(pars, function(x) length(x) > 0)
.spar. <-
function(mar=if(!axes)
c(2.25+bot-.45*multi,2*(las==1)+2+left,.5+top+.25*multi,
.5+rt) else
c(3.25+bot-.45*multi,2*(las==1)+3.5+left,.5+top+.25*multi,
.5+rt),
lwd = if(multi)1 else 1.75,
mgp = if(!axes) mgp=c(.75, .1, 0) else
if(multi) c(1.5, .365, 0) else c(2.4-.4, 0.475, 0),
tcl = if(multi)-0.25 else -0.4, xpd=FALSE, las=1,
bot=0, left=0, top=0, rt=0, ps=if(multi) 14 else 12,
mfrow=NULL, axes=TRUE, cex.lab=1.15, cex.axis=1,
...) {
multi <- length(mfrow) > 0
par(mar=mar, lwd=lwd, mgp=mgp, tcl=tcl, ps=ps, xpd=xpd,
cex.lab=cex.lab, cex.axis=cex.axis, las=las, ...)
if(multi) par(mfrow=mfrow)
}
if(any(i)) do.call(.spar., pars[i]) else .spar.()
})
knitr::opts_knit$set(width=width)
if(length(capfile)) {
options(FigCapFile=capfile)
cf <- function(before, options, envir) {
if(before) return()
lang <- getOption('knitrSet.lang')
label <- knitr::opts_current$get('label')
prefx <- if(lang == 'quarto') '' else options$fig.lp
figname <- paste0(prefx, label)
## Quarto uses a chunk figure label convention fig-...
## and figures are referenced by @fig-...
figref <- if(grepl('^fig-', figname))
paste0('@', figname) else paste0('\\@ref(', figname, ')')
cap <- options$fig.cap
scap <- options$fig.scap
if(length(cap) && is.call(cap)) cap <- eval(cap)
if(length(scap) && is.call(scap)) scap <- eval(scap)
if( ! length(scap) || scap == '') scap <- cap
if(length(scap) && scap != '')
cat(label, figref, paste0('"', scap, '"\n'), sep=',',
append=TRUE, file=getOption('FigCapFile'))
}
knitr::knit_hooks$set(capfileFun=cf)
}
## May want to see https://stackoverflow.com/questions/37116632/r-markdown-html-number-figures
## aliases=c(h='fig.height', w='fig.width', cap='fig.cap', scap='fig.scap'))
## eval.after = c('fig.cap','fig.scap'),
## error=error) #, keep.source=keep.source (TRUE))
## See if need to remove dev=dev from below because of plotly graphics
w <- list(fig.path=fig.path, fig.align=fig.align,
fig.width=w, fig.height=h,
out.width=wo,out.height=ho,
fig.show=fig.show, fig.lp=fig.lp, fig.pos=fig.pos,
dev=dev, par=TRUE, capfileFun=length(capfile) > 0,
tidy=tidy, cache=cache,
echo=echo, error=error, comment='', results=results)
if(bd) w$fig.path <- NULL
w <- w[sapply(w, function(x) length(x) > 0)]
## knitr doesn't like null fig.align etc.
do.call(knitr::opts_chunk$set, w)
if(lang %in% c('markdown', 'blogdown'))
knitr::knit_hooks$set(uncover=markupSpecs$html$uncover)
hook_chunk = knitr::knit_hooks$get('chunk')
## centering will not allow too-wide figures to go into left margin
if(lang == 'latex') knitr::knit_hooks$set(chunk = function(x, options) {
res = hook_chunk(x, options)
if (options$fig.align != 'center') return(res)
gsub('\\{\\\\centering (\\\\includegraphics.+)\n\n\\}',
'\\\\centerline{\\1}', res)
})
knitr::set_alias(w = 'fig.width', h = 'fig.height',
wo = 'out.width', ho = 'out.height',
cap = 'fig.cap', scap ='fig.scap')
}
## see http://yihui.name/knitr/options#package_options
## Use caption package options to control caption font size
## Return the graphics engine in effect: 'plotly' when the plotly package
## is installed and options(grType='plotly') is set, otherwise 'base'.
grType <- function() {
  if(! length(find.package('plotly', quiet=TRUE))) return('base')
  opt <- .Options$grType
  if(length(opt) && opt == 'plotly') 'plotly' else 'base'
}
## Return the print type in effect (options(prType=)), default 'plain'.
prType <- function() {
  opt <- .Options$prType
  if(length(opt)) opt else 'plain'
}
## Return how HTML special characters should be produced: '&' when
## options(htmlSpecialType='&') is set, otherwise 'unicode'.
htmlSpecialType <- function() {
  opt <- .Options$htmlSpecialType
  if(length(opt) && opt == '&') '&' else 'unicode'
}
## Save plotly graphic(s) to png file(s) named for the current knitr chunk.
## x may be a single plotly object or a named list of them; for a list,
## one file is written per element with the element name appended.
## ... is passed to plotly::plotly_IMAGE.
## http://stackoverflow.com/questions/33959635/exporting-png-files-from-plotly-in-r
plotlySave <- function(x, ...) {
  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")
  chunkname <- knitr::opts_current$get("label")
  path <- knitr::opts_chunk$get('fig.path')
  ## && (not the elementwise &): this is a scalar condition
  if(is.list(x) && ! inherits(x, 'plotly_hash')) {
    for(w in names(x)) {
      file <- paste0(path, chunkname, '-', w, '.png')
      plotly::plotly_IMAGE(x[[w]], format='png', out_file=file, ...)
    }
  }
  else {
    file <- paste0(path, chunkname, '.png')
    plotly::plotly_IMAGE(x, format='png', out_file=file, ...)
  }
  invisible()
}
## Miscellaneous helper functions for building plotly specifications
## (idiom fix: top-level assignment now uses <- instead of =)
plotlyParm <- list(
  ## Needed height in pixels for a plotly dot chart given the number of
  ## rows in the chart, bounded by 'low' and 'high'
  heightDotchart = function(rows, per=25, low=200, high=800)
    min(high, max(low, per * rows)),
  ## Given a vector of row labels that appear to the left on a dot chart,
  ## compute the needed chart height taking label line breaks into account
  ## Since plotly devotes the same vertical space to each category,
  ## just need to find the maximum number of breaks present
  heightDotchartb = function(x, per=40,
                             low=c(200, 200, 250, 300, 375)[min(nx, 5)],
                             high=1700) {
    x <- if(is.factor(x)) levels(x) else sort(as.character(x))
    nx <- length(x)
    m <- sapply(strsplit(x, ' '), length)
    ## If no two categories in a row are at the max # lines,
    ## reduce max by 1
    mx <- max(m)
    lm <- length(m)
    mlag <- if(lm == 1) 0 else c(0, m[1:(lm - 1)])
    if(! any(m == mx & mlag == mx)) mx <- mx - 1
    z <- 1 + (if(mx > 1) 0.5 * (mx - 1) else 0)
    min(high, max(low, per * length(x) * z))
  },
  ## Colors for unordered categories; col may be a palette function or a
  ## vector of colors to recycle
  colUnorder = function(n=5, col=colorspace::rainbow_hcl) {
    if(! is.function(col)) rep(col, length.out=n)
    else col(n)
  },
  ## Colors for ordered levels
  colOrdered = function(n=5, col=viridis::viridis) {
    if(! is.function(col)) rep(col, length.out=n)
    else col(n)
  },
  ## Margin to leave enough room for long labels on left or right as
  ## in dotcharts; x may be the labels themselves or a max width
  lrmargin = function(x, wmax=190, mult=7) {
    if(is.character(x)) x <- max(nchar(x))
    min(wmax, max(70, x * mult))
  }
)
## Function written by Dirk Eddelbuettel:
## Convert an image file to an embeddable base64 <img> tag.
## file : path to the image (encoded as image/png)
## Rd   : if TRUE, wrap the tag in \out{} for use inside Rd documentation
## alt  : alternate text for the image
## Fix: the sprintf format string had been reduced to "%s%s" while four
## data arguments were supplied; restored the <img> markup so the encoded
## data and alt text are actually used.
tobase64image <- function (file, Rd = FALSE, alt = "image") {
  input <- normalizePath(file, mustWork = TRUE)
  buf <- readBin(input, raw(), file.info(input)$size)
  base64 <- base64enc::base64encode(buf)
  sprintf("%s<img src=\"data:image/png;base64,\n%s\" alt=\"%s\">%s",
          if (Rd)
            "\\out{"
          else "", base64, alt, if (Rd)
            "}"
          else "")
}
## S3 generic: produce a plot for an object, dispatching on class(data);
## methods are supplied elsewhere in the package
plotp <- function(data, ...) UseMethod("plotp")
## Capture the Hmisc-specific attributes ('label', 'units') of a variable,
## or of each variable in a list/data frame, for later restoration with
## restoreHattrib().  Returns a named list; a non-list input is stored
## under the name '.single.variable.'.
keepHattrib <- function(obj) {
  grab <- function(v) {
    at <- attributes(v)
    keep <- intersect(names(at), c('label', 'units'))
    if(length(keep)) at[keep]
  }
  if(is.list(obj)) sapply(obj, grab)
  else list(.single.variable.=grab(obj))
}
## Reapply attributes saved by keepHattrib() to an object or to the
## matching elements of a list/data frame.  attribs is the list returned
## by keepHattrib; entries for variables no longer present are ignored.
## Uses Hmisc label<-() and units<-() replacement functions.
restoreHattrib <- function(obj, attribs) {
  present <- names(obj)
  for(varName in names(attribs)) {
    at <- attribs[[varName]]
    if(! length(at)) next
    single <- varName == '.single.variable.'
    if(! (single || varName %in% present)) next
    v <- if(single) obj else obj[[varName]]
    if(length(at$label)) label(v) <- at$label
    if(length(at$units)) units(v) <- at$units
    if(single) return(v)   # bare variable: nothing more to restore
    obj[[varName]] <- v
  }
  obj
}
## Disabled (if(FALSE)) glossary of Hmisc statistical terms plus a helper
## to render one entry as HTML or plain text.  None of this is evaluated.
## NOTE(review): the empty '' strings in the paste0() calls below suggest
## HTML tags were stripped from this code at some point -- confirm against
## upstream source before enabling.
if(FALSE) {
  Hglossary <-
    list(Gmd=list('Gini\'s mean difference', 'a measure of dispersion defined as the mean absolute difference over all possible pairs of different observations. It is more robust than the standard deviation.', 'https://www.researchgate.net/publication/5182211_Gini\'s_Mean_Difference_A_Superior_Measure_of_Variability_for_Non-Normal_Distributions'),
         Info=list('Information index', 'a measure of the information content in a numeric variable relative to the information in a continuous numeric variable with no ties. The lowest value of Info occurs in a very imbalanced binary variable. Info comes from the approximate formula for the variance of a log odds ratio for a proportional odds model/Wilcoxon test, due to Whitehead (1993). Info is the ratio of the variance if there no ties in the data to the variance for the frequency distribution of observed values.', 'https://hbiostat.org/bib/r2.html')
         )
  ## Render one glossary entry, case-insensitively matched by short name;
  ## collapse=TRUE returns a single pasted string
  rHglossary <- function(x, html=TRUE, collapse=TRUE) {
    nams <- names(Hglossary)
    i <- which(tolower(nams) == tolower(x))
    if(! length(i))
      stop(paste(x, 'is not defined in Hglossary'))
    w <- Hglossary[[i]]
    sname <- nams[i]   # short name
    lname <- w[[1]]    # long name
    def <- w[[2]]      # definition text
    href <- w[[3]]     # URL for more information
    if(html) {
      lname <- paste0('', lname, '')
      if(length(href) > 1)
        def <- paste0(def, ' More information')
    }
    if(collapse) paste0('', sname, '',
                        lname, ': ', def, '')
    else paste0(sname, ': ', lname, ', ', def)
  }
}
## Function to render HTML
## Converts argument to one character string with \n delimiters
## If knitr is currently running, runs this string through
## knitr::asis_output
## Otherwise, makes it browsable HTML using htmltools so that
## an RStudio Viewer or a new browser window will display the result
## See https://github.com/quarto-dev/quarto-cli/discussions/4248 which
## explains that you need to enclose the text to keep from fooling
## Pandoc's reader
rendHTML <- function(x, html=TRUE) {
  ## Render HTML (or RMarkdown if html=FALSE) appropriately for the
  ## current context: raw cat() when option rawmarkup is set, knitr
  ## passthrough when knitting, otherwise a browsable htmltools object
  ## (RStudio Viewer / browser window).
  x <- paste(x, collapse='\n')
  raw <- getOption('rawmarkup', FALSE)
  if(raw) {
    ## Raw mode: emit the markup untouched and exit
    cat(x, '\n')
    return(invisible())
  }
  if(length(getOption('knitr.in.progress'))) {
    if(html)
      return(htmltools::knit_print.html(x))  # includes htmlPreserve
    ## if(html) x <- paste0('```{=html}\n\n', x, '\n```\n')
    return(knitr::asis_output(x))
  }
  if(! html) { # Convert from RMarkdown to html
    ## Round-trip through rmarkdown::render on temp files to get HTML
    tf <- tempfile(fileext='.Rmd')
    o  <- tempfile(fileext='.html')
    cat('---\ntitle: ""\npagetitle: x\noutput: html_document\n---\n',
        x, '\n', sep='', file=tf)
    rmarkdown::render(tf, output_file=o, quiet=TRUE)
    x <- readLines(o)
  }
  ## The following has prettier output for model fits than the kableExtra method
  print(htmltools::browsable(htmltools::HTML(x)))
}
## See ~/r/rmarkdown/html/render.qmd
## kableExtra print method did not render regression fit output
## as nicely as print(htmltools::browsable(...)) method above
## See kableExtra:::print.kableExtra
# class(x) <- 'kableExtra'
# dep <- list(rmarkdown::html_dependency_jquery(),
# rmarkdown::html_dependency_bootstrap(theme = "cosmo"),
# kableExtra::html_dependency_kePrint(),
# kableExtra::html_dependency_lightable())
# ht <- htmltools::browsable(htmltools::HTML(as.character(x),
# ""))
# htmltools::htmlDependencies(ht) <- dep
# htmltools::html_print(ht)
sRequire <- function(package) {
  ## Hard requirement on a suggested package: load its namespace quietly,
  ## stopping with an informative message if it is not installed.
  ok <- requireNamespace(package, quietly=TRUE)
  if(! ok) {
    msg <- paste('package', package, 'is required but not installed')
    stop(msg)
  }
  invisible()
}
Hmisc/R/event.chart.s 0000644 0001762 0000144 00000054372 12250441246 014136 0 ustar ligges users ## event.chart.q: eventchart program 1.0 (creates function event.chart)
##
## last edited: 9-27-97
## last edited: 10-20-98, add pty='m' for the default plotting;
## one may change to pty='s' to get the 'square plot' for the Goldman's Event Chart
## FEH changes 9may02 for R
## Draw an "event chart": one horizontal line per subject (row of data),
## with plotting symbols marking the event times/dates found in the
## columns subset.c.  Supports row/column subsetting, multi-key sorting,
## re-referencing times to a baseline column (x.reference), calendar vs.
## Julian axes, grouped line styles (line.by), extra connecting segments
## (line.add), per-observation point magnification (point.cex.mult.var)
## and internal/external/mouse-placed legends.  Draws on the current
## base-graphics device; returns nothing useful.
event.chart <-
  function(data, subset.r = 1:dim(data)[1], subset.c = 1:dim(data)[2],
           sort.by = NA, sort.ascending = TRUE,
           sort.na.last = TRUE, sort.after.subset = TRUE,
           y.var = NA, y.var.type = "n",
           y.jitter = FALSE, y.jitter.factor = 1,
           y.renum = FALSE, NA.rm = FALSE, x.reference = NA,
           now = max(data[, subset.c], na.rm = TRUE),
           now.line = FALSE, now.line.lty = 2,
           now.line.lwd = 1, now.line.col = 1, pty = "m",
           date.orig = c(1, 1, 1960), titl = "Event Chart",
           y.idlabels = NA, y.axis = "auto",
           y.axis.custom.at = NA, y.axis.custom.labels = NA,
           y.julian = FALSE, y.lim.extend = c(0, 0),
           y.lab = ifelse(is.na(y.idlabels), "", as.character(y.idlabels)),
           x.axis.all = TRUE, x.axis = "auto",
           x.axis.custom.at = NA, x.axis.custom.labels = NA,
           x.julian = FALSE, x.lim.extend = c(0, 0), x.scale = 1,
           x.lab = ifelse(x.julian, "Follow-up Time", "Study Date"),
           line.by = NA, line.lty = 1, line.lwd = 1, line.col = 1,
           line.add = NA, line.add.lty = NA,
           line.add.lwd = NA, line.add.col = NA,
           point.pch = 1:length(subset.c),
           point.cex = rep(0.6, length(subset.c)),
           point.col = rep(1, length(subset.c)),
           point.cex.mult = 1., point.cex.mult.var = NA,
           extra.points.no.mult = rep(NA, length(subset.c)),
           legend.plot = FALSE, legend.location = "o", legend.titl = titl,
           legend.titl.cex = 3, legend.titl.line = 1,
           legend.point.at = list(x = c(5, 95), y = c(95, 30)),
           legend.point.pch = point.pch,
           legend.point.text = ifelse(rep(is.data.frame(data), length(subset.c)),
                                      names(data[, subset.c]),
                                      subset.c),
           legend.cex = 2.5, legend.bty = "n",
           legend.line.at = list(x = c(5, 95), y = c(20, 5)),
           legend.line.text = names(table(as.character(data[, line.by]),
                                          exclude = c("", "NA"))),
           legend.line.lwd = line.lwd, legend.loc.num = 1,
           ...)
{
  ## legend() wrapper that only forwards pch when it was supplied
  legnd <- function(..., pch) {
    if(missing(pch)) legend(...)
    else legend(..., pch = pch)
  }
  ## Convert Julian day numbers to month/day/year, honoring a chron-style
  ## origin (option chron.origin, default 1/1/1960)
  month.day.year <- function(jul, origin.) {
    if (missing(origin.) || is.null(origin.)) {
      origin. <- .Options$chron.origin
      if (is.null(origin.))
        origin. <- c(month = 1, day = 1, year = 1960)
    }
    shift <- if (all(origin. == 0)) 0 else julian(origin = origin.)
    ## relative origin
    ## "absolute" origin
    j <- jul + shift
    j <- j - 1721119
    y <- (4 * j - 1) %/% 146097
    j <- 4 * j - 1 - 146097 * y
    d <- j %/% 4
    j <- (4 * d + 3) %/% 1461
    d <- 4 * d + 3 - 1461 * j
    d <- (d + 4) %/% 4
    m <- (5 * d - 3) %/% 153
    d <- 5 * d - 3 - 153 * m
    d <- (d + 5) %/% 5
    y <- 100 * y + j
    y <- y + ifelse(m < 10, 0, 1)
    m <- m + ifelse(m < 10, 3, -9)
    list(month = m, day = d, year = y)
  }
  ## julian.r
  ## Convert between Julian and Calendar Dates
  julian <- function(m, d, y, origin.) {
    only.origin <- all(missing(m), missing(d), missing(y))
    if (only.origin)
      m <- d <- y <- NULL
    ## return days since origin
    if (missing(origin.) || is.null(origin.)) {
      origin. <- .Options$chron.origin
      if (is.null(origin.))
        origin. <- c(month = 1, day = 1, year = 1960)
    }
    nms <- names(d)
    max.len <- max(length(m), length(d), length(y))
    ## prepend new origin value and rep out to common max. length:
    m <- c(origin.[1], rep(m, length = max.len))
    d <- c(origin.[2], rep(d, length = max.len))
    y <- c(origin.[3], rep(y, length = max.len))
    ## code from julian date in the S book (p.269)
    y <- y + ifelse(m > 2, 0, -1)
    m <- m + ifelse(m > 2, -3, 9)
    c <- y %/% 100   # local variable c shadows base::c below only as data
    ya <- y - 100 * c
    out <- (146097 * c) %/% 4 + (1461 * ya) %/% 4 + (153 * m + 2) %/% 5 +
      d + 1721119
    ## now subtract the new origin from all dates
    if (!only.origin) {
      if (all(origin. == 0))
        out <- out[-1]
      else
        out <- out[-1] - out[1]
    }
    names(out) <- nms
    out
  }
  ## stop function if unacceptable violations occur
  ## (other stops may occur later)
  if (!is.matrix(data) && !is.data.frame(data))
    stop("argument data must be a matrix or a data frame\n")
  ## section 1: do necessary subsetting and sorting of data
  targodata <- apply(data[, subset.c, drop = FALSE], 2, as.numeric)
  ## targodata for target 'overall' data
  if (!is.na(x.reference))
    targodata <- apply(targodata - data[, x.reference], 2, as.numeric)
  ## start of sort routine
  if (!is.na(sort.by[1])) {
    if (sort.after.subset == TRUE)
      data <- data[subset.r, ]
    m <- dim(data)[1]
    keys <- 1:m
    rotate <- m:1
    length.sort.by <- length(sort.by)
    asc <- rep(sort.ascending, length.sort.by)
    ## stable multi-key sort: apply keys from last to first
    for (i in length.sort.by:1) {
      if (asc[i])
        keys[] <- keys[sort.list(data[, sort.by[[i]]][keys],
                                 na.last = sort.na.last)]
      else
        keys[] <- keys[order(data[, sort.by[[i]]][keys],
                             rotate, na.last = sort.na.last)[rotate]]
    }
    data <- data[keys, ]
    if (sort.after.subset == FALSE) {
      subset.r <- (1:dim(data)[1])[subset.r]
      targdata <- apply(data[subset.r, subset.c, drop = FALSE], 2, as.numeric)
    } else if (sort.after.subset == TRUE) {
      targdata <- apply(data[, subset.c, drop = FALSE], 2, as.numeric)
      subset.ro <- (1:dim(data)[1])[subset.r]
      subset.r <- seq(length(subset.ro))
    }
  } else if (is.na(sort.by[1])) {
    subset.r <- (1:dim(data)[1])[subset.r]
    targdata <- apply(data[subset.r, subset.c, drop = FALSE], 2, as.numeric)
  }
  ## end of sort routine
  ## start to deal with missing values and renumbering y-axis
  if (NA.rm == TRUE) {
    whotoplot <- subset.r[!(apply(is.na(targdata), 1, all))]
    ## whotoplot is for use for data matrix(dataframe);
    ## essentially who will be plotted from data
    t.whotoplot <- seq(dim(targdata)[1])[!(apply(is.na(targdata), 1, all))]
    ## t.whotoplot is for use for targdata matrix(dataframe);
    ## essentially, who will be plotted from targdata
    if (y.renum == TRUE) {
      whattoplot <- seq(subset.r[!(apply(is.na(targdata), 1, all))])
      ## whattoplot is what will be plotted on y-axis of event chart
    } else if (y.renum == FALSE) {
      if ((!is.na(sort.by[1]) & sort.after.subset == FALSE) | (is.na(sort.by[1])))
        whattoplot <- subset.r[!(apply(is.na(targdata), 1, all))]
      else if (!is.na(sort.by[1]) & sort.after.subset == TRUE)
        whattoplot <- subset.ro[!(apply(is.na(targdata), 1, all))]
    }
  } else if (NA.rm == FALSE) {
    whotoplot <- subset.r
    t.whotoplot <- seq(dim(targdata)[1])
    if (y.renum == TRUE)
      whattoplot <- seq(subset.r)
    else if (y.renum == FALSE) {
      if ((!is.na(sort.by[1]) & sort.after.subset == FALSE) | (is.na(sort.by[1])))
        whattoplot <- subset.r
      else if (!is.na(sort.by[1]) & sort.after.subset == TRUE)
        whattoplot <- subset.ro
    }
  }
  ## end of dealing with missing values and renumbering of y-axis
  ## section 2: perform necessary manipulations using x.reference and y.var
  ## deal with re-referencing x-axis with x.reference
  if (!is.na(x.reference)) {
    targdata <- apply(targdata - data[subset.r, x.reference], 2, as.numeric)
    if (NA.rm == TRUE) {
      x.referencew <- data[whotoplot, x.reference]
      whotoplot <- whotoplot[!is.na(x.referencew)]
      t.whotoplot <- t.whotoplot[!is.na(x.referencew)]
      whattoplot.ref <- whattoplot[!is.na(x.referencew)]
      if (y.renum == FALSE) {
        if ((!is.na(sort.by[1]) & sort.after.subset == FALSE) | (is.na(sort.by[1])))
          whattoplot <- seq(subset.r[1],
                            subset.r[1] + length(whattoplot.ref) - 1)
        else if (!is.na(sort.by[1]) & sort.after.subset == TRUE)
          whattoplot <- seq(subset.ro[1],
                            subset.ro[1] + length(whattoplot.ref) - 1)
      } else if (y.renum == TRUE)
        whattoplot <- seq(length(whattoplot.ref))
    }
  }
  ## deal with using a data frame variable to place lines on y-axis
  if (!is.na(y.var)) {
    if (!is.na(sort.by[1]))
      stop("cannot use sort.by and y.var simultaneously\n")
    y.varw <- as.numeric(data[whotoplot, y.var])
    whotoplot <- whotoplot[!is.na(y.varw)]
    t.whotoplot <- t.whotoplot[!is.na(y.varw)]
    whattoplot <- y.varw[!is.na(y.varw)]
    if (y.jitter == TRUE) {
      ## spread tied y values uniformly within a data-driven window
      range.data <- diff(range(whattoplot))
      range.unif <- y.jitter.factor *
        (range.data / (2 * (length(whattoplot) - 1)))
      whattoplot <- whattoplot +
        runif(length(whattoplot), -(range.unif), range.unif)
    }
  }
  sort.what <- sort(whattoplot)
  length.what <- length(whattoplot)
  ## section 3: perform all plotting
  ## first, make sure length of point.pch, cex, col is same as subset.c
  len.c <- length(subset.c)
  if (length(point.pch) < len.c) {
    warning("length(point.pch) < length(subset.c)")
    point.pch <- rep(point.pch, len.c)[1:len.c]
  }
  if (length(point.cex) < len.c) {
    warning("length(point.cex) < length(subset.c)")
    point.cex <- rep(point.cex, len.c)[1:len.c]
  }
  if (length(point.col) < len.c) {
    warning("length(point.col) < length(subset.c)")
    point.col <- rep(point.col, len.c)[1:len.c]
  }
  ## set default of par(new = FALSE)
  par(new = FALSE)
  ## plot external legend (if requested); drawn on its own page, then
  ## par(ask=TRUE) pauses before the chart itself
  if (legend.plot == TRUE && legend.location == "o") {
    plot(1, 1, type = "n", xlim = c(0, 100), ylim = c(0, 100),
         axes = FALSE, xlab = "", ylab = "")
    mtext(legend.titl, line = legend.titl.line, outer = FALSE,
          cex = legend.titl.cex)
    legnd(legend.point.at[[1]], legend.point.at[[2]],
          leg = legend.point.text,
          pch = legend.point.pch, cex = legend.cex,
          col = point.col, bty = legend.bty)
    if (!is.na(line.by)) {
      par(new = TRUE)
      legnd(legend.line.at[[1]], legend.line.at[[2]],
            leg = legend.line.text, cex = legend.cex,
            lty = line.lty, lwd = legend.line.lwd,
            col = line.col, bty = legend.bty)
    }
    invisible(par(ask = TRUE))
  }
  ## start creating objects to be used in determining plot region
  targdata <- targdata / x.scale
  targodata <- targodata / x.scale
  minvec <- apply(targdata[t.whotoplot,, drop = FALSE], 1, min, na.rm = TRUE)
  minotime <- ifelse(x.axis.all,
                     min(apply(targodata, 1, min, na.rm = TRUE), na.rm = TRUE),
                     min(minvec, na.rm = TRUE))
  maxvec <- apply(targdata[t.whotoplot,, drop = FALSE], 1, max, na.rm = TRUE)
  maxotime <- ifelse(x.axis.all,
                     max(apply(targodata, 1, max, na.rm = TRUE), na.rm = TRUE),
                     max(maxvec, na.rm = TRUE))
  ## determine par parameters and plot graphical region based
  ## on request of y.var and, subsequently, y.var.type and now.line
  y.axis.top <- sort.what[length.what] + y.lim.extend[2]
  y.axis.bottom <- sort.what[1] - y.lim.extend[1]
  x.axis.right <- maxotime + x.lim.extend[2]
  x.axis.left <- minotime - x.lim.extend[1]
  if (!is.na(y.var) & y.var.type == "d") {
    ## y is itself a date variable ("Goldman" style chart)
    oldpar <- par(omi = rep(0, 4), lwd = 0.6,
                  mgp = c(3.05, 1.1, 0), tck = -0.006, ...)
    ## set pty
    par(pty = pty)
    plot(whattoplot, type = "n",
         xlim = c(x.axis.left,
                  ifelse(now.line,
                         (now - (min(data[, subset.c], na.rm = TRUE))) / x.scale,
                         x.axis.right)),
         ylim = c(y.axis.bottom,
                  ifelse(pty == "s",
                         now,
                         y.axis.top)),
         xlab = x.lab, ylab = y.lab, axes = FALSE)
    if (now.line == TRUE)
      abline(now, ((sort.what[1] - now) /
                   (((now - min(data[, subset.c], na.rm = TRUE)) / x.scale) - minotime)),
             lty = now.line.lty, lwd = now.line.lwd, col = now.line.col)
  } else if (is.na(y.var) | (!is.na(y.var) & y.var.type == "n")) {
    if (now.line == TRUE)
      stop("with now.line == TRUE, y.var & y.var.type == 'd' must be specified\n")
    oldpar <- par(omi = rep(0, 4), lwd = 0.6,
                  mgp = c(2.8, 1.1, 0), tck = -0.006, ...)
    plot(whattoplot, type = "n",
         xlim = c(x.axis.left, x.axis.right),
         ylim = c(y.axis.bottom - 1, y.axis.top + 1),
         xlab = x.lab, ylab = y.lab, axes = FALSE)
  }
  ## plot y-axis labels
  if (!is.na(y.idlabels)) {
    if (!is.na(y.var)) {
      warning("y.idlabels not used when y.var has been specified\n")
      axis(side = 2)
    } else if (is.na(y.var))
      axis(side = 2, at = whattoplot,
           labels = as.vector(data[whotoplot, y.idlabels]))
  } else if (is.na(y.idlabels)) {
    if (y.axis == "auto") {
      if (is.na(y.var) | (!is.na(y.var) & y.var.type == "n"))
        axis(side = 2)
      else if (!is.na(y.var) & y.var.type == "d") {
        if (y.julian == FALSE) {
          ## label a date-valued y axis with mm/dd/yy at 5 pretty points
          y.axis.auto.now.bottom <- ifelse(now.line, sort.what[1],
                                           y.axis.bottom)
          ## marked by JJL, disable square plot
          ##y.axis.auto.now.top <- ifelse(now.line, now, y.axis.top)
          y.axis.auto.now.top <- ifelse(now.line, y.axis.top, y.axis.top)
          y.axis.auto.at <- round(seq(y.axis.auto.now.bottom,
                                      y.axis.auto.now.top, length = 5))
          y.axis.auto.labels <-
            paste(month.day.year(y.axis.auto.at, origin = date.orig)$month, "/",
                  month.day.year(y.axis.auto.at, origin = date.orig)$day, "/",
                  substring(month.day.year(y.axis.auto.at,
                                           origin = date.orig)$year, 3, 4),
                  sep = "")
          axis(side = 2, at = y.axis.auto.at, labels = y.axis.auto.labels)
        } else if (y.julian == TRUE)
          axis(side = 2)
      }
    } else if (y.axis == "custom") {
      if (is.na(y.axis.custom.at[1]) || is.na(y.axis.custom.labels[1]))
        stop("with y.axis == 'custom', must specify y.axis.custom.at and y.axis.custom.labels\n")
      axis(side = 2, at = y.axis.custom.at, labels = y.axis.custom.labels)
    }
  }
  ## plot x-axis labels
  if (x.axis == "auto") {
    if (x.julian == FALSE) {
      x.axis.auto.at <-
        round(seq(x.axis.left, x.axis.right, length = 5))
      x.axis.auto.labels <-
        paste(month.day.year(x.axis.auto.at, origin = date.orig)$month, "/",
              month.day.year(x.axis.auto.at, origin = date.orig)$day, "/",
              substring(month.day.year(x.axis.auto.at,
                                       origin = date.orig)$year, 3, 4),
              sep = "")
      axis(side = 1, at = x.axis.auto.at, labels = x.axis.auto.labels)
    } else if (x.julian == TRUE)
      axis(side = 1)
  } else if (x.axis == "custom") {
    if (is.na(x.axis.custom.at[1]) || is.na(x.axis.custom.labels[1]))
      stop("with x.axis = 'custom', user must specify x.axis.custom.at and x.axis.custom.labels\n")
    axis(side = 1, at = x.axis.custom.at, labels = x.axis.custom.labels)
  }
  if (!is.na(titl)) {
    title(titl)
  }
  ## plot lines and points
  if (!is.na(line.by)) {
    ## one line style per level of the grouping variable line.by
    line.byw <- data[whotoplot, line.by]
    table.by <- table(as.character(line.byw), exclude = c("", "NA"))
    names.by <- names(table.by)
    len.by <- length(table.by)
    if (length(line.lty) < len.by)
      warning("user provided length(line.lty) < num. of line.by categories")
    if (length(line.lwd) < len.by)
      warning("user provided length(line.lwd) < num. of line.by categories")
    if (length(line.col) < len.by)
      warning("user provided length(line.col) < num. of line.by categories")
    line.lty <- rep(line.lty, len = len.by)
    line.lwd <- rep(line.lwd, len = len.by)
    line.col <- rep(line.col, len = len.by)
    lbt.whotoplot <-
      (1:(length(t.whotoplot)))[as.character(line.byw) != "" &
                                as.character(line.byw) != "NA"]
    for (i in lbt.whotoplot) {
      lines(c(minvec[i], maxvec[i]), rep(whattoplot[i], 2),
            lty = as.vector(line.lty[names.by == line.byw[i]]),
            lwd = as.vector(line.lwd[names.by == line.byw[i]]),
            col = as.vector(line.col[names.by == line.byw[i]]))
    }
  } else if (is.na(line.by)) {
    for (i in 1:length(t.whotoplot))
      lines(c(minvec[i], maxvec[i]), rep(whattoplot[i], 2),
            lty = line.lty[1], lwd = line.lwd[1], col = line.col[1])
  }
  if(is.na(point.cex.mult.var[1]))
    for(j in 1:dim(targdata)[2])
      points(as.vector(unlist(targdata[t.whotoplot, j])), whattoplot,
             pch = point.pch[j], cex = point.cex[j], col = point.col[j])
  else {
    ## loop only for extra points
    for(j in which(!is.na(extra.points.no.mult)))
      points(as.vector(unlist(targdata[t.whotoplot, j])), whattoplot,
             pch = point.pch[j], cex = point.cex[j], col = point.col[j])
    ## loop for points to magnify based on level of covariates
    for(i in 1:length(t.whotoplot)) {
      k <- 0
      for(j in which(is.na(extra.points.no.mult))) {
        k <- k + 1
        points(as.vector(unlist(targdata[t.whotoplot[i], j])),
               whattoplot[i], pch = point.pch[j],
               cex = ifelse(is.na(data[whotoplot[i], point.cex.mult.var[k]]),
                            point.cex[j],
                            point.cex.mult * data[whotoplot[i], point.cex.mult.var[k]] * point.cex[j]),
               col = point.col[j])
      }
    }
  }
  ## add line.add segments (if requested)
  if (!is.na(as.vector(line.add)[1])) {
    if (any(is.na(line.add.lty)))
      stop("line.add.lty can not have missing value(s) with non-missing line.add\n")
    if (any(is.na(line.add.lwd)))
      stop("line.add.lwd can not have missing value(s) with non-missing line.add\n")
    if (any(is.na(line.add.col)))
      stop("line.add.col can not have missing value(s) with non-missing line.add\n")
    ## line.add is a 2-row matrix; each column names a pair of event
    ## columns (from subset.c) to connect with a segment
    line.add.m <- as.matrix(line.add)
    dim.m <- dim(line.add.m)
    if (dim.m[1] != 2)
      stop("line.add must be a matrix with two rows\n")
    if (length(line.add.lty) != dim.m[2])
      stop("length of line.add.lty must be the same as number of columns in line.add\n")
    if (length(line.add.lwd) != dim.m[2])
      stop("length of line.add.lwd must be the same as number of columns in line.add\n")
    if (length(line.add.col) != dim.m[2])
      stop("length of line.add.col must be the same as number of columns in line.add\n")
    for (j in (1:dim.m[2])) {
      for (i in (1:length(t.whotoplot))) {
        add.var1 <- subset.c == line.add.m[1, j]
        if (!any(add.var1))
          stop("variables chosen in line.add must also be in subset.c\n")
        add.var2 <- subset.c == line.add.m[2, j]
        if (!any(add.var2))
          stop("variables chosen in line.add must also be in subset.c\n")
        segments(targdata[i, (1:len.c)[add.var1]], whattoplot[i],
                 targdata[i, (1:len.c)[add.var2]], whattoplot[i],
                 lty = line.add.lty[j], lwd = line.add.lwd[j],
                 col = line.add.col[j])
      }
    }
  }
  ## plot internal legend (if requested)
  if (legend.plot == TRUE & legend.location != "o") {
    if (legend.location == "i") {
      legnd(legend.point.at[[1]], legend.point.at[[2]],
            leg = legend.point.text,
            pch = legend.point.pch, cex = legend.cex,
            col = point.col, bty = legend.bty)
      if (!is.na(line.by))
        legnd(legend.line.at[[1]], legend.line.at[[2]],
              leg = legend.line.text, cex = legend.cex,
              lty = line.lty, lwd = legend.line.lwd,
              col = line.col, bty = legend.bty)
    } else if (legend.location == "l") {
      ## interactive placement via locator()
      cat("Please click at desired location to place legend for points.\n")
      legnd(locator(legend.loc.num), leg = legend.point.text,
            pch = legend.point.pch, cex = legend.cex,
            col = point.col, bty = legend.bty)
      if (!is.na(line.by)) {
        cat("Please click at desired location to place legend for lines.\n")
        legnd(locator(legend.loc.num), leg = legend.line.text,
              cex = legend.cex, lty = line.lty,
              lwd = legend.line.lwd, col = line.col, bty = legend.bty)
      }
    }
  }
  ## add box to main plot and clean up
  invisible(box())
  invisible(par(ask = FALSE))
  par(oldpar)
}
## event.convert.s
## convert 2-column coded events to multiple event time for event.chart()
## input: a matrix or dataframe with at least 2 columns
## by default, the first column contains the event time and
## the second column contains the k event codes (e.g. 1=dead, 0=censored)
## output: a matrix of k columns; each column contains the time of the kth coded event
##
event.convert <- function(data2, event.time = 1, event.code = 2)
{
  ## Convert (time, code) column pairs into one column per distinct code
  ## level, for use as event columns by event.chart().
  ## data2      : matrix or data frame
  ## event.time : column index(es) holding event times
  ## event.code : parallel column index(es) holding event codes
  ## Returns a matrix with one column per (time variable, code level)
  ## combination, named e.g. "V1.death"; cells are the time when that
  ## code occurred, NA otherwise.
  dim.d <- dim(data2)
  len.t <- length(event.time)
  if(len.t != length(event.code))
    stop("length of event.time and event.code must be the same")
  if(any(event.time > dim.d[2]))
    stop(paste("Column(s) in event.time cannot be greater than ", dim.d[2]))
  if(any(event.code > dim.d[2]))
    stop(paste("Column(s) in event.code cannot be greater than ",
               dim.d[2]))
  ## Fall back to V<j> names when data2 has no column names
  name.data <- names(data2)[event.time]
  if(is.null(name.data)) {
    name.data <- paste("V", event.time, sep = "")
  }
  ## n.level[i] = number of distinct code values for the i-th code column
  n.level <- rep(NA, len.t)
  for (i in (1:len.t)) {
    n.level[i] <- length(table(data2[, event.code[i]]))
  }
  tot.col <- sum(n.level)
  data.out <- matrix(NA, dim.d[1], tot.col)
  name.col <- rep(NA, tot.col)
  n.col <- 1
  for (i in (1:len.t)) {
    tab.d <- table(data2[, event.code[i]])
    ## NOTE(review): class() never returns NULL in modern R, so the
    ## character branch below always runs; comparisons still work via
    ## coercion of level.value — confirm intent (legacy S code)
    if(is.null(class(data2[, event.code[i]])))
      level.value <- as.numeric(names(tab.d))
    else
      level.value <- names(tab.d)
    for (j in (1:length(tab.d))) {
      data.out[, n.col] <- rep(NA, dim.d[1])
      ## NA codes must not select rows, so treat NA comparisons as FALSE
      check <- data2[, event.code[i]] == level.value[j]
      check[is.na(check)] <- FALSE
      data.out[, n.col][data2[, event.code[i]] == level.value[j]] <-
        data2[, event.time[i]][check]
      name.col[n.col] <-
        paste(name.data[i], ".", names(tab.d)[j], sep = "")
      n.col <- n.col + 1
    }
  }
  dimnames(data.out) <- list(1:dim.d[1], name.col)
  return(as.matrix(data.out))
}
Hmisc/R/ynbind.s 0000644 0001762 0000144 00000004211 13070455033 013163 0 ustar ligges users ynbind <- function(..., label=deparse(substitute(...)), asna=c('unknown', 'unspecified'), sort=TRUE) {
  ## Combine several yes/no-type variables into a logical matrix of class
  ## 'ynbind'.  Each input may be logical, 0/1 numeric, or character/factor
  ## (values matching 'y'/'yes'/'present', case-insensitively, count as TRUE;
  ## values listed in asna become NA).  Column labels come from each
  ## variable's 'label' attribute when present, else from the expression.
  ## sort=TRUE orders columns by ascending proportion TRUE.
  w <- list(...)
  k <- length(w)
  if(! k) stop('no variables to process')
  ## Recover the argument expressions for default column names
  nam <- as.character(sys.call())[-1]
  nam <- nam[1 : k]
  lab <- nam
  W <- matrix(NA, nrow=length(w[[1]]), ncol=k, dimnames=list(NULL, nam))
  for(j in 1 : k) {
    x <- w[[j]]
    na <- is.na(x)
    la <- label(x)
    if(la != '') lab[j] <- la
    if(is.numeric(x) && all(x %in% 0 : 1)) x <- x == 1
    if(! is.logical(x)) {
      x <- tolower(as.character(x))
      if(length(asna)) {
        i <- x %in% asna
        if(any(i)) na[i] <- TRUE
      }
      x <- x %in% c('y', 'yes', 'present')
      if(any(na)) x[na] <- NA
    }
    W[, j] <- x
  }
  ## Sort columns in ascending order of overall proportion
  prop <- apply(W, 2, mean, na.rm=TRUE)
  if(sort) {
    i <- order(prop)
    W <- W[, i, drop=FALSE]
    lab <- lab[i]
  }
  structure(W, label=label, labels=lab, class=c('ynbind', 'matrix'))
}
## Subscript method for 'ynbind' that preserves the label/labels
## attributes (labels subset to the selected columns)
'[.ynbind' <- function(x, rows=1:d[1], cols=1:d[2], ...) {
  d <- dim(x)
  at <- attributes(x)[c('label', 'labels')]
  x <- NextMethod('[')
  at$labels <- at$labels[cols]
  attributes(x) <- c(attributes(x), at)
  ## NOTE(review): class set to 'ynbind' only, dropping the 'matrix' class
  ## present at construction; also label attrs are attached even when the
  ## result dropped to a vector (unlike [.pBlock) — confirm intended
  if(is.matrix(x)) class(x) <- 'ynbind'
  x
}
pBlock <- function(..., subset=NULL, label=deparse(substitute(...))) {
  ## Combine several variables into a character matrix of class 'pBlock'
  ## for tabulation as a block.  Factors are converted to character.
  ## Rows excluded by `subset` (logical keep-mask or positive indices)
  ## are set entirely to NA.  Column labels come from each variable's
  ## 'label' attribute when present, else from the expression text.
  w <- list(...)
  k <- length(w)
  if(! k) stop('no variables to process')
  ## Recover the argument expressions for default column names
  nam <- as.character(sys.call())[-1]
  nam <- nam[1 : k]
  lab <- nam
  W <- matrix(NA, nrow=length(w[[1]]), ncol=k, dimnames=list(NULL, nam))
  for(j in 1 : k) {
    x <- w[[j]]
    na <- is.na(x)
    la <- label(x)
    if(la != '') lab[j] <- la
    W[, j] <- if(is.factor(x)) as.character(x) else x
  }
  if(length(subset)) {
    if(is.logical(subset) && (length(subset) != nrow(W)))
      stop('length of subset does not match length of analysis variables')
    ## Invert the keep-specification so `subset` indexes the rows to blank
    subset <- if(is.logical(subset)) ! subset else - subset
    W[subset, ] <- NA
  }
  structure(W, label=label, labels=lab, class=c('pBlock', 'matrix'))
}
## Subscript method for 'pBlock': keeps label/labels attributes (labels
## subset to the selected columns) only when the result is still a matrix
'[.pBlock' <- function(x, rows=1:d[1], cols=1:d[2], ...) {
  d <- dim(x)
  at <- attributes(x)[c('label', 'labels')]
  x <- NextMethod('[')
  if (is.matrix(x)) {
    at$labels <- at$labels[cols]
    attributes(x) <- c(attributes(x), at)
    class(x) <- 'pBlock'
  }
  x
}
Hmisc/R/pstamp.s 0000644 0001762 0000144 00000002241 13067146400 013206 0 ustar ligges users pstamp <- function(txt, pwd=FALSE, time.=TRUE)
{
  ## Stamp the lower-right corner of the current plot with the date/time
  ## (time.=TRUE) or date only, optionally prefixed by the working
  ## directory (pwd=TRUE) and/or caller-supplied text (txt).
  stamp <- function(string = Sys.time(), print = TRUE, plot = TRUE)
  {
    ## Write `string` right-justified just outside the plot region,
    ## restoring the par settings changed here on exit
    opar <- par('yaxt', 'xaxt', 'xpd')
    par(yaxt='s',xaxt='s',xpd=NA)
    on.exit(par(opar))
    plt <- par('plt')
    usr <- par('usr')
    ## when a logarithmic scale is in use (i.e. par('xlog') is true),
    ## then the x-limits would be 10^par('usr')[1:2].  Similarly for
    ## the y axis
    xcoord <- usr[2] + (usr[2] - usr[1])/(plt[2] - plt[1]) *
      (1-plt[2]) - .6*strwidth('m')
    ycoord <- usr[3] - diff(usr[3:4])/diff(plt[3:4])*(plt[3]) +
      0.6*strheight('m')
    if(par('xlog'))
      xcoord <- 10^(xcoord)
    if(par('ylog'))
      ycoord <- 10^(ycoord)
    ## Print the text on the current plot
    text(xcoord, ycoord, string, adj=1)
    invisible(string)
  }
  date.txt <- if(time.) format(Sys.time())
  else format(Sys.time(), '%Y-%m-%d')
  if(pwd)
    date.txt <- paste(getwd(), date.txt)
  ## Use small text in a single-panel layout for the stamp
  oldpar <- par('mfrow', 'cex')
  par(mfrow=c(1,1), cex = 0.5)
  on.exit(par(oldpar))
  if(!missing(txt))
    date.txt <- paste(txt,' ',date.txt, sep='')
  stamp(string=date.txt,print=FALSE,plot=TRUE)
  invisible()
}
Hmisc/R/rcorr.cens.s 0000644 0001762 0000144 00000002514 14400370410 013753 0 ustar ligges users ## Computes rank correlation measures between a variable X and a possibly
## censored variable Y, with event/censoring indicator EVENT
## Rank correlation is extension of Somers' Dxy = 2(Concordance Prob-.5)
## See Harrell et al JAMA 1984(?)
## Set outx=T to exclude ties in X from computations (-> Goodman-Kruskal
## gamma-type rank correlation)
rcorr.cens <- function(x, S, outx=FALSE) {
  ## Rank correlation between predictor x and possibly right-censored
  ## outcome S (a Surv object or plain vector; a plain vector is treated
  ## as uncensored).  Computes the C index and Somers' Dxy via the
  ## compiled Fortran routine cidxcn.  outx=TRUE excludes ties in x
  ## (Goodman-Kruskal gamma-type correlation).
  ## Returns a named vector: C Index, Dxy, S.D., n, missing, uncensored,
  ## Relevant Pairs, Concordant, Uncertain.
  if(inherits(S, 'Surv')) {
    if(attr(S, 'type') != 'right')
      stop('only handles right censored times')
  } else S <- cbind(S, rep(1, length(S)))
  y     <- S[,1]
  event <- S[,2]
  if(length(y)!=length(x))
    stop("y must have same length as x")
  ## Drop observations missing in any of x, time, or event status
  miss <- is.na(x) | is.na(y) | is.na(event)
  nmiss <- sum(miss)
  if(nmiss>0) {
    miss <- !miss
    x <- x[miss]
    y <- y[miss]
    event <- event[miss]
  }
  n <- length(x)
  ne <- sum(event)
  ## Coerce storage modes to what the Fortran routine expects
  storage.mode(x) <- "double"
  storage.mode(y) <- "double"
  storage.mode(event) <- "logical"
  z <-
    .Fortran(F_cidxcn,x,y,event,length(x),nrel=double(1),nconc=double(1),
             nuncert=double(1),
             c.index=double(1),gamma=double(1),sd=double(1),as.logical(outx))
  r <- c(z$c.index,z$gamma,z$sd,n,nmiss,ne,z$nrel,z$nconc,z$nuncert)
  names(r) <- c("C Index","Dxy","S.D.","n","missing","uncensored",
                "Relevant Pairs",
                "Concordant","Uncertain")
  r
}
Hmisc/R/scat1d.s 0000644 0001762 0000144 00000100047 14422467613 013074 0 ustar ligges users ## Improvements due to Martin Maechler
## Draw a one-dimensional scatter ("rug") of x along one side of an
## existing plot, or along a supplied y/curve.  Ties are jittered (or
## spread with jitter2 when preserve=TRUE); large n is delegated to
## histSpike.  Works for base graphics and lattice/grid (grid=TRUE).
scat1d <- function(x, side=3, frac=.02, jitfrac=.008, tfrac,
                   eps=ifelse(preserve,0,.001),
                   lwd=0.1, col=par('col'), y=NULL, curve=NULL,
                   bottom.align=FALSE, preserve=FALSE, fill=1/3, limit=TRUE,
                   nhistSpike=2000, nint=100,
                   type=c('proportion','count','density'),
                   grid=FALSE,
                   ...)
{
  type <- match.arg(type)
  ## For many points a spike histogram is more readable than a rug
  if(length(x) >= nhistSpike)
    return(histSpike(x, side=side, type=type,
                     frac=2.5 * frac, col=col, y=y, curve=curve,
                     bottom.align=if(type == 'density') TRUE else bottom.align,
                     add=TRUE, nint=nint, grid=grid, ...))
  gfun <- ordGridFun(grid)
  ## l indexes usr limits for the data axis; ax is the axis number
  if(side == 1 || side == 3 || length(y) || length(curve)) {
    l <- 1:2
    ax <- 1
  } else {
    l <- 3:4
    ax <- 2
  }
  pr <- parGrid(grid)
  usr <- pr$usr; pin <- pr$pin; uin <- pr$uin
  u     <- usr[  l]
  u.opp <- usr[- l]
  w <- u[2] - u[1]
  ## Start JOA 12.8.97 : handle xy missings parallel
  if (length(y) > 1) { ## length=1 special case needed for datadensity
    if (length(x) != length(y))
      stop("y must have same length as x (or length(y)=1)")
    selector <- ! (is.na(x) | is.na(y))
    x <- unclass(x[selector])
    y <- unclass(y[selector])
  } else x <- unclass(x[! is.na(x)])
  ## Stop JOA 12.8.97
  ## Place ticks on a supplied curve by interpolation
  if(length(curve))
    y <- approx(curve, xout=x, rule=2)$y
  n <- length(x)
  if(missing(tfrac))
    tfrac <- if(n < 125) 1 else max(.1, 125 / n)
  else if (tfrac < 0 || tfrac > 1)
    stop("must have 0 <= tfrac <= 1")
  ## Start JOA 19.8.97
  ## Only jitter when there are (near-)duplicates at resolution eps
  if(jitfrac > 0 && anyDuplicated( if(eps > 0) round(x / w / eps) else x ))
    if (preserve)
      x <- jitter2(x, fill=fill, limit=limit, eps=w * eps)
    else
      ## Stop JOA 19.8.97
      x <- x + runif(n, -w * jitfrac, w * jitfrac)
  ## Tick length in data units on the opposite axis
  h <- min(pin) * frac / uin[- ax]
  h2 <- h / 2
  if(grid && length(y) && inherits(y, 'unit')) {
    h  <- unit(frac,   'npc')
    h2 <- unit(frac/2, 'npc')
  }
  ## a..b are the tick segment endpoints on the opposite axis
  if(length(y)) {
    a <- y - h2
    b <- y + h2
  } else {
    a <- if(side < 3) u.opp[1]
    else u.opp[2] - h
    b <- if(side < 3) u.opp[1] + h
    else u.opp[2]
  }
  ## tfrac < 1 shortens each tick and randomizes its position within [a,b]
  if(tfrac < 1) {
    l <- tfrac * (b - a)
    a <- a + runif(n) * (b - l - a)  ##runif(n, a, b-l) if frac>0
    b <- a + l
  }
  if(ax == 1 && bottom.align) {
    a <- a + h2
    b <- b + h2
  }
  if(ax == 1)
    gfun$segments(x, a, x, b, lwd=lwd, xpd=frac < 0, col=col)
  else
    gfun$segments(a, x, b, x, lwd=lwd, xpd=frac < 0, col=col)
  invisible()
}
## Generic for order-preserving jittering of tied values; dispatches on class(x)
jitter2 <- function(x,...) UseMethod("jitter2")
jitter2.default <- function(x, fill=1/3, limit=TRUE, eps=0,
                            presorted=FALSE, ...)
{
  ## Spread runs of tied values (ties at resolution eps) evenly around
  ## their common value so the original ordering is preserved, unlike
  ## random jitter.  fill controls the fraction of the gap to neighboring
  ## values used; limit caps the half-width (TRUE => the smallest such
  ## half-width).  NAs are left in place.
  x2 <- x[!is.na(x)]
  if (!presorted){
    o <- order(x2)
    x2 <- x2[o]
  }
  ## Run-length encoding of the (rounded) sorted values finds tie groups
  r <- if (eps > 0) rle(round(x2 / eps) * eps) else rle(x2)
  ## NOTE(review): r$length relies on partial matching of r$lengths
  if ( length(r$length) < 2 || max(r$length) < 2 )
    return(x)
  ## d = distance to the nearest distinct neighbor, per run
  d <- abs(diff(r$values))
  d <- pmin( c(d[1], d), c(d, d[length(d)]) )
  who <- rep(r$lengths > 1, r$lengths)
  d <- d[r$lengths > 1] * fill / 2
  if (is.logical(limit) && limit) limit <- min(d)
  if (limit) d <- pmin(d,limit)
  ## Shift each tie group to start at value - d, then space its members
  ## evenly across a width of 2d
  r$values  <- r$values[r$lengths > 1] - d
  r$lengths <- r$lengths[r$lengths > 1]
  d <- d * 2 / (r$lengths - 1)
  k <- length(r$lengths)
  n <- sum(who)
  val <- rep(r$values, r$lengths)
  add <- (0 : (n - 1)) - rep(c(0, cumsum(r$lengths[-k])), r$lengths)
  ## Randomize which tied observation gets which offset within its group
  add <- add[order(rep(1 : k, r$lengths), runif(n))]
  add <- add * rep(d, r$lengths)
  val <- val + add
  x2[who] <- val
  if (!presorted)
    x2[o]<-x2   # undo the sort: place values back at original positions
  x[!is.na(x)] <- x2
  x
}
jitter2.data.frame <- function(x, ...)
{
  ## Apply jitter2 to every numeric column; non-numeric columns pass
  ## through unchanged.  Extra arguments are forwarded to jitter2.default.
  jit1 <- function(z, ...) {
    if (is.numeric(z)) jitter2.default(z, ...) else z
  }
  cols <- lapply(x, jit1, ...)
  as.data.frame(cols)
}
## Generic for drawing data density (rug/strip) displays of all variables;
## ensures a class is present so UseMethod can dispatch on implicit classes
datadensity <- function(object, ...)
{
  if(!length(class(object)))
    class(object) <- data.class(object)
  UseMethod('datadensity')
}
## Draw a one-axis-per-variable summary of a data frame: continuous
## variables get scat1d rugs (optionally by group, with quantile markers),
## categorical variables get mini bar charts or frequency counts; missing
## counts appear in the right margin.  Sizes (cex, tck) shrink as the
## number of axes grows.  Draws on the current device; returns nothing.
datadensity.data.frame <-
  function(object, group,
           which=c('all','continuous','categorical'),
           method.cat=c('bar','freq'),
           col.group=1:10,
           n.unique=10, show.na=TRUE, nint=1, naxes,
           q, bottom.align=nint > 1,
           cex.axis=sc(.5,.3), cex.var=sc(.8,.3),
           lmgp=NULL, tck=sc(-.009,-.002),
           ranges=NULL, labels=NULL, ...)
{
  which <- match.arg(which)
  method.cat <- match.arg(method.cat)
  maxna <- 0
  mgroup <- missing(group)   # before R changes it
  ## Classify each variable: 0=unplottable, 1=categorical, 2=continuous;
  ## also count its NAs
  z <-
    sapply(object,
           function(x, n.unique)
           {
             xp <- x[!is.na(x)]
             nu <- if(length(xp)) length(unique(xp))
             else 0
             if(nu < 2) c(0,0)
             else
               c(type=if(is.factor(x) || is.character(x) || nu < n.unique)
                        1
                      else 2,
                 na=sum(is.na(x)))
           },
           n.unique=n.unique)
  types  <- c('nil','cat','cont')[z[1,]+1]
  numna  <- z[2,]
  fnumna <- format(numna)
  maxna  <- max(numna)
  w <- switch(which,
              all        = types != 'nil',
              continuous = types == 'cont',
              categorical= types == 'cat')
  if(missing(naxes)) naxes <- sum(w)
  ## Function to scale values such that when naxes<=3 get hi, >=50 get
  ## lo, otherwise linearly interpolate between 3 and 50
  sc <- function(hi,lo,naxes)
    approx(c(50,3),c(lo,hi),xout=naxes,rule=2)$y
  formals(sc) <- list(hi=NA,lo=NA,naxes=naxes)
  nams <- names(object)
  max.length.name <- max(nchar(nams))
  if(!length(lmgp))
    lmgp <- sc(0,0)
  oldpar <- oPar()  # in Hmisc Misc.s
  mgp  <- c(0,lmgp,0)
  mai  <- oldpar$mai
  plot.new();
  par(new=TRUE)
  ## enables strwidth
  ## Widen the left margin to fit the longest variable name
  mxlb <- .1 + max(strwidth(nams, units='inches', cex=cex.var))
  mai[2] <- mxlb
  if(!show.na) maxna <- 0
  max.digits.na <- if(maxna == 0) 0
  else trunc(log10(maxna)) + 1
  if(maxna > 0)
    mai[4] <- .1 + strwidth('Missing', units='inches', cex=cex.var)
  par(mgp=mgp, mai=mai,tck=tck)
  on.exit(setParNro(oldpar))
  ## With no group, use a single dummy group covering all rows
  if(!mgroup)
    group <- as.factor(group)
  else
  {
    group  <- factor(rep(1,length(object[[1]])))
    ngroup <- 0
  }
  ngroup <- length(levels(group))
  col.group <- rep(col.group, length.out=ngroup)
  y <- 0
  for(i in (1:length(nams))[w])
  {
    ## Start a new page of axes when the previous one is full
    if(y < 1)
    {
      plot(c(0,1),c(1,naxes),xlim=c(.02,.98),ylim=c(1,naxes),
           xlab='',ylab='',type='n',axes=FALSE)
      usr <- par('usr')
      y <- naxes + 1
      if(maxna > 0)
      {
        outerText('Missing',
                  y=naxes+strheight('Missing',units='user',cex=cex.var),
                  cex=cex.var)
      }
      charheight <- strheight('X',units='user',cex=.6)  ## par('cxy')[2]
    }
    y <- y - 1
    x <- object[[i]]
    if(types[i] == 'cont' )
    { ## continuous variable
      x    <- unclass(x)        ## handles dates
      isna <- is.na(x)
      nna  <- sum(isna)
      N <- length(x) - nna
      ## Allow caller-supplied axis ranges per variable
      r <-
        if(length(ranges) && length(ranges[[nams[i]]]))
          ranges[[nams[i]]]
        else
          range(x, na.rm=TRUE)
      p <- pretty(r,
                  if(nint == 1)5
                  else nint)
      if(nint < 2)
        p <- c(p[1],p[length(p)])  ##bug in pretty for nint=1
      xmin <- p[1]
      xmax <- p[length(p)]
      cex <- par(cex=cex.axis)  # Bug in R: cex= ignored in
                                        # axis( )
      axis(side=1, at=(p-xmin)/(xmax-xmin), labels=format(p),
           pos=y, cex=cex.axis)
      par(cex=cex)
      ## Rug(s): one overall, or one per group level in its own color
      if(mgroup)
        scat1d((x-xmin)/(xmax-xmin), y=y, bottom.align=bottom.align,
               minf=.075, frac=sc(.02,.005), ...)
      else for(g in 1:ngroup)
      {
        j <- group == levels(group)[g]
        scat1d((x[j]-xmin)/(xmax-xmin), y=y, bottom.align=bottom.align,
               col=col.group[g], tfrac=if(N<125) 1 else max(.1, 125/N),
               minf=.075, frac=sc(.02,.005), ...)
      }
      ## Optional quantile markers just below the rug
      if(!missing(q))
      {
        quant <- quantile(x, probs=q, na.rm=nna>0)
        points((quant-xmin)/(xmax-xmin),
               rep(y-.5*charheight,length(q)),
               pch=17, cex=.6)
      }
    } else { ## character or categorical or discrete numeric
      if(is.character(x)) x <- as.factor(x)
      isna <- is.na(x)
      nna  <- sum(isna)
      if(length(group) != length(x)) {
        if(is.data.frame(object))
          stop('length of group must equal length of variables in data frame')
        group <- rep(1, length(x))
      }
      tab <- table(group,x)
      lev <- dimnames(tab)[[2]]
      nl  <- length(lev)
      ## Position levels along [0,1]: numerically for numeric levels,
      ## evenly spaced otherwise (long label sets are truncated)
      if(is.numeric(x))
      {
        xx <- as.numeric(lev)
        xx <- (xx - min(xx)) / (max(xx) - min(xx))
      } else {
        if(sum(nchar(lev)) > 200)
          lev <- substring(lev, 1, max(1, round(200 / length(lev))))
        xx <- (0 : (nl - 1)) / (nl - 1)
      }
      cex <- par(cex=cex.axis)
      axis(side=1, at=xx, labels=lev, pos=y, cex=cex.axis, tick=FALSE)
      par(cex=cex)
      lines(c(0,1), c(y,y))
      maxfreq <- max(tab)
      ## Bars (or printed counts) per group, scaled to the max frequency
      for(g in if(ngroup == 0) 1 else 1 : ngroup)
      {
        tabg <- tab[g,]
        if(method.cat == 'bar')
          symbols(xx, y + .4 * tabg / maxfreq / 2, add=TRUE,
                  rectangles=cbind(.02, .4 * tabg / maxfreq),
                  inches=FALSE,
                  col=col.group[g])
        else text(xx, rep(y + .1, nl), format(tabg),
                  cex=cex.axis * sqrt(tab / maxfreq),
                  adj=.5)
      }
    }
    mtext(if(length(labels))labels[i]
    else nams[i],
    2, 0, at = y, srt = 0, cex = cex.var, adj = 1, las=1)
    ## las=1 for R (also 3 lines down)
    if(show.na && nna > 0)
      outerText(fnumna[i], y, cex=cex.var)
  }
  invisible()
}
## histSpike: draw a spike histogram (type='proportion' or 'count') or a
## scaled density estimate (type='density') of x.  The display may be put on
## a new plot, added to an existing plot on any side (add=TRUE), drawn as a
## minimal standalone plot (minimal=TRUE), or aligned with a curve (curve=)
## or with per-point y coordinates (y=).  Dispatches to base- or
## grid-graphics primitives through ordGridFun(grid).  Returns xlim
## invisibly.
## Note: the ylim default c(0, max(f)) refers to the frequency vector f
## computed inside the body; R's lazy evaluation of defaults makes this
## work because ylim is first used after f is assigned.
histSpike <-
  function(x, side=1, nint=100, bins=NULL, frac=.05, minf=NULL, mult.width=1,
           type=c('proportion','count','density'),
           xlim=range(x),
           ylim=c(0, max(f)), xlab=deparse(substitute(x)),
           ylab=switch(type, proportion='Proportion',
                       count     ='Frequency',
                       density   ='Density'),
           y=NULL, curve=NULL, add=FALSE, minimal=FALSE,
           bottom.align=type == 'density',
           col=par('col'), lwd=par('lwd'), grid=FALSE, ...)
{
  type <- match.arg(type)
  if(! minimal) {
    if(! add && side != 1)
      stop('side must be 1 if add=FALSE')
    if(add && type == 'count')
      warning('type="count" is ignored if add=TRUE')
  }
  ## A vector-valued y (one value per x) defines the curve to align with
  if(length(y) > 1) {
    if(length(y) != length(x))
      stop('lengths of x and y must match')
    if(length(curve))
      warning('curve ignored when y specified')
    i <- ! is.na(x + y)
    curve <- list(x=x[i], y=y[i])
  }
  if(length(curve) && !missing(bottom.align) && bottom.align)
    warning('bottom.align=T specified with curve or y; ignoring bottom.align')
  gfun <- ordGridFun(grid)   # base-graphics vs grid primitives
  x <- x[!is.na(x)]
  x <- x[x >= xlim[1] & x <= xlim[2]]
  if(type != 'density') {
    ## Tabulate raw values when nint is character or n is tiny; otherwise bin
    if(is.character(nint) || length(x) <= 10) {
      f <- table(x)
      x <- as.numeric(names(f))
    } else {
      ncut <- nint + 1
      if(! length(bins)) bins <- seq(xlim[1], xlim[2], length = ncut)
      delta <- (bins[2] - bins[1]) / 2
      f <- table(cut(x, c(bins[1] - delta, bins)))
      x <- bins
      j <- f > 0        # drop empty bins
      x <- x[j]
      f <- f[j]
    }
    if(type == 'proportion') f <- f / sum(f)
  } else {
    ## Density bandwidth from Sturges' bar count (log2(n)+1), scaled by
    ## mult.width
    nbar <- logb(length(x), base = 2) + 1
    width <- diff(range(x)) / nbar * .75 * mult.width
    den <- density(x, width=width, n=200, from=xlim[1], to=xlim[2])
    x <- den$x
    f <- den$y
  }
  if(! minimal && ! add) {
    if(grid)
      stop('add=FALSE not implemented for lattice')
    plot(0, 0, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, type='n')
  }
  ## l: indices into par('usr') for the axis the spikes run along;
  ## ax: 1 = spikes stand on the x-axis, 2 = on the y-axis
  if(side == 1 || side == 3) {
    l <- 1:2;
    ax <- 1
  } else {
    l <- 3:4;
    ax <- 2
  }
  f <- f / max(f)                      # normalize heights to [0, 1]
  if(length(minf)) f <- pmax(f, minf)  # enforce minimum visible spike height
  if(minimal) {
    ## Bare-bones standalone rendering occupying the whole device
    plot.new() # sets (0,0) to (1,1)
    del <- diff(xlim)
    usr <- c(xlim[1] - 0.04 * del, xlim[2] + 0.04 * del, 0, 1)
    par(mar=c(3.5, 0.5, 0.5, 0.5), mgp=c(2, 0.5, 0), usr=usr)
    axis(1, line=0.25)
    title(xlab=xlab)
    segments(x, rep(0, length(x)), x, f, lwd=lwd, col=col)
    return(invisible(xlim))
  }
  pr <- parGrid(grid)
  usr <- pr$usr;
  pin <- pr$pin;
  uin <- pr$uin
  u <- usr[l]
  u.opp <- usr[- l]
  ## Spike height in user units: frac of the smaller plot dimension (inches)
  ## converted via units-per-inch for the perpendicular axis
  h <- min(pin) * frac / uin[- ax] * f
  h2 <- h / 2
  if(length(y) && inherits(y, 'unit')) {  # grid 'unit' y: fixed npc height
    h <- unit(frac, 'npc')
    h2 <- unit(frac / 2, 'npc')
  }
  if(length(curve) || length(y)) {
    ## Center spikes vertically on the (interpolated) curve
    if(length(curve))
      y <- approx(curve, xout=x, rule=2)$y
    a <- y - h2; b <- y + h2
  } else {
    ## Anchor at the near plot edge for sides 1-2, the far edge for 3-4
    a <- if(side < 3) u.opp[1]
         else u.opp[2] - h
    b <- if(side < 3) u.opp[1] + h
         else u.opp[2]
  }
  if(ax == 1 && bottom.align && type != 'density') {
    a <- a + h2
    b <- b + h2
  }
  if(type == 'density') {
    lll <- gfun$lines
    ## Problem in S+ getting right value of lwd
    if(ax == 1)
      do.call('lll',list(x,
                         if(side == 1)b
                         else a,
                         lwd=lwd, col=col))
    else
      do.call('lll',list(if(side == 2)b
                         else a,
                         x, lwd=lwd, col=col))
  } else {
    lll <- gfun$segments
    ## xpd=frac<0 lets downward (negative-frac) spikes escape the plot region
    if(ax == 1)
      do.call('lll',list(x, a, x, b, lwd=lwd, xpd=frac < 0, col=col))
    else
      do.call('lll',list(a, x, b, x, lwd=lwd, xpd=frac < 0, col=col))
  }
  invisible(xlim)
}
## histSpikep: plotly version of a spike histogram in which each spike is a
## 3-point line (base, tip, NA separator) so all spikes form a single trace.
## DISABLED: wrapped in if(FALSE), so this definition is never evaluated;
## kept in the source for reference only.
if(FALSE)
histSpikep <- function(p, x, y, z, group=NULL, color=NULL, hovertext=NULL,
                       colors=NULL,
                       bottom.align=TRUE, tracename='Proportion', ...) {
  ## Triple every point: rows 1 & 2 are the segment endpoints, row 3 the break
  d <- data.frame(x=rep(x, each=3),
                  y=rep(y, each=3),
                  z=rep(z, each=3))
  origcolor <- color
  if(length(group)) d$group <- rep(group, each=3)
  if(length(hovertext)) d$hovertext <- rep(hovertext, each=3)
  if(length(color)) d$color <- rep(color, each=3)
  n <- nrow(d)
  j <- seq(1, n, by=3)                 # segment bases
  if(length(hovertext)) d$hovertext[j] <- ''
  if(! bottom.align) d$y[j] <- d$y[j] - d$z[j] / 2
  j <- seq(3, n, by=3)                 # separators: NA x breaks the line
  d$x[j] <- NA
  if(length(hovertext)) d$hovertext[j] <- ''
  j <- seq(2, n, by=3)                 # segment tips
  d$y[j] <- d$y[j] + d$z[j] / ifelse(bottom.align, 1, 2)
  plotly::plot_ly(d, x=~ x, y=~ y, mode='lines', type='scatter',
                  line=list(color=d$color, width=1.4), # ...
                  text=~ hovertext,
                  hoverinfo=if(length(hovertext)) 'text' else 'none')
}
## histboxp: plotly spike histogram of x, one horizontal band per level of
## group, annotated with box-plot-like summaries: mean, median, quartiles,
## 0.05/0.95 quantiles, optional Gini mean difference and SD traces, and
## optional connecting segments between the outer quantiles.
## Returns a plotly object.
## Note: the default p=plotly::plot_ly(height=height) refers to the height
## computed near the end of the body; lazy evaluation of the default makes
## this work because p is first used after height is assigned.
histboxp <- function(p=plotly::plot_ly(height=height),
                     x, group=NULL, xlab=NULL,
                     gmd=TRUE, sd=FALSE, bins=100, wmax=190, mult=7,
                     connect=TRUE, showlegend=TRUE) {
  ## NOTE(review): other calls in this package use
  ## requireNamespace(..., quietly=TRUE); consider the same here
  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")
  if(! length(xlab)) xlab <- label(x, html=TRUE, plot=TRUE,
                                   default=deparse(substitute(x)))
  if(! length(group)) group <- rep(1, length(x))
  if(length(x) != length(group)) stop('x and group must be same length')
  distinct <- unique(x)
  distinct <- distinct[! is.na(distinct)]
  xmin <- min(distinct)
  xr <- x
  ## Still do slight rounding if < bins distinct values because
  ## values extremely close to each other won't show otherwise
  if((length(distinct) > bins) ||
     min(diff(sort(distinct))) < diff(range(distinct)) / (5 * bins)) {
    pret <- pretty(x, if(length(distinct) > bins) bins else 5 * bins)
    dist <- pret[2] - pret[1]
    r <- range(pret)
    xr <- r[1] + dist * round((x - r[1]) / dist)  # snap x to bin midpoints
  }
  mu <- markupSpecs$html
  fmt <- function(x) htmlSN(x, digits=5)
  y <- 0    # vertical band position; decremented by 1 per group
  dh <- dm <- dq1 <- dq2 <- dq3 <- dgmd <- dsd <- levs <- NULL
  group <- as.factor(group)
  mis <- is.na(x)
  levs <- levels(group)
  ng <- length(levs)
  Qu <- matrix(NA, nrow=ng, ncol=5)  # per-group quantiles, used by connect=
  j <- 0
  for(g in levels(group)) {
    i <- group == g
    j <- j + 1
    miss <- sum(mis[i])
    if(miss > 0) i <- i & ! mis
    if(! any(i)) next
    u <- x[i]    # raw values, used for statistics
    ur <- xr[i]  # rounded values, used for the histogram
    tab <- as.data.frame(table(ur))
    z <- as.numeric(as.character(tab$ur))
    prop <- tab$Freq / length(ur)
    y <- y - 1
    ## dh: one row per histogram spike, with hover text
    dh <- rbind(dh, data.frame(x=z, prop=prop, freq=tab$Freq,
                               txt=paste0(fmt(z), ' ', round(prop, 3),
                                          ' n=', tab$Freq),
                               y=y))
    dm <- rbind(dm, data.frame(Mean=mean(u), n=length(u), miss=miss, y=y))
    if(gmd) {
      Gmd <- GiniMd(u)
      dgmd <- rbind(dgmd, data.frame(Gmd, x=xmin,
                                     txt=paste0('Gini mean difference:',
                                                fmt(Gmd)),
                                     y=y))
    }
    if(sd) {
      Sd <- sd(u)
      dsd <- rbind(dsd, data.frame(sd=Sd, x=xmin,
                                   txt=paste0('SD:', fmt(Sd)),
                                   y=y))
    }
    probs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
    qu <- quantile(u, probs)
    Qu[j, ] <- qu
    nam <- paste0('Q', mu$sub(probs))
    txt <- paste0(nam, ':', fmt(qu))
    dq1 <- rbind(dq1, data.frame(Median=qu[3], txt=txt[3], y=y))
    dq2 <- rbind(dq2, data.frame(quartiles=qu[c(2,4)], txt=txt[c(2,4)], y=y))
    dq3 <- rbind(dq3, data.frame(outer=qu[c(1,5)], txt=txt[c(1,5)], y=y))
  }
  ## Device height; extra room when both gmd and sd traces are shown
  height <- plotlyParm$heightDotchart(1.2 * ng) + 50 * (gmd & sd)
  if(length(.Options$plotlyauto) && .Options$plotlyauto) height <- NULL
  dh$prop <- 0.6 * dh$prop / max(dh$prop)  # scale tallest spike to 0.6
  p <- plotly::add_segments(p, data=dh,
                            x = ~ x,
                            y = ~ y,
                            xend = ~ x,
                            yend = ~ y + prop,
                            text = ~ txt,
                            hoverinfo = 'text',
                            color = I('black'),
                            name = 'Histogram',
                            legendgroup = 'Histogram',
                            showlegend=showlegend)
  dm$txt <- with(dm, paste0('Mean:', fmt(Mean), ' n=', n,
                            ' ', miss, ' missing'))
  ## a..b: vertical extent below each band reserved for the summary glyphs
  a <- 0.05
  b <- 0.4
  k <- (a + b) / 2
  w <- (b - a) / 2
  p <- plotly::add_markers(p, data=dm, mode='markers', color=I('black'),
                           x = ~ Mean, y = ~ y - k,
                           text = ~ txt,
                           hoverinfo = 'text', size=I(5),
                           name='Mean', legendgroup='Mean',
                           showlegend=showlegend)
  ## Helper: vertical tick segments for a set of quantiles
  segs <- function(p, x, y, yend, text, data, color, name, width=2) {
    plotly::add_segments(p, data=data,
                         x=x, y=y,
                         xend=x, yend=yend,
                         text=text, hoverinfo='text',
                         name=name, legendgroup=name,
                         showlegend=showlegend,
                         color=color, line=list(width=width))
  }
  p <- segs(p, x=~Median, y=~y-k-w, yend=~y-k+w, text=~txt,
            data=dq1, color=I('black'), name='Median', width=3)
  p <- segs(p, x=~quartiles, y=~y-k-w*.8, yend=~y-k+w*.8, text=~txt,
            data=dq2, color=I('blue'), name='Quartiles')
  onam <- '0.05, 0.95 Quantiles'
  p <- segs(p, x=~outer, y=~y-k-w*.64, yend=~y-k+w*.64, text=~txt,
            data=dq3, color=I('red'), name=onam)
  if(connect) {
    ## Faint horizontal segments joining quantile ticks within each band
    ys <- -(1 : ng) - k
    qs <- function(p, x, xend, color, lg)
      plotly::add_segments(p, x=x, xend=xend, y=~ys, yend=~ys,
                           hoverinfo='none', showlegend=FALSE,
                           alpha=0.3, color=color,
                           legendgroup=lg, name='ignored')
    p <- qs(p, x= ~ Qu[,1], xend=~ Qu[,2], color=I('red'), lg=onam)
    p <- qs(p, x= ~ Qu[,2], xend=~ Qu[,4], color=I('blue'), lg='Quartiles')
    p <- qs(p, x= ~ Qu[,4], xend=~ Qu[,5], color=I('red'), lg=onam)
  }
  gnam <- paste0('Gini ', mu$overbar(paste0('|', htmlGreek('Delta'), '|')))
  ## Gini and SD traces start hidden (visible='legendonly')
  if(gmd)
    p <- plotly::add_segments(p, data=dgmd,
                              x = ~ x,
                              y = ~ y - 0.19,
                              xend = ~ x + Gmd,
                              yend = ~ y - 0.19,
                              text = ~ txt,
                              hoverinfo = 'text',
                              color = I('light gray'),
                              name = gnam, legendgroup=gnam,
                              visible='legendonly',
                              showlegend=showlegend)
  if(sd)
    p <- plotly::add_segments(p, data=dsd,
                              x = ~ x,
                              y = ~ y - 0.23,
                              xend = ~ x + sd,
                              yend = ~ y - 0.23,
                              text = ~ txt,
                              hoverinfo = 'text',
                              color = I('light blue'),
                              name = 'SD', legendgroup='SD',
                              visible='legendonly',
                              showlegend=showlegend)
  p <- plotly::layout(p,
                      margin = list(l=plotlyParm$lrmargin(levs,
                                                          wmax=wmax, mult=mult)),
                      xaxis = list(title=xlab, zeroline=FALSE),
                      yaxis = list(title='',
                                   tickvals= - (1 : ng),
                                   ticktext = levs))
  p
}
## dhistboxp: data-only analogue of histboxp for stratified displays.
## For each stratum (vertical position) and group (alternating above/below
## offsets within a stratum) it computes spike-histogram segments, the
## median, optional Gini mean difference and SD bars, and (when n >= nmin)
## quantile tick marks.  Returns a single data frame R with columns
## x, xhi, y, yhi, group, strata, txt, type, connect for the caller to plot;
## no plotting is done here.
dhistboxp <- function(x, group=NULL, strata=NULL, xlab=NULL,
                      gmd=FALSE, sd=FALSE, bins=100, nmin=5, ff1=1, ff2=1) {
  if(! length(group)) group <- rep(1, length(x))
  if(length(x) != length(group)) stop('x and group must be same length')
  if(! length(strata)) strata <- rep(1, length(x))
  if(length(x) != length(strata)) stop('x and strata must be same length')
  distinct <- unique(x)
  distinct <- distinct[! is.na(distinct)]
  xmin <- min(distinct)
  xr <- x
  ustrata <- sort(unique(strata))
  ## Still do slight rounding if < bins distinct values because
  ## values extremely close to each other won't show otherwise
  if(length(distinct) > bins ||
     min(diff(sort(distinct))) < diff(range(distinct)) / (5 * bins)) {
    pret <- pretty(x, if(length(distinct) > bins) bins else 5 * bins)
    dist <- pret[2] - pret[1]
    r <- range(pret)
    xr <- r[1] + dist * round((x - r[1]) / dist)  # snap x to bin midpoints
  }
  mu <- markupSpecs$html
  fmt <- function(x) htmlSN(x, digits=5)
  ## Robust quantiles: Harrell-Davis for n >= 3, ordinary otherwise
  quant <- function(x, probs=0.5) {
    x <- x[! is.na(x)]
    n <- length(x)
    if(! n) return(rep(NA, length(probs)))
    if(n < 3) return(quantile(x, probs))
    hdquantile(x, probs, se=FALSE)
  }
  group <- as.factor(group)
  mis <- is.na(x)
  levs <- levels(group)
  ng <- length(levs)
  ## stdel: base vertical offset unit between groups within a stratum
  stdel <- diff(range(ustrata)) * 0.025 * ff1
  stmin <- min(strata, na.rm=TRUE)
  R <- NULL
  ## Compute maximum proportion in any bin in any group/stratum
  prop <- numeric(0)
  nn <- integer(0)
  j <- 0
  for(st in ustrata) {
    for(g in levels(group)) {
      i <- strata == st & group == g
      if(any(i)) {
        tab <- table(xr[i])
        den <- sum(tab)
        if(den > 0) {
          j <- j + 1
          prop <- c(prop, max(tab) / den)
          nn <- c(nn, den)
        }
      }
    }
  }
  ## Scale spikes by the largest bin proportion among adequately-sized
  ## cells, capped at the weighted 0.98 quantile to resist outliers
  maxp <- if(any(nn >= nmin)) max(prop[nn >= nmin]) else max(prop)
  maxp <- min(maxp, wtd.quantile(prop, probs=0.98, weights=nn))
  propfac <- stdel / maxp
  j <- 0
  for(st in ustrata) {
    stdir <- 1   # alternates -1/+1: groups drawn below/above the stratum line
    ig <- 0
    for(g in levels(group)) {
      ig <- ig + 1
      stdir <- - stdir
      stoffset <- stdir * (stdel * floor((ig + 1) / 2))
      i <- strata == st & group == g
      miss <- sum(mis[i])
      if(miss > 0) i <- i & ! mis
      if(! any(i)) next
      u <- x[i]    # raw values, used for statistics
      ur <- xr[i]  # rounded values, used for the histogram
      nn <- length(ur)
      if(nn >= nmin) {
        tab <- as.data.frame(table(ur))
        z <- as.numeric(as.character(tab$ur))
        prop <- tab$Freq / nn
        R <- rbind(R,
                   data.frame(x=z,
                              y =st + 0.5 * stdir * stdel, xhi=NA,
                              yhi=st + 0.5 * stdir * stdel +
                                stoffset * prop * propfac * ff2,
                              group=g, strata=st,
                              txt=paste0(fmt(z), ' ', round(prop, 3),
                                         ' n=', tab$Freq),
                              type='histogram', connect=NA))
      }
      med <- quant(u)
      R <- rbind(R,
                 data.frame(x=med, y=st, xhi=NA, yhi=NA,
                            group=g, strata=st,
                            txt=paste0('Median:', fmt(med),
                                       ' n=', length(u),
                                       ' ', miss, ' missing'),
                            type='median', connect=TRUE))
      if(gmd) {
        gnam <- paste0('Gini ', mu$overbar(paste0('|', htmlGreek('Delta'), '|')))
        Gmd <- GiniMd(u)
        ## Horizontal bar of length Gmd anchored at xmin
        R <- rbind(R,
                   data.frame(x = xmin,
                              xhi = xmin + Gmd,
                              y = st - stdel * 2,
                              yhi = NA,
                              group=g, strata=st,
                              txt=paste0(gnam, ': ', fmt(Gmd)),
                              type=gnam, connect=NA))
      }
      if(sd) {
        Sd <- sd(u)
        R <- rbind(R,
                   data.frame(x = xmin,
                              xhi = xmin + Sd,
                              y = st - stdel * (if(gmd) 3 else 2),
                              yhi = NA,
                              group=g, strata=st,
                              txt = paste0('SD:', fmt(Sd)),
                              type = 'SD', connect=NA))
      }
      if(nn >= nmin) {
        probs <- c(0.5, 0.25, 0.75, 0.05, 0.95)
        qu <- quant(u, probs)
        nam <- paste0('Q', mu$sub(probs))
        txt <- paste0(nam, ':', fmt(qu))
        ## Tick lengths: median tallest, quartiles, then outer quantiles
        mult <- c(1.15, 1, 1, 0.64, 0.64)
        yinc <- 0.3 * stdir * stdel * mult
        ycenter <- st + stdir * stdel + stoffset * maxp * propfac
        R <- rbind(R,
                   data.frame(x = qu,
                              xhi = NA,
                              y = ycenter - yinc,
                              yhi = ycenter + yinc,
                              group=g, strata=st,
                              txt = txt,
                              type='quantiles', connect=FALSE))
        ## Horizontal connector spanning the outer quantiles
        R <- rbind(R,
                   data.frame(x = min(qu),
                              xhi = max(qu),
                              y = ycenter,
                              yhi = ycenter,
                              group=g, strata=st,
                              txt = '',
                              type='quantiles', connect=FALSE))
      }
    } ## end groups
  } ## end strata
  R
}
## histboxpM: plotly spike histogram + summary display for a single variable
## or for every column of a data frame, the latter combined with
## plotly::subplot().  Thin multi-variable wrapper around histboxp().
##
## Arguments:
##   p            plotly object to build on.  Its default uses height/width
##                computed inside the body; R's lazy evaluation of defaults
##                makes this work because p is only forced later.
##   x            numeric vector or data frame of variables to display
##   group        optional grouping variable passed to histboxp()
##   gmd, sd      include Gini mean difference / SD traces (see histboxp)
##   width        device width; NULL-ed out when options(plotlyauto=TRUE)
##   nrows, ncols layout of the subplot matrix (ncols takes precedence)
##   ...          additional arguments passed to histboxp()
## Returns a plotly object (a single plot or a subplot matrix).
histboxpM <- function(p=plotly::plot_ly(height=height, width=width),
                      x, group=NULL,
                      gmd=TRUE, sd=FALSE, width=NULL, nrows=NULL, ncols=NULL, ...) {
  ## quietly=TRUE for consistency with the package's other
  ## requireNamespace() calls; avoids a stray "Loading required namespace"
  ## message when plotly is absent
  if (!requireNamespace("plotly", quietly=TRUE))
    stop("This function requires the 'plotly' package.")
  ## See stackoverflow.com/questions/26939121
  ## stackoverflow.com/questions/39948151
  nx <- if(is.data.frame(x)) ncol(x) else 1
  ng <- if(length(group)) length(unique(group)) else 1
  ## One histboxp-height panel per variable, capped at 1700 pixels
  height <- nx * (plotlyParm$heightDotchart(1.2 * ng) + 50 * (gmd & sd))
  height <- min(height, 1700)
  auto <- .Options$plotlyauto
  if(length(auto) && auto) height <- width <- NULL
  nam <- deparse(substitute(x))
  if(is.data.frame(x) && ncol(x) == 1) x <- x[[1]]
  ## Single variable: delegate directly to histboxp
  if(! is.data.frame(x))
    return(histboxp(p=p, x=x, group=group,
                    xlab=labelPlotmath(label(x, default=nam), units(x),
                                       html=TRUE), gmd=gmd, sd=sd, ...))
  P <- list()
  for(i in 1 : nx) {
    y <- x[[i]]
    xlab <- labelPlotmath(label(y, default=names(x)[i]), units(y), html=TRUE)
    ## Show the legend only on the last panel to avoid duplication
    P[[i]] <- histboxp(p, x=y, group=group, xlab=xlab, showlegend=i==nx,
                       gmd=gmd, sd=sd, ...)
  }
  if(length(ncols)) nrows <- ceil(ncol(x) / ncols)
  else if(! length(nrows)) nrows <- ncol(x)
  plotly::subplot(P, nrows=nrows, shareX=FALSE, shareY=FALSE,
                  titleX=TRUE, margin=c(.02, .02, .05, .04))
}
## ecdfpM: plotly empirical CDF plots for one or more variables (columns of
## x), optionally stratified by group.  what= selects the transformation of
## the cumulative proportion: 'F' (ECDF), '1-F' (survival-style), 'f'
## (cumulative count) or '1-f'.  q= adds horizontal/vertical reference
## segments at the requested quantiles.  Multiple variables are combined
## with plotly::subplot().  Returns a plotly object.
ecdfpM <- function(x, group=NULL, what=c('F','1-F','f','1-f'), q=NULL,
                   extra=c(0.025, 0.025), xlab=NULL, ylab=NULL,
                   height=NULL, width=NULL,
                   colors=NULL, nrows=NULL, ncols=NULL, ...) {
  ## NOTE(review): other calls in this package use
  ## requireNamespace(..., quietly=TRUE); consider the same here
  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")
  auto <- .Options$plotlyauto
  if(length(auto) && auto) height <- width <- NULL
  what <- match.arg(what)
  nam <- deparse(substitute(x))
  if(! is.data.frame(x)) x <- data.frame(x)
  nx <- ncol(x)
  if(! length(group)) group <- rep('', nrow(x))
  group <- as.factor(group)
  fmt <- function(x) htmlSN(x, digits=5)
  ## trans maps a cumulative proportion to the displayed y value; the 'f'
  ## variants reference n, which is assigned per group inside the loop
  ## below and found by lexical scoping when trans is called
  trans <- switch(what,
                  'F'   = function(y) y,
                  '1-F' = function(y) 1 - y,
                  'f'   = function(y) n * y,
                  '1-f' = function(y) n * (1 - y))
  P <- list()
  kq <- length(q)
  for(i in 1 : nx) {
    y <- x[[i]]
    rng <- range(y, na.rm=TRUE)
    xl <- if(! length(xlab))
            labelPlotmath(label(y, default=names(x)[i]), units(y), html=TRUE)
          else
            xlab[min(length(xlab), i)]
    D <- Dq <- NULL
    p <- plotly::plot_ly(height=height, width=width)
    for(gv in levels(group)) {
      j <- group == gv & ! is.na(y)
      yg <- sort(y[j])
      n <- length(yg)
      vals <- unique(yg) # see stats::ecdf
      ## Step-function ECDF evaluated at the distinct values
      a <- approx(vals, cumsum(tabulate(match(yg, vals))) / n,
                  method='constant', yleft=0, yright=1, f=0,
                  ties='ordered', xout=vals)
      ## Extend the curve slightly past the data range so steps at the
      ## extremes are visible
      delta <- diff(rng)
      a$x <- c(min(a$x) - extra[1] * delta, a$x, max(a$x) + extra[2] * delta)
      a$y <- c(0, a$y, 1)
      yg <- a$y
      d <- data.frame(x = a$x, y = trans(yg), g=gv)
      D <- rbind(D, d)
      if(kq) for(k in 1 : kq) {
        ## Reference segments: horizontal from the left edge to the
        ## quantile, then vertical down to the axis
        quant <- min(a$x[yg >= q[k]])
        tx <- paste0(q[k], ' quantile of ', names(x)[i], ':', fmt(quant))
        if(gv != '') tx <- paste0(tx, ' ', gv)
        Dq <- rbind(Dq,
                    data.frame(x=rng[1], xe=quant,
                               y=trans(q[k]), ye=trans(q[k]), g=gv, txt=''),
                    data.frame(x=quant, xe=quant,
                               y=trans(q[k]), ye=trans(0), g=gv, txt=tx))
      }
    }
    ## Step orientation flips for the decreasing ('1-') displays
    linet <- if(what %in% c('F', 'f')) 'hv' else 'vh'
    p <- plotly::add_lines(p, data=D, x= ~ x, y= ~ y,
                           showlegend = i == nx,
                           color= ~ g,
                           colors=colors,
                           line=list(shape=linet),
                           ...)
    if(kq)
      p <- plotly::add_segments(p, data=Dq,
                                x = ~ x, xend = ~ xe,
                                y = ~ y, yend = ~ ye,
                                color = ~ g,
                                colors = colors,
                                alpha = 0.4,
                                text = ~ txt, hoverinfo='text',
                                name = 'Quantiles',
                                legendgroup = "quantiles",
                                showlegend = i == nx)
    yt <- if(length(ylab)) ylab
          else
            if(what %in% c('F', '1-F')) 'Cumulative Proportion'
          else
            'Number of Observations'
    p <- plotly::layout(p,
                        xaxis = list(title=xl, zeroline=FALSE),
                        yaxis = list(title=yt))
    P[[i]] <- p
  }
  if(nx == 1) return(p)
  if(length(ncols)) nrows <- ceil(nx / ncols)
  else if(! length(nrows)) nrows <- nx
  plotly::subplot(P, nrows=nrows, shareX=FALSE,
                  titleX=TRUE, margin=c(.02, .02, .05, .04))
}
Hmisc/R/upData.s 0000644 0001762 0000144 00000042345 14763313341 013135 0 ustar ligges users ## cleanup.import: post-process a freshly imported data frame: strip
## useless attributes, convert date/date-time strings, optionally
## auto-detect dates (autodate) and numerics stored as character (autonum),
## set NA for infinite/huge values, downcast doubles to integer when exact
## (force.single), convert numeric-looking factors (force.numeric), and
## attach variable labels (labels= or a SAS dictionary via sasdict).
## Returns the modified data frame.
cleanup.import <-
  function(obj, labels=NULL, lowernames=FALSE,
           force.single=TRUE, force.numeric=TRUE,
           rmnames=TRUE,
           big=1e20, sasdict,
           print=prod(dimobj) > 5e5,
           datevars=NULL, datetimevars=NULL,
           dateformat='%F', fixdates=c('none','year'),
           autodate=FALSE, autonum=FALSE, fracnn=0.3,
           considerNA=NULL,
           charfactor=FALSE)
{
  fixdates <- match.arg(fixdates)
  nam <- names(obj)
  dimobj <- dim(obj)
  nv <- length(nam)
  ## sasdict: take variable labels from a SAS PROC CONTENTS-style data frame
  if(!missing(sasdict)) {
    sasvname <- makeNames(sasdict$NAME)
    if(any(w <- nam %nin% sasvname))
      stop(paste('The following variables are not in sasdict:',
                 paste(nam[w],collapse=' ')))
    saslabel <- structure(as.character(sasdict$LABEL),
                          names=as.character(sasvname))
    labels <- saslabel[nam]
    names(labels) <- NULL
  }
  if(length(labels) && length(labels) != dimobj[2])
    stop('length of labels does not match number of variables')
  if(lowernames)
    names(obj) <- casefold(nam)
  if(print)
    cat(dimobj[2],'variables; Processing variable:')
  for(i in 1:dimobj[2]) {
    if(print) cat(i,'')
    x <- obj[[i]]
    modif <- FALSE   # write x back into obj only if something changed
    if(length(dim(x)))
      next           # skip matrix columns
    ## Remove element names left over from the import
    if(rmnames) {
      if(length(attr(x,'names'))) {
        attr(x,'names') <- NULL
        modif <- TRUE
      } else if(length(attr(x,'.Names'))) {
        attr(x,'.Names') <- NULL
        modif <- TRUE
      }
    }
    if(length(attr(x,'Csingle'))) {
      attr(x,'Csingle') <- NULL
      modif <- TRUE
    }
    ## Explicit date / date-time conversion for the named variables
    if(length(c(datevars,datetimevars)) &&
       nam[i] %in% c(datevars,datetimevars) &&
       !all(is.na(x))) {
      if(!(is.factor(x) || is.character(x)))
        stop(paste('variable',nam[i],
                   'must be a factor or character variable for date conversion'))
      x <- as.character(x)
      ## trim leading and trailing white space
      x <- sub('^[[:space:]]+','',sub('[[:space:]]+$','', x))
      xt <- NULL
      if(nam[i] %in% datetimevars) {
        ## Split off a trailing hh:mm:ss time component, if informative
        xt <- gsub('.* ([0-9][0-9]:[0-9][0-9]:[0-9][0-9])','\\1',x)
        xtnna <- setdiff(xt, c('',' ','00:00:00'))
        if(!length(xtnna)) xt <- NULL
        x <- gsub(' [0-9][0-9]:[0-9][0-9]:[0-9][0-9]','',x)
      }
      ## fixdates='year': repair 2-digit years for the supported formats
      if(fixdates != 'none') {
        if(dateformat %nin% c('%F','%y-%m-%d','%m/%d/%y','%m/%d/%Y'))
          stop('fixdates only supported for dateformat %F %y-%m-%d %m/%d/%y %m/%d/%Y')
        x <-
          switch(dateformat,
                 '%F'      =gsub('^([0-9]{2})-([0-9]{1,2})-([0-9]{1,2})',
                                 '20\\1-\\2-\\3',x),
                 '%y-%m-%d'=gsub('^[0-9]{2}([0-9]{2})-([0-9]{1,2})-([0-9]{1,2})',
                                 '\\1-\\2-\\3',x),
                 '%m/%d/%y'=gsub('^([0-9]{1,2})/([0-9]{1,2})/[0-9]{2}([0-9]{2})',
                                 '\\1/\\2/\\3',x),
                 '%m/%d/%Y'=gsub('^([0-9]{1,2})/([0-9]{1,2})/([0-9]{2})$',
                                 '\\1/\\2/20\\3',x)
                 )
      }
      ## Use chron for date-times when available, otherwise plain Dates
      x <- if(length(xt) && requireNamespace("chron", quietly = TRUE)) {
             cform <- if(dateformat=='%F') 'y-m-d'
                      else gsub('%','',tolower(dateformat))
             chron::chron(x, xt, format=c(dates=cform,times='h:m:s'))
           }
           else as.Date(x, format=dateformat)
      modif <- TRUE
    }
    ## autodate: heuristic conversion of character dates (convertPdate)
    if(autodate) {
      x <- convertPdate(x, fracnn=fracnn, considerNA=considerNA)
      if(inherits(x, 'Date')) modif <- TRUE
    }
    ## autonum: convert character/factor to numeric when few enough values
    ## fail to parse; failing codes are preserved as special.miss
    if(autonum && (is.character(x) || is.factor(x))) {
      xc <- trimws(as.character(x))
      bl <- is.na(xc) | xc == ''
      xn <- suppressWarnings(as.numeric(xc))
      ignore <- bl | tolower(xc) %in% tolower(trimws(considerNA))
      illegal <- is.na(xn) & ! bl
      illegal.not.ignored <- xc[illegal & ! ignore]
      # if(length(illegal) < sum(tolower(xc) %nin%
      #              c('', tolower(considerNA)) & ! is.na(xc)) * fracnn) {
      if(length(illegal.not.ignored) < sum(! ignore) * fracnn) {
        labx <- attr(x, 'label')
        x <- xn
        attr(x, 'label') <- labx  # was label(x) <- 2025-03-09
        attr(x, 'special.miss') <-
          list(codes=xc[illegal], obs=which(illegal))
        class(x) <- c(class(x), 'special.miss')
        modif <- TRUE
      }
    }
    if(length(labels)) {
      label(x) <- labels[i]
      modif <- TRUE
    }
    ## force.numeric: factor whose levels are all numeric -> numeric
    if(force.numeric && length(lev <- levels(x))) {
      if(all.is.numeric(lev)) {
        labx <- attr(x,'label')
        x <- as.numeric(as.character(x))
        attr(x, 'label') <- labx  # was label(x) <- 2025-03-09
        modif <- TRUE
      }
    }
    if(storage.mode(x) == 'double') {
      xu <- unclass(x)
      ## Treat Inf/NaN/absurdly large magnitudes (> big) as missing
      j <- is.infinite(xu) | is.nan(xu) | abs(xu) > big
      if(any(j,na.rm=TRUE)) {
        x[j] <- NA
        modif <- TRUE
        if(print)
          cat('\n')
        cat(sum(j,na.rm=TRUE),'infinite values set to NA for variable',
            nam[i],'\n')
      }
      isdate <- testDateTime(x)
      ## force.single: downcast doubles to integer storage when exact
      if(force.single && !isdate) {
        allna <- all(is.na(x))
        if(allna) {
          storage.mode(x) <- 'integer'
          modif <- TRUE
        }
        if(!allna) {
          notfractional <- !any(floor(x) != x, na.rm=TRUE)
          if(max(abs(x),na.rm=TRUE) <= (2^31-1) && notfractional) {
            storage.mode(x) <- 'integer'
            modif <- TRUE
          }
        }
      }
    }
    ## charfactor: convert character to factor when values repeat a lot
    if(charfactor && is.character(x)) {
      if(length(unique(x)) < .5*length(x)) {
        x <- sub(' +$', '', x) # remove trailing blanks
        x <- factor(x, exclude=c('', NA))
        modif <- TRUE
      }
    }
    if(modif) obj[[i]] <- x
    NULL
  }
  if(print) cat('\n')
  ## Carry the SAS data set name/label along as attributes
  if(!missing(sasdict)) {
    sasat <- sasdict[1,]
    attributes(obj) <- c(attributes(obj),
                         sasds=as.character(sasat$MEMNAME),
                         sasdslabel=as.character(sasat$MEMLABEL))
  }
  obj
}
## upData: update a data frame (or data.table) in many ways at once:
## subset rows, rename/drop/keep variables, add or modify variables via
## name=expression in ..., set labels, units and factor levels, move
## parenthesized units out of labels (moveUnits), capitalize labels
## (caplabels), downcast doubles to integer when exact (force.single), and
## convert repetitive character variables to factors (charfactor).
## Progress messages are printed (print=TRUE) or emitted as HTML
## (html=TRUE).  Returns the modified object.
upData <- function(object, ...,
                   subset, rename=NULL, drop=NULL, keep=NULL,
                   labels=NULL, units=NULL, levels=NULL,
                   force.single=TRUE, lowernames=FALSE, caplabels=FALSE, classlab=FALSE,
                   moveUnits=FALSE, charfactor=FALSE, print=TRUE, html=FALSE) {
  if(html) print <- FALSE
  ## Capitalize the first letter of each word
  upfirst <- function(txt) gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", txt, perl=TRUE)
  if(lowernames) names(object) <- casefold(names(object))
  isdt <- inherits(object, 'data.table')
  no <- names(object)
  nobs <- nrow(object)
  out <- paste('Input object size:\t', object.size(object), 'bytes;\t',
               length(no), 'variables\t', nobs, 'observations\n')
  if(print) cat(out)
  ## subset= is evaluated with the object's variables in scope
  if(! missing(subset)) {
    s <- substitute(subset)
    r <- eval(s, object, parent.frame())
    if(! is.logical(r)) stop('subset must be a logical expression')
    r <- r & ! is.na(r)
    object <- object[r, , drop=FALSE]
    nobs <- sum(r)
  }
  rnames <- row.names(object)
  ## Per-variable bookkeeping: storage mode, 'labelled' class, label
  ## presence, and suitability for charfactor conversion
  g <- function(x)
    c(sm       = storage.mode(x),
      labclass = inherits(x, 'labelled'),
      labpres  = length(la <- attr(x, 'label')) && la != '',
      lowuc    = is.character(x) && length(unique(x)) < length(x) / 2)
  vinfo <- sapply(object, g)
  ## Variables with labels but not classed as 'labelled' will not
  ## keep labels upon subscripting.  Examples: variables imported
  ## using the haven package
  j <- which(vinfo['labpres', ] == 'TRUE' & vinfo['labclass', ] == 'FALSE')
  if(classlab && length(j))
    for(i in j) {
      x <- object[[i]]
      class(x) <- c('labelled', class(x))
      object[[i]] <- x
    }
  ## Much slower:
  ## for(i in j) class(object[[i]]) <- c('labelled', class(object[[i]]))
  ## The following is targeted at R workspaces exported from StatTransfer
  al <- attr(object, 'var.labels')
  if(length(al)) {
    if(caplabels) al <- upfirst(al)
    for(i in 1:length(no))
      if(al[i] != '') label(object[[i]]) <- al[i]
    attr(object, 'var.labels') <- NULL
    if(missing(force.single)) force.single <- FALSE
  } else
    if(caplabels) {
      for(i in which(vinfo['labpres', ] == 'TRUE')) {
        x <- object[[i]]
        if(length(la <- attr(x, 'label'))) {
          attr(x, 'label') <- upfirst(la)
          object[[i]] <- x
        }
      }
    }
  ## StatTransfer-style value labels -> factors
  al <- attr(object, 'label.table')
  if(length(al)) {
    for(i in 1 : length(no)) {
      ali <- al[[i]]
      if(length(ali))
        object[[i]] <- factor(object[[i]], unname(ali), names(ali))
    }
    attr(object, 'label.table') <- attr(object, 'val.labels') <- NULL
  }
  ## moveUnits: pull "(units)" or "[units]" out of labels into units attr
  if(moveUnits)
    for(i in 1:length(no)) {
      z <- object[[i]]
      lab <- olab <- attr(z,'label')
      if(!length(lab) || length(attr(z, 'units')))
        next
      paren <- length(grep('\\(.*\\)',lab))
      brack <- length(grep('\\[.*\\]',lab))
      if(paren + brack == 0) next
      u <- if(paren) regexpr('\\(.*\\)', lab)
           else regexpr('\\[.*\\]', lab)
      len <- attr(u,'match.length')
      un <- substring(lab, u + 1, u + len - 2)
      lab <- substring(lab, 1, u-1)
      if(substring(lab, nchar(lab), nchar(lab)) == ' ')
        lab <- substring(lab, 1, nchar(lab) - 1)
      out <- c(out, outn <- paste('Label for', no[i], 'changed from',
                                  olab, 'to',
                                  lab, '\n\tunits set to', un, '\n'))
      if(print) cat(outn)
      attr(z,'label') <- lab
      attr(z,'units') <- un
      object[[i]] <- z
    }
  ## rename: named list/vector old.name = new.name
  if(length(rename)) {
    nr <- names(rename)
    if(length(nr) == 0 || any(nr == ''))
      stop('the list or vector specified in rename must specify variable names')
    for(i in 1 : length(rename)) {
      if(nr[i] %nin% no)
        stop(paste('unknown variable name:',nr[i]))
      out <- c(out, outn <- paste('Renamed variable\t', nr[i],
                                  '\tto', rename[[i]], '\n'))
      if(print) cat(outn)
    }
    no[match(nr, no)] <- colnames(vinfo)[match(nr, colnames(vinfo))] <-
      unlist(rename)
    names(object) <- no
  }
  ## name=expression arguments in ... add or modify variables; each
  ## expression sees the object's variables and then the caller's frame
  z <- substitute(list(...))
  if(length(z) > 1) {
    z <- z[-1]
    vn <- names(z)
    if(!length(vn) || any(vn == ''))
      stop('variables must all have names')
    for(i in 1 : length(z)) {
      x <- eval(z[[i]], object, parent.frame())
      v <- vn[i]
      if(v %in% no) {
        out <- c(out, outn <- paste0('Modified variable\t', v, '\n'))
        if(print) cat(outn)
        vinfo[, v] <- g(x)
      }
      else {
        out <- c(out, outn <- paste0('Added variable\t\t', v, '\n'))
        if(print) cat(outn)
        no <- c(no, v)
        vinfo <- cbind(vinfo, g(x))
        colnames(vinfo)[ncol(vinfo)] <- v
      }
      d <- dim(x)
      lx <- if(length(d)) d[1] else length(x)
      if(lx != nobs) {
        if(lx == 1) {
          warning(paste('length of ',v,
                        ' is 1; will replicate this value.', sep=''))
          x <- rep(x, length.out=nobs)
        }
        else {
          ## Hint at where a same-named object may have been picked up from
          f <- find(v)
          if(length(f)) {
            out <- c(out, outn <- paste('Variable', v, 'found in',
                                        paste(f, collapse=' '), '\n'))
            if(print) cat(outn)
          }
          stop(paste('length of ', v, ' (', lx, ')\n',
                     'does not match number of rows in object (',
                     nobs, ')', sep=''))
        }
      }
      ## If x is factor and is all NA, user probably miscoded.  Add
      ## msg.
      if(is.factor(x) && all(is.na(x)))
        warning(paste('Variable ',v,'is a factor with all values NA.\n',
                      'Check that the second argument to factor() matched the original levels.\n',
                      sep=''))
      object[[v]] <- x
    }
  }
  ## force.single: downcast exact-integer doubles to integer storage
  if(force.single) {
    ## sm <- sapply(object, storage.mode)
    sm <- vinfo['sm', ]
    ii <- which(sm == 'double')
    if(length(ii))
      for(i in ii) {
        x <- object[[i]]
        if(testDateTime(x) || is.matrix(x))
          next
        ## For long vectors don't consider unless the first 500 are NA
        ## all(is.na(x)) is slow
        notallna <- any(! is.na(x[1:min(nobs,500)]))
        notallna <- notallna || any(! is.na(x))
        if(! notallna) storage.mode(object[[i]]) <- 'integer'
        else {
          ## For long vectors don't consider unless the first 500
          ## are integer
          if(nobs < 500 || ! any(floor(x[1:500]) != x[1:500], na.rm=TRUE)) {
            notfractional <- ! any(floor(x) != x, na.rm=TRUE)
            if(notfractional && max(abs(x), na.rm=TRUE) <= (2 ^ 31 - 1)) {
              storage.mode(x) <- 'integer'
              object[[i]] <- x
            }
          }
        }
      }
  }
  ## charfactor: convert repetitive character variables (flagged in vinfo)
  if(charfactor) {
    mfact <- as.logical(vinfo['lowuc', ])
    if(any(mfact))
      for(i in which(mfact)) {
        x <- sub(' +$', '', object[[i]]) # remove trailing blanks
        object[[i]] <- factor(x, exclude=c('', NA))
      }
  }
  if(length(drop) && length(keep)) stop('cannot specify both drop and keep')
  if(length(drop)) {
    if(length(drop) == 1) {
      out <- c(out, outn <- paste0('Dropped variable\t',drop,'\n'))
      if(print) cat(outn)
    }
    else {
      out <- c(out, outn <- paste0('Dropped variables\t',
                                   paste(drop, collapse=','), '\n'))
      if(print) cat(outn)
    }
    s <- drop %nin% no
    if(any(s))
      warning(paste('The following variables in drop= are not in object:',
                    paste(drop[s], collapse=' ')))
    no <- no[no %nin% drop]
    ## data.table needs the ..no prefix form of column selection
    object <- if(isdt) object[, ..no] else object[no]
  }
  if(length(keep)) {
    if(length(keep) == 1) {
      out <- c(out, outn <- paste0('Kept variable\t', keep, '\n'))
      if(print) cat(outn)
    }
    else {
      out <- c(out, outn <- paste0('Kept variables\t',
                                   paste(keep, collapse=','), '\n'))
      if(print) cat(outn)
    }
    s <- keep %nin% no
    if(any(s))
      warning(paste('The following variables in keep= are not in object:',
                    paste(keep[s], collapse=' ')))
    no <- no[no %in% keep]
    object <- if(isdt) object[, ..no] else object[no]
  }
  ## levels: named list of new level sets, applied via levels<-
  if(length(levels)) {
    if(!is.list(levels)) stop('levels must be a list')
    nl <- names(levels)
    s <- nl %nin% no
    if(any(s)) {
      warning(paste('The following variables in levels= are not in object:',
                    paste(nl[s], collapse=' ')))
      nl <- nl[! s]
    }
    for(n in nl) {
      x <- object[[n]]
      if(! is.factor(x))
        x <- as.factor(x)
      levels(x) <- levels[[n]]
      object[[n]] <- x
      ## levels[[nn]] will usually be a list; S+ invokes merge.levels
    }
  }
  ## labels: named list/vector of variable labels
  if(length(labels)) {
    nl <- names(labels)
    if(!length(nl)) stop('elements of labels were unnamed')
    s <- nl %nin% no
    if(any(s)) {
      warning(paste('The following variables in labels= are not in object:',
                    paste(nl[s], collapse=' ')))
      nl <- nl[!s]
    }
    for(n in nl) {
      x <- object[[n]]
      label(x) <- labels[[n]]
      object[[n]] <- x
    }
  }
  ## units: named list/vector of measurement units
  if(length(units)) {
    nu <- names(units)
    s <- nu %nin% no
    if(any(s)) {
      warning(paste('The following variables in units= are not in object:',
                    paste(nu[s], collapse=' ')))
      nu <- nu[!s]
    }
    for(n in nu)
      attr(object[[n]], 'units') <- units[[n]]
  }
  out <- c(out, outn <- paste0('New object size:\t',
                               object.size(object),
                               ' bytes;\t', length(no), ' variables\t', nobs,
                               ' observations\n'))
  if(print) cat(outn)
  if(html) {
    cat('
\n')
    cat(out)
    cat('
\n')
  }
  object
}
## dataframeReduce: remove or simplify variables in a data frame that are
## too incomplete (fraction missing > fracmiss), have too many categories
## (> maxlevels), or have categories rarer than minprev.  Factors with rare
## levels are collapsed with combine.levels(); other offenders are dropped.
## A summary data frame of affected variables is printed (print=TRUE) and
## attached to the result as attribute 'info'.
dataframeReduce <- function(data, fracmiss=1, maxlevels=NULL,
                            minprev=0, print=TRUE) {
  ## Classify one variable; '' means keep unchanged, otherwise a reason
  ## string ('grouped categories' means collapse levels rather than drop)
  assess <- function(v, fracmiss, maxlevels, minprev) {
    joinReasons <- function(a, b)
      if(a == '') b else if(b == '') a else paste(a, b, sep=';')
    ## Matrix columns: only the fraction-missing test applies (a row is
    ## missing if any of its elements is NA)
    if(is.matrix(v)) {
      fmiss <- mean(is.na(v %*% rep(1, ncol(v))))
      if(fmiss > fracmiss) return(paste0('fraction missing>', fracmiss))
      return('')
    }
    fmiss <- mean(is.na(v))
    v  <- v[! is.na(v)]
    nv <- length(v)
    reason <- if(fmiss > fracmiss) paste0('fraction missing>', fracmiss)
              else ''
    if(is.character(v)) v <- factor(v)
    if(length(maxlevels) && is.factor(v) &&
       length(levels(v)) > maxlevels)
      return(joinReasons(reason, paste0('categories>', maxlevels)))
    extra <- ''
    ## Prevalence check for factors and binary variables
    if(is.factor(v) || length(unique(v)) == 2) {
      freqs <- table(v)
      if((min(freqs) / max(nv, 1L)) < minprev) {
        if(is.factor(v)) {
          v <- combine.levels(v, minlev=minprev)
          extra <- 'grouped categories'
          ## Collapsing left < 2 levels: variable is effectively constant
          if(length(levels(v)) < 2)
            extra <- paste0('prevalence<', minprev)
        }
        else extra <- paste0('prevalence<', minprev)
      }
    }
    joinReasons(reason, extra)
  }
  reasons <- sapply(data, assess, fracmiss, maxlevels, minprev)
  if(all(reasons == '')) return(data)
  flagged <- reasons != ''
  info <- data.frame(Variable=names(data)[flagged],
                     Reason=reasons[flagged], row.names=NULL,
                     check.names=FALSE)
  if(print) {
    cat('\nVariables Removed or Modified\n\n')
    print(info)
    cat('\n')
  }
  grouped <- reasons == 'grouped categories'
  if(any(grouped))
    for(i in which(grouped))
      data[[i]] <- combine.levels(data[[i]], minlev=minprev)
  ## Drop variables flagged for removal; keep clean and collapsed ones
  if(any(reasons != '' & ! grouped)) data <- data[reasons == '' | grouped]
  attr(data, 'info') <- info
  data
}
## Declare data.table's '..no' column-selection symbol (used in upData) so
## R CMD check does not report it as an undefined global variable
utils::globalVariables('..no')
Hmisc/R/Fdebug.r 0000644 0001762 0000144 00000003112 14764570453 013111 0 ustar ligges users #' Debug Printing Function Generator
#'
#' Takes the name of a system `options(opt=)` and checks to see if option `opt` is
#' set to `TRUE`, taking its default value to be `FALSE`. If `TRUE`, a function is
#' created that calls [prn()] to print an object with the object's name in the
#' description along with the option name and the name of the function within which
#' the generated function was called, if any. If option `opt` is not set, a dummy function
#' is generated instead. If `options(debug_file=)` is set when the generated function
#' is called, [prn()] output will be appended to that file name instead of the console.
#' At any time, set `options(debug_file='')` to resume printing to the console.
#'
#' @param opt character string containing an option name
#'
#' @returns a function
#' @export
#' @md
#' @author Frank Harrell
#'
#' @examples
#' dfun <- Fdebug('my_option_name') # my_option_name not currently set
#' dfun
#' dfun(sqrt(2))
#' options(my_option_name=TRUE)
#' dfun <- Fdebug('my_option_name')
#' dfun
#' dfun(sqrt(2))
#' # options(debug_file='/tmp/z') to append output to /tmp/z
#' options(my_option_name=NULL)
## Fdebug: factory for conditional debug printers.  If options(opt=TRUE)
## is set, returns a function that prints its argument with prn(), tagging
## the output with the option name and the name of the function in which
## Fdebug was called; output goes to options(debug_file=) when set.
## Otherwise returns a do-nothing function with the same formals.
Fdebug <- function(opt) {
  ## Option not set (default FALSE): hand back an inert stand-in
  if(! getOption(opt, FALSE))
    return(function(x, txt, callingfun, file) {NULL})
  printer <- function(x, txt, callingfun, file=getOption('debug_file', '')) {
    if(length(callingfun))
      txt <- paste0(txt, ': ', callingfun)
    head <- deparse(substitute(x), width.cutoff=500)[1]
    prn(x, txt, head=head, file=file)
  }
  ## Bake the option name and the name of Fdebug's caller (sys.call(-1))
  ## into the generated function's default arguments
  formals(printer)$txt        <- opt
  formals(printer)$callingfun <- as.character(sys.call(-1)[1])
  printer
}
Hmisc/R/varclus.s 0000644 0001762 0000144 00000023216 14377371516 013403 0 ustar ligges users ## varclus: hierarchical variable clustering.  Builds a similarity matrix
## among the columns of x (Spearman/Pearson correlation -- squared, absolute
## or raw per trans= -- Hoeffding's D, or proportion of paired positives),
## or accepts a ready-made similarity matrix (type='similarity.matrix'),
## then clusters variables with hclust() on 1 - similarity.  x may also be
## a formula; dummy variables are expanded via model.matrix().  Returns an
## object of class 'varclus'.
varclus <-
  function(x,
           similarity=c("spearman","pearson","hoeffding",
                        "bothpos","ccbothpos"),
           type=c("data.matrix","similarity.matrix"),
           method="complete",
           data=NULL, subset=NULL, na.action=na.retain,
           trans=c("square", "abs", "none"),
           ...)
{
  call <- match.call()
  type <- match.arg(type)
  if(type != "similarity.matrix") similarity <- match.arg(similarity)
  trans <- match.arg(trans)
  nact <- NULL
  ## Formula interface: expand to a design matrix (intercept removed)
  if(inherits(x,"formula")) {
    form <- x
    oldops <- options(contrasts=c("contr.treatment","contr.poly"))
    if(length(list(...))) data <- dataframeReduce(data, ...)
    x <- list(formula=form, data=data, na.action=na.action, subset=subset)
    x <- do.call('model.frame', x)
    nam <- names(x)
    nv <- length(x)
    Terms <- attr(x,'terms')
    nact <- attr(x,"na.action")
    x <- model.matrix(Terms, x)
    if(dimnames(x)[[2]][1]=='(Intercept)') x <- x[,-1]
    form <- TRUE
    options(oldops)
    type <- "data.matrix"
  }
  else form <- FALSE
  n <- NULL
  if(mode(x) != "numeric") stop("x matrix must be numeric")
  if(type == "data.matrix") { ## assume not a correlation matrix
    if(similarity %in% c("bothpos","ccbothpos")) {
      ## Similarity = proportion of pairwise non-missing observations in
      ## which both variables are positive ('ccbothpos' chance-corrects it)
      isthere <- 1*(! is.na(x))
      x[is.na(x)] <- 0
      x[x > 0] <- 1
      n <- crossprod(isthere)   # pairwise n
      x <- crossprod(x)/n
      if(similarity=='ccbothpos') {
        cc <- diag(x) %*% t(diag(x))
        cc[row(cc)==col(cc)] <- 0
        x <- x - cc
      }
    }
    else if(similarity=="hoeffding") {
      D <- hoeffd(x); x <- D$D; n <- D$n
    }
    else {
      ## Correlation similarity, transformed per trans=
      D <- rcorr(x, type=similarity)
      x <- D$r
      x <- switch(trans,
                  square = x^2,
                  abs    = abs(x),
                  none   = x)
      n <- D$n
    }
  }
  else if(diff(dim(x)) != 0)
    stop("x must be square to be a similarity matrix")
  if(any(is.na(x))) {
    cat("Part of the similarity matrix could not be computed:\n")
    x[x < .01] <- 0
    print(x, digits=2)
    stop()
  }
  ## ccbothpos similarities are not distances; skip clustering in that case
  w <- if(similarity=='ccbothpos') NULL
       else hclust(as.dist(1-x), method=method)
  structure(list(call=call, sim=x, n=n, hclust=w, similarity=similarity,
                 trans=trans, method=method, na.action=nact),
            class="varclus")
}
print.varclus <- function(x, abbrev=FALSE, ...)
{
  ## Print method for varclus objects: echoes the call, describes the
  ## similarity measure, prints the rounded similarity matrix, the
  ## number(s) of observations used, and the hclust result.
  dput(x$call); cat("\n")
  if(length(x$na.action))
    naprint(x$na.action)

  ## Human-readable name of the similarity measure used (NA for
  ## similarities not in this table, e.g. naclus's "Fraction Missing")
  tr <- x$trans
  desc <- c(hoeffding = "30 * Hoeffding D",
            spearman  = switch(tr,
                               square = "Spearman rho^2",
                               abs    = "|Spearman rho|",
                               none   = "Spearman rho"),
            pearson   = switch(tr,
                               square = "Pearson r^2",
                               abs    = "|Pearson r|",
                               none   = "Pearson r"),
            bothpos   = "Proportion",
            ccbothpos = "Chance-Corrected Proportion")[x$similarity]
  cat("\nSimilarity matrix (", desc, ")\n\n", sep="")

  ## Similarity matrix, optionally with abbreviated variable names
  sim    <- x$sim
  vnames <- dimnames(sim)[[2]]
  if(abbrev)
    vnames <- abbreviate(vnames)
  dimnames(sim) <- list(vnames, vnames)
  print.default(round(sim, 2))

  ## Sample size: a scalar if all pairs used the same observations,
  ## otherwise a matrix of pairwise counts
  nobs <- x$n
  if(length(nobs)) {
    if(length(nobs) == 1)
      cat("\nNo. of observations used=", nobs, "\n\n")
    else {
      cat("\nNo. of observations used for each pair:\n\n")
      dimnames(nobs) <- list(vnames, vnames)
      print(nobs)
    }
  }
  cat("\nhclust results (method=", x$method, ")\n\n", sep="")
  print(x$hclust)
  invisible()
}
plot.varclus <- function(x, ylab, abbrev=FALSE, legend.=FALSE, loc, maxlen=20,
labels=NULL, ...)
{
## Plot the variable-clustering dendrogram.  The y axis is labeled with
## the similarity measure; axis tick values are similarities even though
## the dendrogram heights are 1 - similarity.
trans <- x$trans
if(missing(ylab)) {
## Build a plotmath y-axis label matching the similarity/transformation
s <- c(hoeffding="30 * Hoeffding D",
spearman=switch(trans,
square = expression(paste(Spearman,~rho^2)),
abs = expression(paste(Spearman,~abs(rho))),
none = expression(paste(Spearman,~rho))),
pearson=switch(trans,
square = expression(paste(Pearson,~r^2)),
abs = expression(paste(Pearson,~abs(r))),
none = expression(paste(Pearson,~r))),
bothpos="Proportion",
ccbothpos="Chance-Corrected Proportion")[x$similarity]
## Fall back to the raw similarity string when there is no match
if((is.expression(s) && as.character(s)=='NULL') ||
(! is.expression(s) && (is.na(s) || s=='')))
s <- x$similarity
ylab <- s
}
if(legend.) abbrev <- TRUE
if(! length(labels)) labels <- dimnames(x$sim)[[2]]
olabels <- labels
if(abbrev) labels <- abbreviate(labels)
if(! length(x$hclust))
stop('clustering was not done on similarity="ccbothpos"')
plot(x$hclust, labels=labels, ann=FALSE, axes=FALSE, ...)
## Label the y axis in similarity units (heights are 1 - similarity)
ya <- pretty(range(1 - x$hclust$height))
axis(2, at=1-ya, labels=format(ya))
title(ylab=ylab)
## If any labels were actually abbreviated, optionally draw a legend
## translating abbreviations back to (possibly truncated) full names
s <- labels != olabels
if(legend. && any(s)) {
if(missing(loc)) {
## Interactive placement of the legend via the mouse
cat("Click mouse at upper left corner of legend\n")
loc <- locator(1)
}
olabels <- ifelse(nchar(olabels)>maxlen, substring(olabels,1,maxlen),
olabels)
text(loc, paste(paste(labels[s],":",olabels[s],"\n"),
collapse=""), adj=0)
}
invisible()
}
## na.action that retains all rows: returns the model frame unchanged
na.retain <- function(mf) {
  mf
}
## Hierarchical clustering of missing-data patterns.  Builds a 0/1
## missingness matrix for the data frame df (empty strings in character
## columns count as missing), defines the similarity of two variables as
## the fraction of observations in which both are missing, and clusters
## via varclus().  Adds to the result:
##   na.per.obs - number of missing variables for each observation
##   mean.na[i] - for observations with variable i missing, the mean
##                number of OTHER variables also missing
naclus <- function(df, method="complete")
{
  ## TRUE where a value is missing; '' is treated as missing for character
  ismiss <- function(x)
    if(is.character(x)) is.na(x) | x == '' else is.na(x)

  ## 0/1 missingness matrix, observations x variables
  ## (vapply rather than sapply guarantees a logical result per column)
  na <- vapply(df, ismiss, logical(nrow(df))) * 1
  n  <- nrow(na)

  ## Similarity = fraction of observations where both variables missing
  sim <- crossprod(na) / n
  res <- varclus(sim, type="similarity.matrix",
                 similarity="Fraction Missing", method=method)

  ## Missing-variable count per observation (rowSums replaces the slower
  ## apply(na, 1, sum))
  na.per.obs <- rowSums(na)

  nc <- ncol(na)
  mean.na <- rep(NA, nc)
  names(mean.na) <- dimnames(na)[[2]]
  ## seq_len() is safe for nc == 0; the dead trailing NULL statement in
  ## the original loop has been removed
  for(i in seq_len(nc)) {
    y <- na[, i] == 1
    ## Subtract 1 so the variable itself is not counted
    if(any(y)) mean.na[i] <- mean(na.per.obs[y]) - 1
  }
  res$na.per.obs <- na.per.obs
  res$mean.na <- mean.na
  res
}
naplot <- function(obj, which=c('all','na per var','na per obs','mean na',
'na per var vs mean na'),
...)
{
## Plot summaries of missing-data patterns from a naclus object:
## fraction of NAs per variable, frequency of the number of missing
## variables per observation, mean number of other variables missing when
## a given variable is missing, and a labeled scatterplot of the last two.
## Invisibly returns the frequency table of NAs per observation.
which <- match.arg(which)
tab <- table(obj$na.per.obs)
## Diagonal of the similarity matrix = fraction missing for each variable
na.per.var <- diag(obj$sim)
names(na.per.var) <- dimnames(obj$sim)[[2]]
mean.na <- obj$mean.na
if(which %in% c('all','na per var'))
dotchart2(sort(na.per.var), xlab='Fraction of NAs',
main='Fraction of NAs in each Variable', ...)
if(which %in% c('all','na per obs'))
dotchart2(tab, auxdata=tab,
xlab='Frequency',
main='Number of Missing Variables Per Observation', ...)
if(which %in% c('all','mean na'))
dotchart2(sort(mean.na),
xlab='Mean Number of NAs',
main='Mean Number of Other Variables Missing for\nObservations where Indicated Variable is NA',
...)
if(which %in% c('all','na per var vs mean na')) {
## Allow point labels to spill outside the plot region; restore par on exit
xpd <- par('xpd')
par(xpd=NA)
on.exit(par(xpd=xpd))
plot(na.per.var, mean.na, xlab='Fraction of NAs for Single Variable',
ylab='Mean # Other Variables Missing', type='p')
usr <- par('usr')
## Tolerances (1.5% of each axis range) for deciding that points coincide
eps <- .015*diff(usr[1:2]);
epsy <- .015*diff(usr[3:4])
s <- (1:length(na.per.var))[! is.na(mean.na)]
## Stack the labels of nearly-coincident points instead of overplotting
taken.care.of <- NULL
for(i in s) {
if(i %in% taken.care.of)
next
w <- s[s > i & abs(na.per.var[s]-na.per.var[i]) < eps &
abs(mean.na[s]-mean.na[i]) < epsy]
if(any(w)) {
taken.care.of <- c(taken.care.of, w)
text(na.per.var[i]+eps, mean.na[i],
paste(names(na.per.var[c(i,w)]),collapse='\n'),adj=0)
}
else text(na.per.var[i]+eps, mean.na[i], names(na.per.var)[i], adj=0)
}
}
invisible(tab)
}
plotMultSim <- function(s, x=1:dim(s)[3],
slim=range(pretty(c(0,max(s,na.rm=TRUE)))),
slimds=FALSE,
add=FALSE, lty=par('lty'), col=par('col'),
lwd=par('lwd'), vname=NULL, h=.5, w=.75,
u=.05, labelx=TRUE, xspace=.35)
{
## Draw a p x p matrix of miniature line plots showing how similarities
## in the p x p x k array s change over the k occasions given by x.
## Cell (i,j) of the layout contains the trajectory s[i,j,]; diagonal
## cells whose similarity is identically 1 are skipped.  When slimds=TRUE,
## separate y-scales are used for diagonal and off-diagonal cells.
## Invisibly returns the similarity limits used (slim).
if(! length(vname))
vname <- dimnames(s)[[1]]
p <- dim(s)[1]
if(length(vname) != p) stop('wrong length for vname')
if(p != dim(s)[2])
stop('similarity matrix not square')
if(length(x) != dim(s)[3])
stop('length of x differs from extent of 3rd dimension of s')
if(! add) {
## Set up an empty plot region and write variable names along the
## bottom (optional) and left margin
plot(c(-xspace,p+.5),c(.5,p+.25), type='n', axes=FALSE, xlab='',ylab='')
if(labelx)
text(1:p, rep(.6,p), vname, adj=.5)
text(rep(.5,p), 1:p, vname, adj=1)
}
## Map data coordinates in xlim onto plotting coordinates in lim
scaleit <- function(x, xlim, lim) lim[1] +
(x - xlim[1]) / diff(xlim) * diff(lim)
if(slimds) {
## Separate scaling for diagonal cells: limits based on the largest
## diagonal similarity over all occasions
slim.diag <- -1e10
for(k in 1:length(x)) {
sk <- s[,,k]
r <- max(diag(sk))
slim.diag <- max(slim.diag, r)
}
slim.diag <- range(pretty(c(0,slim.diag)))
## Off-diagonal scale is the diagonal scale shifted to be centered lower
slim.offdiag <- slim.diag - diff(slim.diag)/2
}
## Pad the x range by a fraction u on both sides
rx <- range(x)
rxe <- c(rx[1] - u * diff(rx), rx[2] + u * diff(rx))
for(i in 1 : p) {
for(j in 1 : p) {
## Skip diagonal cells that carry no information (similarity always 1)
if((i == j) && all(s[i,j,] == 1))
next
## Pick the y-limits for this cell
sl <- if(slimds) if(i==j) slim.diag
else slim.offdiag
else slim
sle <- c(sl[1]-u*diff(sl), sl[2]+u*diff(sl))
if(! add) {
## Draw the cell's bounding box in gray
lines(c(i-w/2,i+w/2,i+w/2,
i-w/2,i-w/2),
c(j-h/2,j-h/2,j+h/2,
j+h/2,j-h/2), col=gray(.5), lwd=.65)
## Annotate the y-scale (limits and tick marks) only for the
## leftmost column's two bottom cells to avoid clutter
xc <- rep(i-w/2-u/3,2)
yc <- scaleit(sl, sle, c(j-h/2,j+h/2))
if(i==1 && j<=2)
{
text(xc, yc,
format(sl,digits=2), adj=1, cex=.7)
segments(rep(xc+u/8,2),yc,
rep(xc+u/3,2),yc)
}
}
## The similarity trajectory itself, scaled into the cell
lines(scaleit(x, rxe, c(i-w/2,i+w/2)),
scaleit(s[i,j,], sle, c(j-h/2,j+h/2)),
lty=lty, lwd=lwd, col=col)
## Reference line at similarity 0 for off-diagonal cells when using
## the shifted off-diagonal scale
if(! add && slimds && (i != j))
lines(c(i-w/2,i+w/2),
rep(scaleit(0, sle, c(j-h/2,j+h/2)),2),
col=gray(.5))
}
}
invisible(slim)
}
Hmisc/R/redun.s 0000644 0001762 0000144 00000024467 14622126032 013032 0 ustar ligges users redun <- function(formula, data=NULL, subset=NULL,
r2=.9, type=c('ordinary','adjusted'),
nk=3, tlinear=TRUE, rank=qrank, qrank=FALSE,
allcat=FALSE, minfreq=0,
iterms=FALSE, pc=FALSE,
pr=FALSE, ...)
{
## Redundancy analysis: stepwise removal of variables that can be
## predicted from the remaining variables with R^2 >= r2, using flexible
## (spline/rank/dummy) expansions of each variable and canonical
## correlation.  formula may also be a numeric matrix.  Returns an object
## of class 'redun'.
acall <- match.call()
type <- match.arg(type)
if(inherits(formula, 'formula')) {
a <- as.character(formula)
## ~. with extra arguments: reduce the data frame first
if(length(a) == 2 && a[1] == '~' && a[2] == '.' &&
length(list(...))) data <- dataframeReduce(data, ...)
Terms <- terms(formula, specials='I', data=data)
# model.frame does not remove variables with a minus sign in front
# of their names, but these are not in term.labels
allvars <- setdiff(as.character(attr(Terms, 'variables')), 'list')
keptvars <- attr(Terms, 'term.labels')
if(length(allvars) > length(keptvars)) {
## Rebuild the formula without the removed (minus-sign) variables
formula <- as.formula(paste('~', paste(keptvars, collapse='+')))
Terms <- terms(formula, specials='I', data=data)
}
m <- list(formula=formula, data=data, subset=subset,
na.action=na.delete)
data <- do.call('model.frame', m)
data <- data[attr(Terms, 'term.labels')]
nam <- names(data)
## Variables wrapped in I() are forced to be linear
linear <- nam[attr(Terms,'specials')$I]
na.action <- attr(data, 'na.action')
if(pr) naprint(na.action)
} else {
## Matrix interface: each column is a variable
if(! is.matrix(formula))
stop("formula must be a numeric matrix when it's not an actual formula")
data <- as.data.frame(formula)
formula <- NULL
na.action <- NULL
nam <- names(data)
p <- length(data)
linear <- rep(FALSE, length(data))
}
p <- length(data)
n <- nrow(data)
cat.levels <- vector('list',p)
names(cat.levels) <- nam
## Variable type codes: 's'=spline, 'r'=rank, 'q'=rank + rank^2,
## 'l'=linear, 'c'=categorical (dummy variables)
vtype <- rep(if(qrank) 'q' else if(rank) 'r' else 's', p)
names(vtype) <- nam
enough <- rep(TRUE, p)
if(rank) nk <- 0
## First pass over variables: classify each one, convert factors/character
## to integer codes, and flag variables with inadequate frequencies
for(i in 1:p)
{
xi <- data[[i]]
ni <- nam[i]
iscat <- FALSE
if(is.character(xi))
{
xi <- as.factor(xi)
lev <- levels(xi)
iscat <- TRUE
}
else if(is.factor(xi))
{
lev <- levels(xi)
iscat <- TRUE
}
if(iscat)
{
data[[i]] <- as.integer(xi)
cat.levels[[ni]] <- lev
vtype[ni] <- 'c'
## Need at least 2 categories with frequency >= minfreq
if(minfreq > 0 && sum(table(xi) >= minfreq) < 2) enough[i] <- FALSE
}
else
{
u <- unique(xi)
if(length(u) == 1)
{
warning(paste(ni,'is constant'))
enough[i] <- FALSE
}
if(minfreq > 0 && length(u)==2 && sum(table(xi) >= minfreq) < 2)
enough[i] <- FALSE
## Too few distinct values or forced linear: downgrade to linear
## (or plain rank if quadratic-in-rank was requested)
if(nk==0 || length(u) < 3 || ni %in% linear)
vtype[ni] <- if(vtype[ni] == 'q') 'r' else 'l'
}
}
## Drop variables with inadequate frequencies
toofew <- nam[! enough]
if(length(toofew))
{
p <- sum(enough)
nam <- nam[enough]
cat.levels <- cat.levels[enough]
vtype <- vtype[enough]
data <- data[enough]
}
## Degrees of freedom per variable by type; categorical df filled in below
dfs <- c(l=1, s=nk - 1, r=1, q=2, c=0)
xdf <- dfs[vtype]
j <- vtype=='c'
if(any(j)) for(i in which(j)) xdf[i] <- length(cat.levels[[i]]) - 1
names(xdf) <- nam
orig.df <- sum(xdf)
## Build the expanded design matrix X; st/en record the column range
## occupied by each variable's expansion
X <- matrix(NA, nrow=n, ncol=orig.df)
st <- en <- integer(p)
start <- 1
for(i in 1:p)
{
xi <- data[[i]]
if(vtype[i] %in% c('r', 'q')) {
## Rank transform (scaled to (0,1]); 'q' also adds squared ranks
xi <- rank(xi) / length(xi)
x <- if(vtype[i] == 'q') cbind(xi, xi ^ 2) else as.matrix(xi)
}
else x <- aregTran(xi, vtype[i], nk)
st[i] <- start
nc <- ncol(x)
xdf[i]<- nc
end <- start + nc - 1
en[i] <- end
if(end > orig.df) stop('program logic error')
X[, start : end] <- x
start <- end + 1
}
if(end < orig.df) X <- X[, 1:end, drop=FALSE]
## if couldn't derive the requested number of knots in splines
## fcan: R^2 with which variable iy (expanded) can be predicted from the
## variables in ix, via canonical correlation; also returns the first
## canonical variate score for iy
fcan <- function(ix, iy, X, st, en, vtype, tlinear, type,
allcat, r2, minfreq)
{
## Get all subscripts for variables in the right hand side
k <- rep(FALSE, ncol(X))
for(i in ix) k[st[i] : en[i]] <- TRUE
## Optionally force the target variable to be linear (first column only)
ytype <- if(tlinear && vtype[iy] %in% c('s', 'q')) 'l' else vtype[iy]
Y <- if(ytype=='l') X[, st[iy], drop=FALSE] else
X[, st[iy] : en[iy], drop=FALSE]
d <- dim(Y); n <- d[1]; ny <- d[2]
f <- cancor(X[, k, drop=FALSE], Y)
R2 <- f$cor[1]^2
if(type=='adjusted')
{
dof <- sum(k) + ny - 1
R2 <- max(0, 1 - (1 - R2) * (n - 1) / (n - dof - 1))
}
ycoef <- f$ycoef[, 1]
yscore <- matxv(Y, ycoef) * sqrt(n - 1) - sum(ycoef * f$ycenter)
## If variable to possibly remove is categorical with more than 2
## categories (more than one dummy variable) make sure ALL frequent
## categories are redundant (not just the linear combination of
## dummies) if allcat is TRUE. Do this by substituting for R^2 the
## minimum R^2 over predicting each dummy variable.
if(R2 > r2 && allcat && ytype=='c' && (en[iy] > st[iy]))
{
for(j in st[iy] : en[iy])
{
y <- X[, j, drop=FALSE]
if(sum(y) >= minfreq && n - sum(y) >= minfreq)
{
f <- cancor(X[, k, drop=FALSE], y)
R2c <- f$cor[1]^2
if(type=='adjusted')
{
dof <- sum(k)
R2c <- max(0, 1 - (1 - R2c)*(n-1)/(n-dof-1))
}
R2 <- min(R2, R2c, na.rm=TRUE)
}
}
}
list(R2=R2, yscore=yscore)
}
if(iterms)
{
## Analyze individual expansion columns as separate 'variables',
## optionally replacing each variable's columns by principal components
nc <- ncol(X)
nm <- NULL
for(i in 1:p)
{
m <- nam[i]
np <- en[i] - st[i] + 1
## Name higher-order columns with appended primes, e.g. x, x', x''
if(np > 1) for(j in 1:(np-1))
m <- c(m, paste(nam[i], paste(rep("'", j), collapse=''), sep=''))
nm <- c(nm, m)
if(pc) X[,st[i]:en[i]] <- prcomp(X[,st[i]:en[i]], scale=TRUE)$x
}
colnames(X) <- nm
p <- nc
nam <- nm
st <- en <- 1:nc
vtype <- rep('l', nc)
}
## Stepwise removal: In = variables currently retained, Out = removed
In <- 1:p; Out <- integer(0)
r2r <- numeric(0)
r2l <- scores <- list()
for(i in 1:p) {
if(pr) cat('Step',i,'of a maximum of', p, '\r')
## For each variable currently on the right hand side ("In")
## find out how well it can be predicted from all the other "In" variables
if(length(In) < 2) break
Rsq <- In * 0
l <- 0
for(j in In)
{
l <- l + 1
k <- setdiff(In, j)
fc <- fcan(k, j, X, st, en, vtype, tlinear, type,
allcat, r2, minfreq)
Rsq[l] <- fc$R2
if(! length(scores[[nam[j]]])) scores[[nam[j]]] <- fc$yscore
}
## Save the first-step R^2 for every variable
if(i==1) {Rsq1 <- Rsq; names(Rsq1) <- nam[In]}
if(max(Rsq) < r2) break
removed <- In[which.max(Rsq)]
r2removed <- max(Rsq)
## Check that all variables already removed can be predicted
## adequately if new variable 'removed' is removed
k <- setdiff(In, removed)
r2later <- NULL
if(length(Out))
{
r2later <- Out*0
names(r2later) <- nam[Out]
l <- 0
for(j in Out)
{
l <- l+1
fc <- fcan(k, j, X, st, en, vtype, tlinear, type, allcat, r2, minfreq)
r2later[l] <- fc$R2
if(! length(scores[[nam[j]]])) scores[[nam[j]]] <- fc$yscore
}
## Abort this removal if a previously removed variable would no
## longer be adequately predicted
if(min(r2later) < r2) break
}
Out <- c(Out, removed)
In <- setdiff(In, Out)
r2r <- c(r2r, r2removed)
if(length(r2later)) r2l[[i]] <- r2later
}
if(length(r2r)) names(r2r) <- nam[Out]
if(length(r2l)) names(r2l) <- nam[Out]
if(pr) cat('\n')
structure(list(call=acall, formula=formula,
In=nam[In], Out=nam[Out], toofew=toofew,
rsquared=r2r, r2later=r2l, rsq1=Rsq1,
n=n, p=p, rank=rank, qrank=qrank,
na.action=na.action,
vtype=vtype, tlinear=tlinear,
allcat=allcat, minfreq=minfreq, nk=nk, df=xdf,
cat.levels=cat.levels,
r2=r2, type=type, scores=do.call('cbind', scores)),
class='redun')
}
## Print method for redun objects.  Summarizes the analysis settings, the
## per-variable R^2 values, the redundant variables found, and (when
## long=TRUE) the R^2 with which each deleted variable could still be
## predicted after later deletions.
## digits: rounding for displayed R^2 values
## long:   include first-step R^2s and the 'after later deletions' column
print.redun <- function(x, digits=3, long=TRUE, ...)
{
  ## Print a character vector compactly, wrapped to the console width
  prcvec <- function(w) cat(strwrap(paste(w, collapse=' ')), '', sep='\n')

  cat("\nRedundancy Analysis\n\n")
  if(length(x$formula)) {
    print(x$formula)
    cat("\n")
  }
  cat('n:', x$n, '\tp:', x$p, '\tnk:', x$nk, '\n')
  cat('\nNumber of NAs:\t', length(x$na.action$omit), '\n')
  a <- x$na.action
  if(length(a))
    naprint(a)

  ## 'rank' is checked by name for objects created by older versions
  ranks <- 'rank' %in% names(x) && x$rank
  if(ranks)
    cat('\nAnalysis used ranks',
        if(x$qrank) 'and square of ranks', '\n')
  if(x$tlinear)
    cat('\nTransformation of target variables forced to be',
        if(ranks) 'linear in the ranks\n' else 'linear\n')
  if(x$allcat)
    cat('\nAll levels of a categorical variable had to be redundant before the\nvariable was declared redundant\n')
  if(x$minfreq > 0)
    cat('\nMinimum category frequency required for retention of a binary or\ncategorical variable:', x$minfreq, '\n')
  if(length(x$toofew)) {
    cat('\nBinary or categorical variables removed because of inadequate frequencies:\n\n')
    cat(x$toofew, '\n')
  }
  cat('\nR-squared cutoff:', x$r2, '\tType:', x$type, '\n')
  if(long) {
    cat('\nR^2 with which each variable can be predicted from all other variables:\n\n')
    print(round(x$rsq1, digits))
    if(x$allcat)
      cat('\n(For categorical variables the minimum R^2 for any sufficiently\nfrequent dummy variable is displayed)\n\n')
  }
  if(! length(x$Out)) {
    cat('\nNo redundant variables\n\n')
    return(invisible())
  }
  ## Bug fix: heading previously misspelled as 'Rendundant variables:'
  cat('\nRedundant variables:\n\n')
  prcvec(x$Out)
  cat('\nPredicted from variables:\n\n')
  prcvec(x$In)

  w <- x$r2later
  vardel <- names(x$rsquared)
  if(! long) {
    print(data.frame('Variable Deleted'=vardel,
                     'R^2'=round(x$rsquared, digits),
                     row.names=NULL, check.names=FALSE))
    return(invisible())
  }
  ## For each deleted variable, accumulate the R^2s with which it could
  ## still be predicted at each later deletion step
  later <- rep('', length(vardel))
  i <- 0
  for(v in vardel) {
    i <- i + 1
    for(z in w)
      if(length(z) && v %in% names(z))
        later[i] <- paste(later[i], round(z[v], digits), sep=' ')
  }
  print(data.frame('Variable Deleted'=vardel,
                   'R^2'=round(x$rsquared, digits),
                   'R^2 after later deletions'=later,
                   row.names=NULL,
                   check.names=FALSE))
  invisible()
}
Hmisc/R/ordGroupBoot.r 0000644 0001762 0000144 00000005767 14740205330 014343 0 ustar ligges users #' Minimally Group an Ordinal Variable So Bootstrap Samples Will Contain All Distinct Values
#'
#' When bootstrapping models for ordinal Y when Y is fairly continuous, it is frequently the case that one or more bootstrap samples will not include one or more of the distinct original Y values. When fitting an ordinal model (including a Cox PH model), this means that an intercept cannot be estimated, and the parameter vectors will not align over bootstrap samples. To prevent this from happening, some grouping of Y may be necessary. The `ordGroupBoot` function uses [Hmisc::cutGn()] to group Y so that the minimum number in any group is guaranteed to not exceed a certain integer `m`. `ordGroupBoot` tries a range of `m` and stops at the lowest `m` such that either all `B` tested bootstrap samples contain all the original distinct values of Y (if `B`>0), or that the probability that a given sample of size `n` with replacement will contain all the distinct original values exceeds `aprob` (`B`=0). This probability is computed approximately using an approximation to the probability of complete sample coverage from the _coupon collector's problem_ and is quite accurate for our purposes.
#'
#' @param y a numeric vector
#' @param B number of bootstrap samples to test, or zero to use a coverage probability approximation
#' @param m range of minimum group sizes to test; the default range is usually adequate
#' @param what specifies that either the mean `y` in each group should be returned, a `factor` version of this with interval endpoints in the levels, or the computed value of `m` should be returned
#' @param aprob minimum coverage probability sought
#' @param pr set to `FALSE` to not print the computed value of the minimum `m` satisfying the needed condition
#'
#' @returns a numeric vector corresponding to `y` but grouped, containing either the mean of `y` in each group or a factor variable representing grouped `y`, either with the minimum `m` that satisfied the required sample coverage
#' @seealso [cutGn()]
#' @export
#' @author Frank Harrell
#' @md
#'
#' @examples
#' set.seed(1)
#' x <- c(1:6, NA, 7:22)
#' ordGroupBoot(x, m=5:10)
#' ordGroupBoot(x, m=5:10, B=5000, what='factor')
ordGroupBoot <- function(y, B=0, m=7:min(15, floor(n / 3)),
                         what=c('mean', 'factor', 'm'),
                         aprob=0.9999, pr=TRUE) {
  ## Group an ordinal/continuous variable y (via cutGn) just enough that a
  ## bootstrap sample of size n is very likely to contain every distinct
  ## grouped value.  Tries each minimum group size in m, accepting the
  ## smallest one for which either all B bootstrap test samples achieve
  ## full coverage (B > 0), or the approximate coverage probability
  ## exceeds aprob (B == 0).
  what <- match.arg(what)
  n <- sum(! is.na(y))
  for(mm in m) {
    yr <- cutGn(y, m=mm, what=if(what == 'm') 'mean' else what)
    if(B == 0) {
      ## Coupon-collector approximation: P(a group with frequency f is
      ## absent from a bootstrap sample) ~ exp(-f), so coverage
      ## probability ~ 1 - sum(exp(-f)) over groups (union bound)
      tab <- table(yr)
      p <- 1 - sum(exp(- tab))
      if(p > aprob) {
        if(pr) cat('Minimum m:', mm, '\n')
        return(if(what == 'm') mm else yr)
      }
      next
    }
    ## Empirical check: every one of B bootstrap samples must contain all
    ## distinct grouped values
    yru <- unique(yr)
    ## Bug fix: the original tested 'i == B' after the loop, which is also
    ## TRUE when the B-th sample was the first to FAIL coverage, wrongly
    ## accepting mm.  An explicit flag removes the ambiguity.
    covered <- TRUE
    for(i in 1 : B) {
      yb <- sample(yr, replace=TRUE)
      if(! all(yru %in% yb)) {
        covered <- FALSE
        break
      }
    }
    if(covered) {
      if(pr) cat('Minimum m:', mm, '\n')
      return(if(what == 'm') mm else yr)
    }
  }
  stop('no value of m tested will sufficiently guarantee coverage of original values in samples with replacement')
}
Hmisc/R/score.binary.s 0000644 0001762 0000144 00000002114 12250357227 014303 0 ustar ligges users score.binary <- function(..., fun=max, points=1:p,
na.rm=funtext=='max', retfactor=TRUE)
{
## Combine a series of binary variables (given in ...) into a single
## score.  Each variable i is multiplied by points[i] and fun (default
## max) is applied across variables within each observation.  For fun=max
## with retfactor=TRUE the result is returned as a factor labeled with the
## argument expressions; otherwise a numeric vector is returned.
## Note: the default points=1:p is evaluated lazily, after p is defined
## below.
x <- list(...)
p <- length(x)
## Labels for the factor levels: the deparsed calling expressions of ...
nam <- (as.character(sys.call())[-1])[1:p]
x <- matrix(unlist(x), ncol=p)
if(!missing(points)) {
if(length(points)==1)
points <- rep(points, p)
if(length(points)!=p)
stop('wrong length for points')
}
## Scale each column by its point value (column-wise recycling)
x <- x * rep(points, rep.int(nrow(x),p))
funtext <- as.character(substitute(fun))
if(funtext=='max' && !missing(points) && retfactor)
warning('points do not matter for fun=max with retfactor=T\nas long as they are in ascending order')
if(!missing(retfactor) && retfactor && funtext!='max')
stop('retfactor=T only applies to fun=max')
## Track rows containing any NA so they can be set back to NA below
xna <- apply(x, 1, function(x) any(is.na(x)))
## Pass na.rm to fun only if fun actually accepts an na.rm argument
funargs <- as.list(args(fun))
funargs <- funargs[-length(funargs)]
if(any(names(funargs) == "na.rm")) {
x <- apply(x, 1, fun, na.rm=na.rm)
} else {
x <- apply(x, 1, fun)
}
## When NAs were not removed, a zero score in a row that contained NA
## is indeterminate, so set it to NA
if(!na.rm)
x[x==0 & xna] <- NA
if(retfactor && funtext=='max')
factor(x, c(0,points), c("none",nam))
else x
}
Hmisc/R/gbayesSeqSim.r 0000644 0001762 0000144 00000023516 14473140370 014307 0 ustar ligges users ##' Simulate Bayesian Sequential Treatment Comparisons Using a Gaussian Model
##'
##' Simulate a sequential trial under a Gaussian model for parameter estimates, and Gaussian priors using simulated estimates and variances returned by `estSeqSim`. For each row of the data frame `est` and for each prior/assertion combination, computes the posterior probability of the assertion.
##' @title gbayesSeqSim
##' @param est data frame created by `estSeqSim()`
##' @param asserts list of lists. The first element of each list is the user-specified name for each assertion/prior combination, e.g., `"efficacy"`. The other elements are, in order, a character string equal to "<", ">", or "in", a parameter value `cutoff` (for "<" and ">") or a 2-vector specifying an interval for "in", and either a prior distribution mean and standard deviation named `mu` and `sigma` respectively, or a parameter value (`"cutprior"`) and tail area `"tailprob"`. If the latter is used, `mu` is assumed to be zero and `sigma` is solved for such that P(parameter > 'cutprior') = P(parameter < - 'cutprior') = `tailprob`.
##' @return a data frame with number of rows equal to that of `est` with a number of new columns equal to the number of assertions added. The new columns are named `p1`, `p2`, `p3`, ... (posterior probabilities), `mean1`, `mean2`, ... (posterior means), and `sd1`, `sd2`, ... (posterior standard deviations). The returned data frame also has an attribute `asserts` added which is the original `asserts` augmented with any derived `mu` and `sigma` and converted to a data frame, and another attribute `alabels` which is a named vector used to map `p1`, `p2`, ... to the user-provided labels in `asserts`.
##' @author Frank Harrell
##' @seealso `gbayes()`, `estSeqSim()`, `simMarkovOrd()`, `estSeqMarkovOrd()`
##' @examples
##' \dontrun{
##' # Simulate Bayesian operating characteristics for an unadjusted
##' # proportional odds comparison (Wilcoxon test)
##' # For 100 simulations, 5 looks, 2 true parameter values, and
##' # 2 assertion/prior combinations, compute the posterior probability
##' # Use a low-level logistic regression call to speed up simuluations
##' # Use data.table to compute various summary measures
##' # Total simulation time: 2s
##' lfit <- function(x, y) {
##' f <- rms::lrm.fit(x, y)
##' k <- length(coef(f))
##' c(coef(f)[k], vcov(f)[k, k])
##' }
##' gdat <- function(beta, n1, n2) {
##' # Cell probabilities for a 7-category ordinal outcome for the control group
##' p <- c(2, 1, 2, 7, 8, 38, 42) / 100
##'
##' # Compute cell probabilities for the treated group
##' p2 <- pomodm(p=p, odds.ratio=exp(beta))
##' y1 <- sample(1 : 7, n1, p, replace=TRUE)
##' y2 <- sample(1 : 7, n2, p2, replace=TRUE)
##' list(y1=y1, y2=y2)
##' }
##'
##' # Assertion 1: log(OR) < 0 under prior with prior mean 0.1 and sigma 1 on log OR scale
##' # Assertion 2: OR between 0.9 and 1/0.9 with prior mean 0 and sigma computed so that
##' # P(OR > 2) = 0.05
##' asserts <- list(list('Efficacy', '<', 0, mu=0.1, sigma=1),
##' list('Similarity', 'in', log(c(0.9, 1/0.9)),
##' cutprior=log(2), tailprob=0.05))
##'
##' set.seed(1)
##' est <- estSeqSim(c(0, log(0.7)), looks=c(50, 75, 95, 100, 200),
##' gendat=gdat,
##' fitter=lfit, nsim=100)
##' z <- gbayesSeqSim(est, asserts)
##' head(z)
##' attr(z, 'asserts')
##'
##' # Compute the proportion of simulations that hit targets (different target posterior
##' # probabilities for efficacy vs. similarity)
##'
##' # For the efficacy assessment compute the first look at which the target
##' # was hit (set to infinity if never hit)
##' require(data.table)
##' z <- data.table(z)
##' u <- z[, .(first=min(p1 > 0.95)), by=.(parameter, sim)]
##' # Compute the proportion of simulations that ever hit the target and
##' # that hit it by the 100th subject
##' u[, .(ever=mean(first < Inf)), by=.(parameter)]
##' u[, .(by75=mean(first <= 100)), by=.(parameter)]
##' }
##' @md
gbayesSeqSim <- function(est, asserts) {
## For each row of the simulated-estimate data frame est and each
## assertion/prior combination in asserts, compute Gaussian posterior
## probabilities, means, and SDs, appending columns p1, mean1, sd1, p2,
## ... to est.  See the roxygen block above for the structure of asserts.
nas <- length(asserts)
alabels <- character(nas)
## Normalize each assertion: fill in positional names, derive sigma from
## cutprior/tailprob when mu/sigma not given, and build a display string
for(i in 1 : nas) {
a <- asserts[[i]]
nam <- names(a)
if(nam[1] == '') nam[1] <- 'label'
if(nam[2] == '') nam[2] <- 'dir'
if(nam[3] == '') nam[3] <- 'cutoff'
names(a) <- nam
if((length(a$cutoff) == 2) != (a$dir == 'in'))
stop('mismatch of direction and length of cutoff in asserts')
if(any(c('mu', 'sigma') %nin% names(a))) {
## Solve for sigma so that P(parameter > cutprior) = tailprob
## under a mean-zero prior
a$mu <- 0
a$sigma <- - a$cutprior / qnorm(a$tailprob)
if(a$sigma <= 0) stop('error in specification of cutoff or tailprob')
}
## Readable rendering of the assertion, e.g. "< 0" or "[0.9,1.11]"
w <- format(round(a$cutoff, 3))
a$assertion <- paste(if(a$dir %in% c('<','>')) a$dir,
if(length(a$cutoff) == 2)
paste0('[', w[1], ',', w[2], ']')
else
w[1])
asserts[[i]] <- a
alabels[i] <- a$label
}
N <- nrow(est)
ests <- est$est
vests <- est$vest
## For each simulated parameter estimate and variance compute nas
## posterior probabilities
for(i in 1 : nas) {
a <- asserts[[i]]
dir <- a$dir
cutoff <- a$cutoff
mu <- a$mu
sigma2 <- (a$sigma) ^ 2
## Conjugate Gaussian update: precision-weighted combination of prior
## (mu, sigma2) and data estimate (ests, vests)
var.post <- 1. / (1. / sigma2 + 1. / vests)
mean.post <- (mu / sigma2 + ests / vests) * var.post
sd.post <- sqrt(var.post)
## Posterior probability of the assertion under the normal posterior
pp <-
switch(dir,
'<' = pnorm(cutoff, mean.post, sd.post),
'>' = pnorm(cutoff, mean.post, sd.post, lower.tail=FALSE),
'in' = pnorm(cutoff[2], mean.post, sd.post) -
pnorm(cutoff[1], mean.post, sd.post))
label(pp) <- a$label
est[[paste0('p', i)]] <- pp
est[[paste0('mean', i)]] <- mean.post
est[[paste0('sd', i)]] <- sd.post
}
## Attach the (augmented) assertions as a data frame attribute, filling
## missing cutprior/tailprob with NA so rows bind consistently
a <- asserts
w <- NULL
for(j in 1 : nas) {
a <- asserts[[j]]
a$dir <- a$cutoff <- NULL
if(any(c('cutprior', 'tailprob') %nin% names(a)))
a$cutprior <- a$tailprob <- NA
w <- rbind(w, as.data.frame(a))
}
attr(est, 'asserts') <- w
## Map p1, p2, ... back to the user-supplied assertion labels
names(alabels) <- paste0('p', 1 : nas)
attr(est, 'alabels') <- alabels
est
}
##' Simulate Comparisons For Use in Sequential Clinical Trial Simulations
##'
##' Simulates sequential clinical trials. Looks are done sequentially at observation numbers given in the vector `looks` with the earliest possible look being at observation 2. For each true effect parameter value, simulation, and at each look, runs a function to compute the estimate of the parameter of interest along with its variance. For each simulation, data are first simulated for the last look, and these data are sequentially revealed for earlier looks. The user provides a function `gendat` that given a true effect of `parameter` and the two sample sizes (for treatment groups 1 and 2) returns a list with vectors `y1` and `y2` containing simulated data. The user also provides a function `fitter` with arguments `x` (group indicator 0/1) and `y` (response variable) that returns a 2-vector containing the effect estimate and its variance. `parameter` is usually on the scale of a regression coefficient, e.g., a log odds ratio.
##' @title estSeqSim
##' @param parameter vector of true parameter (effects; group differences) values
##' @param looks integer vector of observation numbers at which posterior probabilities are computed
##' @param gendat a function of three arguments: true parameter value (scalar), sample size for first group, sample size for second group
##' @param fitter a function of two arguments: 0/1 group indicator vector and the dependent variable vector
##' @param nsim number of simulations (default is 1)
##' @param progress set to `TRUE` to send current iteration number to the console
##' @return a data frame with number of rows equal to the product of `nsim`, the length of `looks`, and the length of `parameter`.
##' @author Frank Harrell
##' @seealso `gbayesSeqSim()`, `simMarkovOrd()`, `estSeqMarkovOrd()`
##' @examples
##' if (requireNamespace("rms", quietly = TRUE)) {
##' # Run 100 simulations, 5 looks, 2 true parameter values
##' # Total simulation time: 2s
##' lfit <- function(x, y) {
##' f <- rms::lrm.fit(x, y)
##' k <- length(coef(f))
##' c(coef(f)[k], vcov(f)[k, k])
##' }
##' gdat <- function(beta, n1, n2) {
##' # Cell probabilities for a 7-category ordinal outcome for the control group
##' p <- c(2, 1, 2, 7, 8, 38, 42) / 100
##'
##' # Compute cell probabilities for the treated group
##' p2 <- pomodm(p=p, odds.ratio=exp(beta))
##' y1 <- sample(1 : 7, n1, p, replace=TRUE)
##' y2 <- sample(1 : 7, n2, p2, replace=TRUE)
##' list(y1=y1, y2=y2)
##' }
##'
##' set.seed(1)
##' est <- estSeqSim(c(0, log(0.7)), looks=c(50, 75, 95, 100, 200),
##' gendat=gdat,
##' fitter=lfit, nsim=100)
##' head(est)
##' }
##' @md
estSeqSim <- function(parameter, looks, gendat, fitter, nsim=1,
                      progress=FALSE) {
  ## Simulate sequential trials: for each simulation and each true
  ## parameter value, generate full-study data once (gendat), then reveal
  ## it cumulatively at each look and record the effect estimate and its
  ## variance from fitter.  Returns a long data frame with one row per
  ## simulation x parameter x look.
  looks  <- sort(looks)
  nlooks <- length(looks)
  N      <- max(looks)
  nparam <- length(parameter)
  ntot   <- nsim * nlooks * nparam

  ## Preallocate all result columns
  parm <- est <- vest <- numeric(ntot)
  look <- sim <- integer(ntot)

  row <- 0
  for(isim in seq_len(nsim)) {
    if(progress) cat('Simulation', isim, '\r')
    for(param in parameter) {
      ## Randomize treatment for the whole study, then simulate responses
      ## for both groups in one shot
      X <- sample(0 : 1, N, replace=TRUE)
      dat <- gendat(param, sum(X == 0), sum(X == 1))
      Y <- rep(NA, N)
      Y[X == 0] <- dat$y1
      Y[X == 1] <- dat$y2
      ## Sequentially reveal the first l observations at each look
      for(l in looks) {
        f <- fitter(X[1 : l], Y[1 : l])
        row <- row + 1
        sim[row]  <- isim
        parm[row] <- param
        look[row] <- l
        est[row]  <- f[1]
        vest[row] <- f[2]
      }
    }
  }
  if(progress) cat('\n')
  data.frame(sim=sim, parameter=parm, look=look,
             est=est, vest=vest)
}
Hmisc/R/ecdfSteps.r 0000644 0001762 0000144 00000002720 14401222647 013624 0 ustar ligges users ##' Compute Coordinates of an Empirical Distribution Function
##'
##' For a numeric vector uses the R built-in `ecdf` function to compute
##' coordinates of the ECDF, with extension slightly below and above the
##' range of `x` by default. This is useful for `ggplot2` where the ECDF may need to be transformed. The returned object is suitable for creating stratified statistics using `data.table` and other methods.
##' @title ecdfSteps
##' @param x numeric vector, possibly with `NA`s that are ignored
#' @param extend a 2-vector to extend the range of x (low, high). Set `extend=FALSE` to not extend `x`, or leave it missing to extend it 1/20th of the observed range on either side.
##' @return a list with components `x` and `y`
##' @author Frank Harrell
##' @md
##' @seealso [stats::ecdf()]
##' @examples
##' ecdfSteps(0:10)
##' \dontrun{
##' # Use data.table for obtaining ECDFs by country and region
##' w <- d[, ecdfSteps(z, extend=c(1,11)), by=.(country, region)] # d is a DT
##' # Use ggplot2 to make one graph with multiple regions' ECDFs
##' # and use faceting for countries
##' ggplot(w, aes(x, y, color=region)) + geom_step() +
##' facet_wrap(~ country)
##' }
ecdfSteps <- function(x, extend) {
  ## Compute step coordinates of the ECDF of x (NAs ignored), optionally
  ## extending the x range: extend missing or TRUE extends by 1/20 of the
  ## observed range on each side, a numeric 2-vector gives explicit
  ## endpoints, and extend=FALSE suppresses extension.
  ## Returns list(x=, y=) suitable for geom_step() etc.
  u <- sort(unique(x))   # sort() drops NAs
  ## Bug fix: extend=TRUE previously fell through to
  ## c(extend[1], u, extend[2]), injecting TRUE (=1) as fake endpoints;
  ## it now behaves like the default extension.
  if(missing(extend) || isTRUE(extend) || is.numeric(extend)) {
    if(missing(extend) || isTRUE(extend)) {
      ## Default: extend by 1/20 of the observed range on each side
      r      <- range(u)
      eps    <- diff(r) / 20.
      extend <- c(r[1] - eps, r[2] + eps)
    }
    u <- c(extend[1], u, extend[2])
  }
  list(x=u, y=ecdf(x)(u))
}
Hmisc/R/hidingTOC.r 0000644 0001762 0000144 00000017104 13417776562 013536 0 ustar ligges users #' Moving and Hiding Table of Contents
#'
#' Moving and hiding table of contents for Rmd HTML documents
#'
#' \code{hidingTOC} creates a table of contents in a Rmd document that
#' can be hidden at the press of a button. It also generate buttons that allow
#' the hiding or unhiding of the diffrent level depths of the table of contents.
#'
#' @param buttonLabel the text on the button that hides and unhides the
#' table of contents. Defaults to \code{Contents}.
#' @param levels the max depth of the table of contents that it is desired to
#' have control over the display of. (defaults to 3)
#' @param posCollapse if \code{'margin'} then display the depth select buttons
#' vertically along the side of the page choosen by \code{buttonSide}. If
#' \code{'top'} then display the depth select buttons horizontally under the
#' button that hides the TOC. Defaults to \code{'margin'}. \code{'bottom'} is
#' currently unimplemented.
#' @param tocSide which side of the page should the table of contents be placed
#' on. Can be either \code{'right'} or \code{'left'}. Defaults to
#' \code{'right'}
#' @param buttonSide which side of the page should the button that hides the TOC
#' be placed on. Can be either \code{'right'} or \code{'left'}. Defaults to
#' \code{'right'}
#' @param hidden Logical should the table of contents be hidden at page load
#' Defaults to \code{FALSE}
#'
#' @return an HTML formatted text string to be inserted into a markdown document
#' @author Thomas Dupont
#' @examples
#' \dontrun{
#' hidingTOC()
#' }
#' @export
## Build the HTML/CSS/JS fragment that gives an Rmd HTML document a
## hideable, level-collapsible floating table of contents.  Returns a
## single character string to be inserted into the document.
## NOTE(review): several string literals below (skeleton, buttonText,
## levelButtonText) look truncated -- the HTML markup appears to have been
## stripped from this copy of the file.  Confirm against the released
## Hmisc source before modifying any of them.
hidingTOC <- function(buttonLabel="Contents", levels=3,
tocSide=c('right','left'), buttonSide=c('right','left'),
posCollapse=c('margin','top','bottom'), hidden=FALSE) {
## Make javascript functions that controll the hiding and unhiding of
## different levels of the TOC
## This is done by switching on elements with id equal to TOC or class equal
## tocify-subheader and with attribute data-tag values less then or equal to
## the requested level and by switching off elements with attribute data-tag
## values greater then the requested level.
makeLevelExpandFun <- function(level, maxLevels) {
## Sanity check to make sure that level is never greater then maxLevels
if (level > maxLevels)
stop("level value ", level, " is greater then maxLevels value ", maxLevels)
## There are 2 special cases.
return(if (level == 1L) {
## Where the reqested level equals 1. Unhide the element with id
## equal to TOC and hide the elements with class equal to
## tocify-subheader.
'function expandLevel1(){$("#TOC").toggle(true);$(".tocify-subheader").toggle(false)}'
} else if (level == maxLevels) {
## Where the requested level is equal to maxLevels then just unhide
## all elements with id equal to TOC or class equal to
## tocify-subheader.
paste0('function expandLevel', level, '(){$("#TOC,.tocify-subheader").toggle(true)}')
} else {
## General case level greater then 1 and less then maxLevels. Unhide
## the elements with id equal to TOC or class equal to
## tocify-subheader with attribute data-tag values less then or
## equal to the requested level. Then hide elements with class
## tocify-subheader with attribute data-tag values greater then the
## requested level but less then or equal to maxLevels.
paste0("function expandLevel", level, '(){$("#TOC,',
paste0('.tocify-subheader[data-tag=',seq.int(2L, level),']', collapse=','),
'").toggle(true);$("',
paste0('.tocify-subheader[data-tag=',seq.int(level+1L, maxLevels),']', collapse=','),
'").toggle(false)}')
})
}
## basic HTML skeleton to inwhich to place various values
skeleton <- '
%s %s
'
## Validate/canonicalize the position arguments
buttonSide <- match.arg(buttonSide)
tocSide <- match.arg(tocSide)
posCollapse <- match.arg(posCollapse)
if(posCollapse == 'bottom') {
stop("arguement posCollapse = bottom is not supported yet")
}
if(tocSide != buttonSide)
stop("non-symmetric values for tocSide and buttonSide are not supported")
if(!missing(hidden) && (length(hidden) == 0L || (!is.logical(hidden) && !is.numeric(hidden))))
stop("hidden must be of logical type")
levelSequence <- seq_len(levels)
## CSS text: pin the TOC to the chosen side; margins depend on where the
## collapse buttons are placed; optionally start hidden
cssText <- paste0(".toc-level-select-group{clear:both}#TOC{position:fixed;top:0;",
tocSide, ':0;margin:',
switch(posCollapse,
margin = '23px ',
top = '44px '),
switch(posCollapse,
margin = '20px ',
top = '0px '),
'20px ',
switch(posCollapse,
margin = '20px',
top = '0px'),
';z-index:9',
if(hidden) ";display:none",
'}#toc-controls{position:fixed;top:0;',
buttonSide, ':0;margin:0px}.col-md-3{width: 0%}.col-md-9{width: 100%}',
'div.container-fluid.main-container{max-width:none;margin-left:0px;margin-right:none}')
## Generate the javascript text needed for the TOC level display buttons'
## functions.
scriptText <- paste0(vapply(levelSequence, makeLevelExpandFun, "", maxLevels=levels),
collapse="")
## Which side the buttons should be pulled to.
pullClass <- if(buttonSide == "right") {
"pull-right"
} else {
"pull-left"
}
## Generate the hiding button HTML text.
buttonText <- paste0('')
## Generate the level buttons' HTML text.
levelButtonText <- paste0('
',
paste0('',
collapse=switch(posCollapse,
margin = " ",
top="",
stop("Unknown value for posCollapse ", posCollapse))),
"
")
## NOTE(review): skeleton above contains only two %s placeholders but four
## values are passed here; in the released source the skeleton presumably
## has four -- confirm before changing.
return(sprintf(skeleton, cssText, scriptText, buttonText, levelButtonText))
}
Hmisc/R/rmClose.r 0000644 0001762 0000144 00000002370 14763374761 013331 0 ustar ligges users #' rmClose
#'
#' Remove close values from a numeric vector that are not at the outer limits. This is useful for removing axis breaks that overlap when plotting.
#'
#' @param x a numeric vector with no `NA`s
#' @param minfrac minimum allowed spacing between consecutive ordered `x`, as a fraction of the range of `x`
#'
#' @returns a sorted numeric vector of non-close values of `x`
#' @export
#' @md
#' @author Frank Harrell
#' @examples
#' rmClose(c(1, 2, 4, 47, 48, 49, 50), minfrac=0.07)
rmClose <- function(x, minfrac=0.05) {
  ## Thin a numeric vector by greedily dropping values that lie closer than
  ## minfrac * range(x) to the last kept value.  Useful for removing axis
  ## breaks that would overlap when plotting.
  ##
  ## x       : numeric vector with no NAs
  ## minfrac : minimum allowed spacing between consecutive kept values,
  ##           as a fraction of the range of x
  ## Returns a sorted vector of the retained unique values of x.
  x <- unique(sort(x))
  n <- length(x)
  ## Guard: with fewer than 2 values there is nothing to thin.  Without this,
  ## 2 : n counts down (2, 1) when n == 1, reads x[2] (NA) and errors.
  if(n < 2) return(x)
  mindist <- minfrac * diff(range(x))
  selected <- x[1]          # always keep the first (leftmost) value
  last_selected <- x[1]
  for (i in 2 : n) {
    ## Keep a value only if it is far enough from the last kept one
    if (x[i] - last_selected >= mindist) {
      selected <- c(selected, x[i])
      last_selected <- x[i]
    }
  }
  selected
}
# Code not used:
# repeat {
# prn(x)
# n <- length(x)
# if(n < 3) return(x)
# if(x[n - 1] > x[n] - mindist) {
# x <- x[- (n - 1)]
# next
# }
# gaps <- diff(c(- mindist * 2, x))
# mind <- min(gaps[- c(1, n)])
# prn(gaps); prn(mind)
# if(mind >= mindist) return(x)
# candidates <- setdiff(which(gaps == mind), c(1, n))
# prn(candidates)
# x <- x[- candidates[1]]
# }
#}
## Re-express a restricted cubic spline fit (knots + coefficients) in
## unrestricted (simple truncated power basis) form.  Returns the expanded
## coefficient vector with attributes: the knots, an R function evaluating
## the spline, its plain-text expression, and a LaTeX representation.
## type='integral' restates the antiderivative of the spline instead.
## NOTE(review): formatSep, latexSN and substring.location are Hmisc
## internals defined elsewhere in the package.
rcspline.restate <- function(knots, coef, type=c("ordinary","integral"),
x="X", lx=nchar(x),norm=2,
columns=65, before="& &", after="\\",
begin="", nbegin=0,
digits=max(8,.Options$digits))
{
type <- match.arg(type)
k <- length(knots)
if(k<3)
stop("must have >=3 knots in a restricted cubic spline")
p <- length(coef)
## If an intercept is included (length(coef) == # knots), peel it off
if(p == k)
{
Intc <- coef[1]
coef <- coef[-1]
p <- p-1
}
else Intc <- 0
if(k-1 != p)
stop("coef must be of length # knots - 1")
## Recover the two coefficients that the restriction (linearity in the
## tails) determines implicitly, undoing the norm scaling first
knotnk <- knots[k]
knotnk1 <- knots[k-1]
knot1 <- knots[1]
kd <- if(norm==0) 1 else if(norm==1)(knotnk-knotnk1)^3 else (knotnk-knot1)^2
coef[-1] <- coef[-1]/kd
d <- c(0, knots-knotnk)[1:p]
coefk <- sum(coef*d)/(knotnk-knotnk1)
d <- c(0, knots-knotnk1)[1:p]
coefk1 <- sum(coef*d)/(knotnk1-knotnk)
if(!length(names(coef)))
names(coef) <- paste(x,1:length(coef),sep="")
coef <- c(coef, coefk, coefk1)
names(coef)[k] <- "1st restricted coef"
names(coef)[k+1] <- "2nd restricted coef"
## For the integral, antidifferentiation scales the linear term by 1/2
## and the cubic (-> quartic) terms by 1/4
if(type=="integral")
coef <- c(.5*coef[1],.25*coef[-1])
cof <- formatSep(coef, digits)
kn <- formatSep(-knots, digits)
## Build two text forms of the expression: txt uses the literal name 'x'
## (for the parsed R function), txt2 uses the user-supplied name in x
if(Intc!=0)
{
txt <- txt2 <- formatSep(Intc, digits)
if(type=="integral")
{
txt <- paste(txt, "* x")
txt2 <- paste(txt2, '*', x)
}
if(coef[1]>=0)
{
txt <- paste(txt, "+");
txt2 <- paste(txt2, '+')
}
}
else txt <- txt2 <- ""
if(cof[1]!=0)
{
txt <- paste(txt, cof[1],
if(type=="ordinary")"* x"
else "* x^2",
sep="")
txt2 <- paste(txt2, cof[1],
if(type=="ordinary") paste("*",x)
else paste("*",x,"^2"),
sep="")
}
## Append one truncated-power term pmax(x - knot, 0)^3 (or ^4) per knot
for(i in 2:(p+2))
{
nam <- paste("pmax(x",
if(knots[i-1]<0) "+"
else NULL,
if(knots[i-1]!=0) kn[i-1]
else NULL,
",0)^",
if(type=="ordinary")"3"
else "4",
sep="")
nam2 <- paste("pmax(",x,
if(knots[i-1]<0) "+"
else NULL,
if(knots[i-1]!=0) kn[i-1]
else NULL,
",0)^",
if(type=="ordinary")"3"
else "4",
sep="")
z <- paste(if(coef[i]>=0 & (i>2 | coef[1]!=0 | Intc!=0)) "+"
else NULL,
cof[i], "*", nam, sep="")
z2 <- paste(if(coef[i]>=0 & (i>2 | coef[1]!=0 | Intc!=0)) "+"
else NULL,
cof[i], "*", nam2, sep="")
txt <- paste(txt , z, sep="")
txt2<- paste(txt2, z2, sep="")
}
## Turn the literal-x text into an unevaluated R function
func <- parse(text=paste('function(x)', txt))
## Now build the LaTeX representation, breaking lines at 'columns' chars
cof <- formatSep(coef, digits)
kn <- formatSep(-knots, digits)
lcof <- nchar(cof)
cof <- latexSN(cof)
cur <- begin; colcnt <- nbegin; tex <- NULL
if(Intc!=0)
{
fint <- formatSep(Intc, digits)
if(type=="integral")
{
fint <- paste(fint, x)
colcnt <- colcnt+2
}
cur <- paste(cur, fint, sep="")
colcnt <- colcnt + nchar(fint)
if(coef[1]>0)
{
cur <- paste(cur, " + ", sep="");
colcnt <- colcnt+3
}
}
if(coef[1]!=0)
{
sp <- if(substring.location(cof[1],"times")$first > 0) "\\:"
else NULL
cur <- paste(cur, cof[1], sp, x,
if(type=="integral") "^2",
sep="")
##\:=medium space in LaTeX
colcnt <- colcnt+lcof[1]+lx+(type=="integral")
}
tex.names <- character(p+2)
size <- lx+lcof[-1]+nchar(kn)+3
for(i in 2:(p+2))
{
nam <- paste("(", x,
if(knots[i-1]<0) "+"
else NULL,
if(knots[i-1]!=0) kn[i-1]
else NULL,
")_{+}^{",
if(type=="ordinary")"3}"
else "4}",
sep="")
q <- paste(if(coef[i]>=0 & (i>2 | coef[1]!=0 | Intc!=0)) "+"
else NULL,
cof[i], nam, sep="")
n <- size[i-1]
## Start a new LaTeX line when the current one would overflow
if(colcnt+n > columns)
{
tex <- c(tex, cur)
cur <- ""
colcnt <- 0
}
cur <- paste(cur, q, sep="")
colcnt <- colcnt+n
}
tex <- c(tex, cur)
tex <- paste(before, tex, after)
## Attach all representations as attributes of the coefficient vector
if(Intc!=0) coef <- c(Intercept=Intc, coef)
attr(coef, "knots") <- knots
attr(coef, "function") <- func
attr(coef, "function.text") <- txt2
attr(coef, "latex") <- tex
names(colcnt) <- NULL
attr(coef, "columns.used") <- colcnt
coef
}
rcsplineFunction <- function(knots, coef=numeric(0), norm=2,
                             type=c('ordinary', 'integral')) {
  ## Build and return an R function that evaluates a restricted cubic
  ## spline (type='ordinary') or its antiderivative (type='integral')
  ## for the given knots, coefficients, and normalization.
  ##
  ## knots : knot locations
  ## coef  : intercept + linear + (k-2) cubic coefficients; if only k-1
  ##         values are given, the intercept is taken as 0
  ## norm  : 0, 1, or 2, matching rcspline.eval's normalization divisor
  type <- match.arg(type)
  k <- length(knots)
  ## Normalization divisor used to scale the truncated-power terms
  kd <- if(norm == 0) 1
        else if(norm == 1) knots[k] - knots[k - 1]
        else (knots[k] - knots[1]) ^ .66666666666666666666666
  f <- function(x, knots, coef, kd, type) {
    k       <- length(knots)
    knotnk  <- knots[k]
    knotnk1 <- knots[k - 1]
    knot1   <- knots[1]
    ## Prepend a zero intercept when only k-1 coefficients were supplied
    if(length(coef) < k) coef <- c(0, coef)
    if(type == 'ordinary') {
      y <- coef[1] + coef[2] * x
      for(i in 1 : (k - 2)) {
        ## Tail restriction: combine the last two knots' truncated cubes
        ## so the spline is linear beyond the outer knots
        tailadj <- ((knotnk1 - knots[i]) *
                      pmax((x - knotnk) / kd, 0) ^ 3 -
                    (knotnk - knots[i]) *
                      (pmax((x - knotnk1) / kd, 0) ^ 3)) /
                   (knotnk - knotnk1)
        y <- y + coef[i + 2] * (pmax((x - knots[i]) / kd, 0) ^ 3 + tailadj)
      }
      return(y)
    }
    ## Integral: antidifferentiate each term (powers rise by one,
    ## with matching 1/2, 1/4 and kd factors)
    y <- coef[1] * x + 0.5 * coef[2] * x * x
    for(i in 1 : (k - 2)) {
      tailadj <- ((knotnk1 - knots[i]) *
                    pmax((x - knotnk) / kd, 0) ^ 4 -
                  (knotnk - knots[i]) *
                    (pmax((x - knotnk1) / kd, 0) ^ 4)) /
                 (knotnk - knotnk1)
      y <- y + 0.25 * coef[i + 2] * kd *
        (pmax((x - knots[i]) / kd, 0) ^ 4 + tailadj)
    }
    y
  }
  ## Freeze the configuration into the returned function's defaults
  formals(f) <- list(x=numeric(0), knots=knots, coef=coef, kd=kd, type=type)
  f
}
Hmisc/R/r2describe.r 0000644 0001762 0000144 00000003636 14574670756 013762 0 ustar ligges users #' Summarize Strength of Relationships Using R-Squared From Linear Regression
#'
#' Function to use `leaps::regsubsets()` to briefly describe which variables more strongly predict another variable. Variables are in a numeric matrix and are assumed to be transformed so that relationships are linear (e.g., using `redun()` or `transcan()`).
#'
#' @title r2describe
#' @param x numeric matrix with 2 or more columns
#' @param nvmax maximum number of columns of x to use in predicting a given column
#'
#' @return nothing
#' @export
#' @md
#' @author Frank Harrell
#'
#' @examples
#' \dontrun{
#' r <- redun(...)
#' r2describe(r$scores)
#' }
r2describe <- function(x, nvmax=10) {
  ## For each column of the numeric matrix x, run forward selection
  ## (leaps::regsubsets) of the remaining columns and print the variables
  ## in order of entry together with the cumulative R^2 achieved.
  ##
  ## x     : numeric matrix with at least 2 columns
  ## nvmax : maximum number of predictors considered per column
  ## Returns nothing (invisibly); output is printed with cat().
  if(! requireNamespace('leaps', quietly=TRUE))
    stop('You must install the leaps package to use r2describe')
  if(! is.numeric(x) || ! is.matrix(x))
    stop('x must be a numeric matrix')
  if(ncol(x) < 2) stop('x must have at least 2 columns')
  p   <- ncol(x)
  nam <- colnames(x)
  cat('\nStrongest Predictors of Each Variable With Cumulative R^2\n')
  for(k in seq_len(p)) {
    ## capture.output() suppresses regsubsets' console chatter
    fchar <- capture.output(
      f <- leaps::regsubsets(x[, -k], x[, k], method='forward',
                             nbest=1, nvmax=min(p - 1, nvmax)))
    s <- summary(f)
    w <- s$which[, -1, drop=FALSE]   # drop the intercept column
    xnm    <- colnames(w)
    xadded <- character(0)
    ## Recover the order in which variables entered the forward model
    for(l in seq_len(nrow(w))) {
      varnow <- xnm[w[l, ]]
      varnew <- setdiff(varnow, xadded)
      xadded <- c(xadded, varnew)
    }
    rsq <- structure(s$rsq, names=xadded)
    ## Stop listing once R^2 is essentially 1 (>= 0.985)
    l <- which(rsq >= 0.985)
    if(length(l)) rsq <- rsq[1 : min(l)]
    cat('\n', nam[k], '\n', sep='')
    fw <- character(0)
    xadded <- names(rsq)
    ## Format as "v1 (r1) + v2 (r2) + ..." with cumulative R^2 values
    for(l in seq_along(rsq))
      fw <- paste0(fw, if(l > 1) ' + ', xadded[l],
                   ' (', round(rsq[l], 3), ')')
    cat(strwrap(fw), sep='\n')
  }
  invisible()
}
## Compatibility backport: define strwrap() only when the running R is so
## old that base R does not provide it.  The implementation below mirrors
## base R's paragraph-wrapping algorithm: split input into paragraphs, then
## into words, and greedily fill lines up to 'width', honoring indent
## (first line), exdent (continuation lines) and an optional prefix.
if(!exists('strwrap')) {
strwrap <- function (x, width = 0.9 * getOption("width"), indent = 0, exdent = 0,
prefix = "", simplify = TRUE) {
indentString <- paste(rep.int(" ", indent), collapse = "")
exdentString <- paste(rep.int(" ", exdent), collapse = "")
y <- list()
## Split into paragraphs (blank-line separated), then into words
z <- lapply(strsplit(x, "\n[ \t\n]*\n"), strsplit, "[ \t\n]")
for (i in seq(along = z)) {
yi <- character(0)
for (j in seq(along = z[[i]])) {
words <- z[[i]][[j]]
nc <- nchar(words, type = "w")
if (any(is.na(nc))) {
nc0 <- nchar(words)
nc[is.na(nc)] <- nc0[is.na(nc)]
}
## Drop zero-length "words", except those following a sentence end
## (kept so that sentence-ending double spaces survive)
if (any(nc == 0)) {
zLenInd <- which(nc == 0)
zLenInd <- zLenInd[!(zLenInd %in% (grep("\\.$",
words) + 1))]
if (length(zLenInd) > 0) {
words <- words[-zLenInd]
nc <- nc[-zLenInd]
}
}
if (length(words) == 0) {
yi <- c(yi, "", prefix)
next
}
## Greedy fill: take as many words as fit within maxLength per line
currentIndex <- 0
lowerBlockIndex <- 1
upperBlockIndex <- integer(0)
lens <- cumsum(nc + 1)
first <- TRUE
maxLength <- width - nchar(prefix, type = "w") -
indent
while (length(lens) > 0) {
k <- max(sum(lens <= maxLength), 1)
if (first) {
first <- FALSE
maxLength <- maxLength + indent - exdent
}
currentIndex <- currentIndex + k
if (nc[currentIndex] == 0)
upperBlockIndex <- c(upperBlockIndex, currentIndex -
1)
else upperBlockIndex <- c(upperBlockIndex, currentIndex)
if (length(lens) > k) {
if (nc[currentIndex + 1] == 0) {
currentIndex <- currentIndex + 1
k <- k + 1
}
lowerBlockIndex <- c(lowerBlockIndex, currentIndex +
1)
}
if (length(lens) > k)
lens <- lens[-(1:k)] - lens[k]
else lens <- NULL
}
## Assemble the wrapped lines: prefix + indent/exdent + joined words
nBlocks <- length(upperBlockIndex)
s <- paste(prefix, c(indentString, rep.int(exdentString,
nBlocks - 1)), sep = "")
for (k in (1:nBlocks)) s[k] <- paste(s[k], paste(words[lowerBlockIndex[k]:upperBlockIndex[k]],
collapse = " "), sep = "")
yi <- c(yi, s, prefix)
}
y <- c(y, list(yi[-length(yi)]))
}
if (simplify)
y <- unlist(y)
y
}
}
## Summarize variables according to a formula, using one of three layouts:
##   method='response': descriptive statistics (fun) of the left-hand-side
##     variable, stratified by each right-hand-side variable in turn
##   method='reverse' : descriptive statistics of each right-hand-side
##     variable, stratified by the (categorical) left-hand-side variable,
##     with optional significance tests
##   method='cross'   : statistics of the response cross-classified by up
##     to 3 right-hand-side variables
## Returns an object of class summary.formula.response / .reverse / .cross
## with print, latex and plot methods defined below.
summary.formula <-
function(formula, data=NULL, subset=NULL, na.action=NULL,
fun=NULL,
method=c('response','reverse','cross'),
overall=method == 'response'|method == 'cross',
continuous=10, na.rm=TRUE, na.include=method != 'reverse',
g=4, quant = c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625,
0.75, 0.875, 0.95, 0.975),
nmin=if(method == 'reverse') 100 else 0,
test=FALSE,
conTest=conTestkw,
catTest=catTestchisq,
ordTest=ordTestpo,
...)
{
call <- match.call()
missmethod <- missing(method)
method <- match.arg(method)
## Multiple left hand side variables -> automatically call summaryM
if(grepl('.*\\+.*~', paste(deparse(formula), collapse='')))
return(summaryM(formula, data=data, subset=subset,
na.action=na.action, overall=overall,
continuous=continuous, na.include=na.include,
quant=quant, nmin=nmin, test=test,
conTest=conTest, catTest=catTest, ordTest=ordTest))
## Build the model frame from the formula, keeping NAs (na.retain)
X <- match.call(expand.dots=FALSE)
X$fun <- X$method <- X$na.rm <- X$na.include <- X$g <-
X$overall <- X$continuous <- X$quant <- X$nmin <- X$test <-
X$conTest <- X$catTest <- X$... <- NULL
if(missing(na.action))
X$na.action <- na.retain
Terms <- if(missing(data)) terms(formula,'stratify')
else terms(formula,'stratify',data=data)
X$formula <- Terms
X[[1]] <- as.name("model.frame")
X <- eval(X, sys.parent())
Terms <- attr(X,"terms")
resp <- attr(Terms,"response")
## With no response variable, default to method='reverse'
if(resp == 0 && missmethod)
method <- 'reverse'
if(test && method != 'reverse')
stop('test=TRUE only allowed for method="reverse"')
if(method != 'reverse' && resp != 1)
stop("must have a variable on the left hand side of the formula")
nact <- attr(X, "na.action")
nvar <- ncol(X)-1
strat <- attr(Terms,'specials')$stratify
getlab <- function(x, default)
{
lab <- attr(x, 'label')
if(!length(lab) || lab == '') default else lab
}
## Handle stratify() terms (only valid for method='response')
if(length(strat)) {
if(method != 'response')
stop('stratify only allowed for method="response"')
sRequire('survival')
temp <- survival::untangle.specials(Terms,'stratify')
strat.name <- var.inner(Terms)[temp$terms]
strat <- if(length(temp$vars) == 1) as.factor(X[[temp$vars]])
else stratify(X[,temp$vars])
strat.label <- getlab(X[,temp$vars[1]], strat.name)
X[[temp$vars]] <- NULL # remove strata factors
} else {
strat <- factor(rep('',nrow(X)))
strat.name <- strat.label <- ''
}
nstrat <- length(levels(strat))
if(resp>0) {
Y <- X[[resp]]
yname <- as.character(attr(Terms,'variables'))[2]
ylabel <- getlab(Y, yname)
if(!is.matrix(Y))
Y <- matrix(Y, dimnames=list(names(Y),yname))
} else {
yname <- ylabel <- NULL
}
## Resolve the summary function and its label (default: column means,
## or 'Fraction' for a 0/1 response)
if(method != 'reverse') {
if(!length(fun)) { # was missing(fun) 25May01
fun <- function(y) apply(y, 2, mean)
uy <- unique(Y[!is.na(Y)]) # fixed 16Mar96
r <- range(uy, na.rm=TRUE)
funlab <- if(length(uy) == 2 && r[1] == 0 & r[2] == 1) "Fraction"
else "Mean"
funlab <- paste(funlab, 'of', yname)
} else if(is.character(fun) && fun == '%') {
fun <- function(y)
{
stats <- 100*apply(y, 2, mean)
names(stats) <- paste(dimnames(y)[[2]],'%')
stats
}
funlab <- paste('% of', yname)
}
## Compute number of descriptive statistics per cell
s <-
if(inherits(Y,'Surv'))
as.vector((1 * is.na(unclass(Y))) %*% rep(1, ncol(Y)) > 0)
else
((if(is.character(Y)) Y == ''|Y == 'NA'
else is.na(Y)) %*%
rep(1,ncol(Y))) > 0
stats <- if(length(dim(Y))) fun(Y[!s,,drop=FALSE])
else fun(Y[!s])
nstats <- length(stats)
name.stats <-
if(length(dn <- dimnames(stats)) == 2)
as.vector(outer(dn[[1]],dn[[2]],FUN=function(a,b)paste(b,a)))
else
names(stats)
if(length(fun)) {
if(length(de <- deparse(fun)) == 2) {
de <- as.list(fun)
de <- as.character(de[[length(de)]])
funlab <- if(de[1] == 'apply') de[length(de)]
else de[1]
## 2nd case is for simple function(x)mean(x) function
} else funlab <- as.character(substitute(fun))
}
if(funlab[1] == '')
funlab <- yname
if(length(name.stats) == 0) {
name.stats <- if(nstats == 1) yname
else paste0(yname , 1 : nstats)
}
}
## ---- method='response': stats of Y within levels of each X variable ----
if(method == 'response') {
X[[resp]] <- NULL # remove response var
s <-
if(!na.rm) FALSE
else if(inherits(Y,'Surv'))
as.vector((1 * is.na(unclass(Y))) %*% rep(1, ncol(Y)) > 0)
else
((if(is.character(Y)) Y == ''|Y == 'NA'
else is.na(Y)) %*%
rep(1,ncol(Y))) > 0
nmissy <- sum(s)
if(nmissy) {
X <- X[!s,,drop=FALSE]
Y <- Y[!s,,drop=FALSE]
strat <- strat[!s]
}
##Compute total number of columns, counting n
nc <- nstrat*(1+nstats)
colname <- rep(c('N',name.stats),nstrat)
rowname <- vname <- vlabel <- vunits <- res <- NULL
dm <- dim(X)
nx <- dm[2]
n <- dm[1]
nlevels <- integer(nx)
labels <- character(nx)
units <- labels
i <- 0
nams <- c(names(X), if(overall)'Overall')
for(v in nams) {
i <- i+1
x <- if(v == 'Overall') factor(rep('',n))
else X[[v]]
if(inherits(x,'mChoice')) x <- as.numeric(x)
labels[i] <- getlab(x, nams[i])
units[i] <- if(length(l <- attr(x,'units'))) l
else ''
## Continuous variables are binned with cut2; 0/1 become No/Yes
if(!(ismc <- is.matrix(x))) {
s <- is.na(x)
if(!is.factor(x)) {
xu <- unique(x[!s]);
lu <- length(xu)
x <- if(lu < continuous) {
r <- range(xu)
if(lu == 2 && r[1] == 0 && r[2] == 1)
factor(x,labels=c('No','Yes'))
else
factor(x)
} else cut2(x, g=g, ...)
}
if(na.include && any(s)) {
x <- na.include(x)
levels(x)[is.na(levels(x))] <- 'NA'
## R 1.5 and later has NA as level not 'NA', satisfies is.na
}
xlev <- levels(x)
if(nmin > 0) {
nn <- table(x);
xlev <- names(nn)[nn >= nmin]
}
} else {
xlev <- dimnames(x)[[2]]
if(!length(xlev))
stop('matrix variables must have column dimnames')
if(!is.logical(x)) {
if(is.numeric(x))
x <- x == 1
else {
x <- structure(casefold(x),dim=dim(x))
x <- x == 'present' | x == 'yes'
}
}
if(nmin > 0) {
nn <- apply(x, 2, sum, na.rm=TRUE)
xlev <- xlev[nn >= nmin]
}
}
nlevels[i] <- length(xlev)
## Apply fun to Y within each (stratum, x level) cell
for(lx in xlev) {
r <- NULL
for(js in levels(strat)) {
j <- if(ismc) strat == js & x[,lx]
else strat == js & x == lx
if(!na.include)
j[is.na(j)] <- FALSE
nj <- sum(j)
f <-
if(nj) {
statz <- unlist(fun(Y[j,,drop=FALSE]))
## 23apr03; had just let matrix replicate to fill
## Thanks: Derek Eder
if(length(statz) != nstats)
stop(paste('fun for stratum',lx,js,'did not return',
nstats, 'statistics'))
matrix(statz, ncol=nstats, byrow=TRUE)
} else rep(NA,nstats)
r <- c(r, nj, f)
}
res <- rbind(res, r)
}
rowname <- c(rowname, xlev)
bl <- rep('',length(xlev)-1)
vname <- c(vname,v,bl)
vlabel <- c(vlabel,labels[i],bl)
vunits <- c(vunits,units[i],bl)
}
rowname[rowname == 'NA'] <- 'Missing'
dimnames(res) <- list(rowname,colname)
at <- list(formula=formula, call=call, n=n, nmiss=nmissy, yname=yname,
ylabel=ylabel,
ycolname=if(length(d<-dimnames(Y)[[2]]))d else yname,
funlab=funlab,
vname=vname, vlabel=vlabel, nlevels=nlevels,
labels=labels, units=units, vunits=vunits,
strat.name=strat.name, strat.label=strat.label,
strat.levels=levels(strat))
attributes(res) <- c(attributes(res), at)
attr(res,'class') <- 'summary.formula.response'
return(res)
}
## ---- method='reverse': stats of each X variable by levels of Y ----
if(method == 'reverse') {
quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 0.975))
if(resp) {
group <- as.factor(X[[resp]])[,drop=TRUE]
group.freq <- table(group)
group.freq <- group.freq[group.freq>0]
if(overall)
group.freq <- c(group.freq, Combined=sum(group.freq))
} else {
group <- rep(0,nrow(X))
group.freq <- NULL
}
nv <- ncol(X)-resp
n <- integer(nv)
type <- n
nams <- names(X)
comp <- dat <- vector("list",nv)
names(comp) <- names(dat) <- if(resp)nams[-1]
else nams
labels <- Units <- vector("character",nv)
if(test) {
testresults <- vector('list', nv)
names(testresults) <- names(comp)
}
for(i in 1:nv) {
w <- X[[resp+i]]
if(length(attr(w, "label")))
labels[i] <- attr(w, "label")
if(length(attr(w, 'units')))
Units[i] <- attr(w, 'units')
if(!inherits(w, 'mChoice')) {
## Few unique values -> treat as categorical
if(!is.factor(w) && !is.logical(w) &&
length(unique(w[! is.na(w)])) < continuous)
w <- as.factor(w)
s <- !is.na(w)
if(na.include && !all(s) && length(levels(w))) {
w <- na.include(w)
levels(w)[is.na(levels(w))] <- 'NA'
s <- rep(TRUE, length(s))
}
n[i] <- sum(s)
w <- w[s]
g <- group[s, drop=TRUE]
if(is.factor(w) || is.logical(w)) {
## Categorical: frequency table by group (type 1)
tab <- table(w, g)
if(test) {
if(is.ordered(w))
testresults[[i]] <- ordTest(g, w)
else
testresults[[i]] <- catTest(tab)
}
if(nrow(tab) == 1) { # 7sep02
b <- casefold(dimnames(tab)[[1]],upper=TRUE)
pres <- c('1','Y','YES','PRESENT')
abse <- c('0','N','NO', 'ABSENT')
jj <- match(b, pres, nomatch=0)
if(jj > 0)
bc <- abse[jj]
else {
jj <- match(b, abse, nomatch=0)
if(jj > 0) bc <- pres[jj]
}
if(jj) {
tab <- rbind(tab, rep(0, ncol(tab)))
dimnames(tab)[[1]][2] <- bc
}
}
if(overall)
tab <- cbind(tab, Combined=apply(tab,1,sum))
comp[[i]] <- tab
type[i] <- 1
} else {
## Continuous: quantiles, mean and SD by group (type 2)
sfn <- function(x, quant)
{
o <- options('digits')
options(digits=15)
## so won't lose precision in quantile names
on.exit(options(o))
c(quantile(x,quant), Mean=mean(x), SD=sqrt(var(x)))
}
qu <- tapply(w, g, sfn, simplify=TRUE, quants)
if(test)
testresults[[i]] <- conTest(g, w)
if(overall)
qu$Combined <- sfn(w, quants)
comp[[i]] <- matrix(unlist(qu),ncol=length(quants)+2,byrow=TRUE,
dimnames=list(names(qu),
c(format(quants),'Mean','SD')))
if(any(group.freq <= nmin))
dat[[i]] <-
lapply(split(w,g),nmin=nmin,
function(x,nmin)
if(length(x) <= nmin)x
else NULL)
type[i] <- 2
}
} else {
## Multiple-choice (mChoice) variables: one row per choice (type 3)
w <- as.numeric(w) == 1 ## multiple choice variables
n[i] <- nrow(w)
g <- as.factor(group)
ncat <- ncol(w)
tab <- matrix(NA, nrow=ncat, ncol=length(levels(g)),
dimnames=list(dimnames(w)[[2]], levels(g)))
if(test) {
pval <- numeric(ncat)
names(pval) <- dimnames(w)[[2]]
d.f. <- stat <- pval
}
for(j in 1:ncat) {
tab[j,] <- tapply(w[,j], g, sum, simplify=TRUE, na.rm=TRUE)
if(test) {
tabj <- rbind(table(g) - tab[j, ], tab[j, ])
st <- catTest(tabj)
pval[j] <- st$P
stat[j] <- st$stat
d.f.[j] <- st$df
}
}
if(test)
testresults[[i]] <- list(P = pval,
stat = stat,
df = d.f.,
testname = st$testname,
statname = st$statname,
namefun = st$namefun,
latexstat = st$latexstat,
plotmathstat = st$plotmathstat)
if(overall)
tab <- cbind(tab, Combined=apply(tab,1,sum))
comp[[i]] <- tab
type[i] <- 3
}
}
labels <- ifelse(nchar(labels), labels, names(comp))
return(structure(list(stats=comp, type=type,
group.name=if(resp)nams[1]
else NULL,
group.label=ylabel,
group.freq=group.freq,
labels=labels, units=Units,
quant=quant, data=dat,
N=sum(!is.na(group)), n=n,
testresults=if(test)testresults
else NULL,
call=call, formula=formula),
class="summary.formula.reverse"))
}
## ---- method='cross': Y statistics by crossed combinations of X ----
if(method == 'cross') {
X[[resp]] <- NULL
Levels <- vector("list",nvar)
nams <- names(X)
names(Levels) <- names(X)
labels <- character(nvar)
for(i in 1:nvar) {
xi <- X[[i]]
if(inherits(xi,'mChoice'))
xi <- factor(format(xi))
else if(is.matrix(xi) && ncol(xi) > 1)
stop('matrix variables not allowed for method="cross"')
labels[i] <- getlab(xi, nams[i])
if(is.factor(xi))
xi <- xi[,drop=TRUE]
if(!is.factor(xi) && length(unique(xi[!is.na(xi)]))>=continuous)
xi <- cut2(xi, g=g, ...)
X[[i]] <- na.include(as.factor(xi))
levels(X[[i]])[is.na(levels(X[[i]]))] <- 'NA'
Levels[[i]] <- c(levels(X[[i]]),if(overall)"ALL")
}
##Make a data frame with all combinations of values (including those
##that don't exist in the data, since trellis needs them)
df <- expand.grid(Levels)
nl <- nrow(df)
N <- Missing <- integer(nl)
na <- is.na(Y %*% rep(1,ncol(Y)))
S <- matrix(NA, nrow=nl, ncol=nstats, dimnames=list(NULL,name.stats))
chk <- function(z, nstats)
{
if(length(z) != nstats)
stop(paste('fun did not return',nstats,
'statistics for a stratum'))
z
}
## Separate loops for 1, 2 and 3 crossing variables
if(nvar == 1) {
df1 <- as.character(df[[1]]); x1 <- X[[1]]
for(i in 1:nl) {
s <- df1[i] == 'ALL' | x1 == df1[i]
w <- if(na.rm) s & !na else s
N[i] <- sum(w)
Missing[i] <- sum(na[s])
S[i,] <- if(any(w))chk(fun(Y[w,,drop=FALSE]),nstats)
else rep(NA,nstats)
}
} else if(nvar == 2) {
df1 <- as.character(df[[1]]);
df2 <- as.character(df[[2]])
x1 <- X[[1]];
x2 <- X[[2]]
for(i in 1:nl) {
s <- (df1[i] == 'ALL' | x1 == df1[i]) & (df2[i] == 'ALL' | x2 == df2[i])
w <- if(na.rm) s & !na
else s
N[i] <- sum(w)
Missing[i] <- sum(na[s])
S[i,] <- if(any(w))chk(fun(Y[w,,drop=FALSE]),nstats)
else rep(NA,nstats)
}
} else if(nvar == 3) {
df1 <- as.character(df[[1]]);
df2 <- as.character(df[[2]])
df3 <- as.character(df[[3]])
x1 <- X[[1]];
x2 <- X[[2]];
x3 <- X[[3]]
for(i in 1:nl) {
s <- (df1[i] == 'ALL' | x1 == df1[i]) & (df2[i] == 'ALL' | x2 == df2[i]) &
(df3[i] == 'ALL' | x3 == df3[i])
w <- if(na.rm) s & !na
else s
N[i] <- sum(w)
Missing[i] <- sum(na[s])
S[i,] <- if(any(w))chk(fun(Y[w,,drop=FALSE]),nstats)
else rep(NA,nstats)
}
} else stop('no more than 3 independent variables allowed')
lab <- names(df)
lab2 <- if(length(lab)>1) paste(lab,collapse=", ")
else lab
heading <- paste(funlab,"by",lab2)
S <- S[,,drop=TRUE]
attr(S,"label") <- yname #funlab
df$S <- S
df$N <- N
df$Missing <- Missing
a <- list(heading=heading,byvarnames=lab2,Levels=Levels,labels=labels,
na.action=nact,formula=formula,call=call,yname=yname,ylabel=ylabel,
class=c("summary.formula.cross","data.frame"))
attributes(df) <- c(attributes(df), a)
df
}
}
##The following makes formula(object) work (using especially for update())
formula.summary.formula.cross <- function(x, ...)
{
  ## Return the formula that was stored on the object when it was built,
  ## so that formula(object) -- and hence update() -- works.
  attr(x, 'formula')
}
## Identity na.action: keeps all rows (including those containing NAs) so
## that summary.formula can count and display missing values itself
na.retain <- function(d) d
## Print method for summary.formula(method='response') objects: formats the
## statistics matrix, optionally as a knitr::kable markdown table, otherwise
## as a character matrix rendered with print.char.matrix.
print.summary.formula.response <-
function(x,
vnames=c('labels','names'), prUnits=TRUE,
abbreviate.dimnames=FALSE,
prefix.width, min.colwidth,
formatArgs=NULL, markdown=FALSE, ...)
{
stats <- x
stats <- unclass(stats)
vnames <- match.arg(vnames)
ul <- vnames == 'labels'
at <- attributes(stats)
ns <- length(at$strat.levels)
vlabels <- at$labels
## Append units (with '*' rendered as space) to variable labels
if(prUnits) {
atu <- gsub('*', ' ', at$units, fixed=TRUE)
vlabels <- ifelse(atu == '',vlabels,
paste0(vlabels,' [',atu,']'))
}
## Caption: response label, strata, N, and # missing
cap <- paste(at$ylabel,
if(ns>1)
paste0(' by',
if(ul) at$strat.label
else at$strat.name),
' N=',at$n,
if(at$nmiss) paste0(', ',at$nmiss,' Missing'))
if(! markdown) cat(cap, '\n\n')
d <- dim(stats)
nr <- length(at$nlevels)
vlab <- if(ul) vlabels[vlabels != '']
else at$vname[at$vname != '']
z <- matrix('',nrow=nr,ncol=1+d[2],dimnames=list(vlab,NULL))
dz <- dimnames(stats)[[1]]
## Format each statistics column; blank out NAs
cstats <- matrix('',nrow=d[1],ncol=d[2])
for(j in 1:d[2]) {
ww <- c(list(stats[,j]), formatArgs)
cstats[,j] <- do.call('format', ww)
cstats[is.na(stats[,j]),j] <- ''
}
if(markdown) {
if(! requireNamespace("knitr", quietly=TRUE))
stop('markdown=TRUE requires the knitr package to be installed')
vlab <- if(ul) at$vlabel else at$vname
if(prUnits && any(at$units != ''))
vlab[vlab != ''] <- paste0(vlab[vlab != ''], '[', at$units, ']')
if(vlab[length(vlab)] == 'Overall')
vlab[length(vlab)] <- '**Overall**'
z <- cbind(vlab, rownames(x), cstats)
colnames(z) <- c('', '', colnames(stats))
return(knitr::kable(z, align=c('l', 'l', rep('r', ncol(cstats))),
caption=cap))
}
## Collapse each variable's levels into newline-joined cells
is <- 1
for(i in 1:nr) {
ie <- is+at$nlevels[i]-1
z[i,1] <- paste(dz[is:ie],collapse='\n')
for(j in 1:d[2]) z[i,j+1] <- paste(cstats[is:ie,j],collapse='\n')
is <- ie+1
}
if(missing(prefix.width))
prefix.width <- max(nchar(dimnames(z)[[1]]))
if(missing(min.colwidth))
min.colwidth <-
max(min(nchar(cstats)[nchar(cstats)>0]), min(nchar(dimnames(stats)[[2]])))
z <- rbind(c('',dimnames(stats)[[2]]), z)
print.char.matrix(z, col.names=FALSE, ...)
invisible()
}
## latex method for summary.formula(method='response') objects.  The trios
## argument renders groups of three statistics (e.g. quartiles) as
## {small a} b {small c}; otherwise statistics are tabulated as-is.
latex.summary.formula.response <-
function(object,
title=first.word(deparse(substitute(object))), caption,
trios, vnames=c('labels','names'), prn=TRUE, prUnits=TRUE,
rowlabel='', cdec=2,
ncaption=TRUE, ...)
{
stats <- object
if(!prn)
stats <- stats[, dimnames(stats)[[2]] != 'N', drop=FALSE]
title <- title # otherwise problem with lazy evaluation 25May01
stats <- unclass(stats)
at <- attributes(stats)
vnames <- match.arg(vnames)
ul <- vnames == 'labels'
ns <- length(at$strat.levels)
nstat <- ncol(stats)/ns
if(!missing(trios)) {
if(is.logical(trios))
trios <- at$ycolname
ntrio <- length(trios)
if(ntrio*3 + prn != nstat) #allow for N
stop('length of trios must be 1/3 the number of statistics computed')
}
## Build the caption, appending strata and N / # missing when requested
if(missing(caption)) caption <- latexTranslate(at$ylabel)
if(ns>1) caption <- paste(caption,' by', if(ul)at$strat.label else
at$strat.name)
if(ncaption)
caption <- paste0(caption,
'~~~~~N=',at$n,
if(at$nmiss) paste0(',~',at$nmiss,' Missing'))
dm <- dimnames(stats)
dm[[1]] <- latexTranslate(dm[[1]], greek=TRUE)
dm[[2]] <- latexTranslate(dm[[2]], greek=TRUE)
dimnames(stats) <- dm
caption <- sedit(caption, "cbind", "")
vn <- if(ul)at$vlabel
else at$vname
if(prUnits) {
atvu <- gsub('*', ' ', at$vunits, fixed=TRUE)
vn <- ifelse(atvu == '', vn,
paste0(vn,'~\\hfill\\tiny{', atvu, '}'))
}
vn <- latexTranslate(vn, greek=TRUE)
## Decimal places per column; N columns get 0 decimals
if(missing(trios)) {
cdec <- rep(cdec, length = nstat)
} else {
cdec <- rep(cdec, length = nstat / 3)
}
cdec <- rep(cdec, ns)
isn <- colnames(stats) %in% c('N', 'n')
if(any(isn) && prn) cdec[isn] <- 0
if(missing(trios)) {
cstats <- unclass(stats)
} else {
## Collapse each trio of columns into one formatted LaTeX column
fmt <- function(z, cdec) ifelse(is.na(z), '', format(round(z, cdec)))
cstats <- list()
k <- m <- 0
for(is in 1 : ns) {
if(prn) {
k <- k + 1
m <- m + 1
cstats[[k]] <- stats[, m] ## N, numeric mode
}
for(j in 1 : ntrio) {
m <- m + 1
k <- k + 1
cstats[[k]] <- paste0('{\\scriptsize ', fmt(stats[,m], cdec[k]), '~}',
fmt(stats[,m + 1], cdec[k]), ' {\\scriptsize ',
fmt(stats[,m + 2], cdec[k]), '}')
m <- m + 2
}
}
names(cstats) <- rep(c(if(prn)'N'
else NULL, trios), ns)
attr(cstats, 'row.names') <- dm[[1]]
attr(cstats,'class') <- 'data.frame'
if(prn)
nstat <- 2 # for n.cgroup below
else
nstat <- 1
}
insert.bottom <-
if(missing(trios))
''
else
'\\noindent {\\scriptsize $a$\\ } $b$ {\\scriptsize $c$\\ } represent the lower quartile $a$, the median $b$, and the upper quartile $c$.'
## Delegate to latex.default, grouping columns by stratum when ns > 1
r <-
if(ns > 1)
latex(cstats, title=title, caption=caption, rowlabel=rowlabel,
n.rgroup=at$nlevels, rgroup=vn[vn != ''],
n.cgroup=rep(nstat,ns), cgroup=at$strat.levels, cdec=cdec,
col.just=rep('c',ncol(cstats)),
rowname=dm[[1]], insert.bottom=insert.bottom, ...)
else
latex(cstats, title=title, caption=caption, rowlabel=rowlabel,
n.rgroup=at$nlevels, rgroup=vn[vn != ''], cdec=cdec,
col.just=rep('c',ncol(cstats)),
rowname=dm[[1]], insert.bottom=insert.bottom, ...)
r
}
## Plot method for summary.formula(method='response') objects: dot charts
## (dotchart2) of the selected statistic(s), one panel per stratum or with
## strata superposed using different plotting symbols.
plot.summary.formula.response <-
function(x, which = 1,
vnames = c('labels', 'names'), xlim, xlab,
pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), superposeStrata = TRUE,
dotfont=1, add=FALSE, reset.par=TRUE,
main, subtitles=TRUE, ...)
{
stats <- x
stats <- unclass(stats)
vnames <- match.arg(vnames)
ul <- vnames == 'labels'
at <- attributes(stats)
ns <- length(at$strat.levels)
if(ns>1 && length(which)>1)
stop('cannot have a vector for which if > 1 strata present')
if(ns < 2)
superposeStrata <- FALSE
vn <- if(ul) at$vlabel
else at$vname
Units <- at$vunits
vn <- ifelse(Units == '', vn, paste0(vn, ' [', Units, ']'))
## dotchart2 groups argument may not be an R plotmath expression
vn <- vn[vn != '']
d <- dim(stats)
n <- d[1]
nstat <- d[2]/ns
vnd <- factor(rep(vn, at$nlevels))
dn <- dimnames(stats)
## Default x range covers the selected statistic across all strata
if(missing(xlim))
xlim <- range(stats[,nstat*((1:ns)-1)+1+which],na.rm=TRUE)
if(missing(main))
main <- at$funlab
nw <- length(which)
pch <- rep(pch, length.out=if(superposeStrata)ns else nw)
dotfont <- rep(dotfont, length.out=nw)
opar <- par(no.readonly=TRUE)
if(reset.par)
on.exit(par(opar))
## Combined N across strata shown in the aux column when superposing
if(superposeStrata) Ns <- apply(stats[,nstat*((1:ns)-1)+1],1,sum)
for(is in 1:ns) {
for(w in 1:nw) {
js <- nstat*(is-1)+1+which[w]
z <- stats[,js]
if(missing(xlab))
xlab <- if(nw>1) dn[[2]][js]
else at$ylabel
dotchart2(z, groups=vnd, xlab=xlab, xlim=xlim,
auxdata=if(superposeStrata) Ns
else stats[,js-which[w]],
auxtitle='N', sort.=FALSE,
pch=pch[if(superposeStrata)is
else w],
dotfont=dotfont[w],
add=add | w>1 | (is > 1 && superposeStrata),
reset.par=FALSE, ...)
if(ns>1 && !superposeStrata)
title(paste(paste(main,if(main != '')' '),at$strat.levels[is]))
else if(main != '') title(main)
if(ns == 1 && subtitles) {
title(sub=paste0('N=', at$n), adj=0, cex=.6)
if(at$nmiss>0)
title(sub=paste0('N missing=', at$nmiss), cex=.6, adj=1)
}
}
}
## Register a Key() function so the user can add a strata legend later
if(superposeStrata) { ##set up for Key()
Key1 <- function(x=NULL, y=NULL, lev, pch) {
oldpar <- par('usr', 'xpd')
par(usr=c(0,1,0,1),xpd=NA)
on.exit(par(oldpar))
if(is.list(x)) {
y <- x$y;
x <- x$x
}
if(!length(x)) x <- 0
if(!length(y)) y <- 1 ## because of formals()
rlegend(x, y, legend=lev, pch=pch, ...)
invisible()
}
formals(Key1) <- list(x=NULL,y=NULL,lev=at$strat.levels,
pch=pch)
.setKey(Key1)
}
invisible()
}
## Plot method for summary.formula objects created with method='reverse'.
## Categorical (type 1) and multiple-choice (type 3) variables are combined
## into one dot chart of proportions (or percentages), one dot series per
## stratum; continuous (type 2) variables each get their own panel drawn as
## a quantile dot chart ('dot'), box-percentile plot ('bp'), or strip chart
## of raw data ('raw').  Stored test results may be displayed via prtest.
## Side effects: draws on the current graphics device and registers Key/Key2
## legend functions through .setKey/.setKey2.
## Returns, invisibly, the number of pages of plots produced.
plot.summary.formula.reverse <-
  function(x, vnames = c('labels', 'names'), what = c('proportion','%'),
           which = c('both', 'categorical', 'continuous'),
           xlim = if(what == 'proportion') c(0,1)
                  else c(0,100),
           xlab = if(what == 'proportion') 'Proportion'
                  else 'Percentage',
           pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), exclude1 = TRUE,
           dotfont = 1, main,
           prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001,
           conType = c('dot', 'bp', 'raw'), cex.means = 0.5, ...)
{
  obj <- x
  vnames  <- match.arg(vnames)
  what    <- match.arg(what)
  which   <- match.arg(which)
  conType <- match.arg(conType)
  ul <- vnames == 'labels'
  ## prtest=FALSE is shorthand for suppressing all test output
  if(is.logical(prtest) && ! prtest) prtest <- 'none'
  test <- obj$testresults
  if(!length(test)) prtest <- 'none'
  varNames <- names(obj$stats)
  vn <- if(ul) obj$labels
        else varNames
  Units <- obj$units
  ## nw = number of strata (columns); 1 when no grouping variable
  nw <- if(lg <- length(obj$group.freq)) lg
        else 1
  gnames <- names(obj$group.freq)
  if(missing(main))
    main <- if(nw == 1)''
            else
              paste(if(what == 'proportion')'Proportions'
                    else 'Percentages','Stratified by',
                    obj$group.label)
  ## recycle plotting symbols/fonts so there is one per stratum
  pch     <- rep(pch,     length.out=nw)
  dotfont <- rep(dotfont, length.out=nw)
  lab <- vnd <- z <- nmiss <- vnamd <- NULL
  type <- obj$type; n <- obj$n
  opar <- par()
  on.exit(setParNro(opar))
  npages <- 0
  if(which != 'continuous' && any(type %in% c(1,3))) {
    ftstats <- NULL
    ## build matrix z: one row per displayed category level, one column
    ## per stratum, accumulating labels (lab) and owning variable (vnd)
    for(i in (1:length(type))[type == 1 | type == 3]) {
      nam <- vn[i]
      tab <- obj$stats[[i]]
      if(nw == 1)
        tab <- as.matrix(tab)
      nr <- nrow(tab)
      ## type 1: denominators are column totals; type 3 (multiple choice):
      ## denominators are the per-stratum frequencies
      denom <- if(type[i] == 1) apply(tab, 2, sum)
               else obj$group.freq
      y <- (if(what == 'proportion') 1
            else 100) * sweep(tab, 2, denom, FUN='/')
      lev <- dimnames(y)[[1]]
      ## exclude1: for 2-level variables show only the second level
      exc <- exclude1 && (nr == 2)
      jstart <- if(exc) 2
                else 1
      ## nn <- c(nn, n[i], rep(NA, if(exc) nr-2 else nr-1))
      ## k <- 0
      rl <- casefold(lev)
      ## binary variables (0/1, false/true, absent/present) are labeled by
      ## the variable name alone, not the level
      binary <- type[i] == 1 && exc &&
        (all(rl %in% c("0","1"))|all(rl %in% c("false","true"))|
         all(rl %in% c("absent","present")))
      for(j in jstart:nrow(y)) {
        if(nw == 1)
          z <- rbind(z, y[j,])
        else {
          ## align this variable's strata with the full stratum set; strata
          ## absent from y[j,] stay NA
          yj <- rep(NA, nw)
          names(yj) <- gnames
          yj[names(y[j,])] <- y[j,]
          z <- rbind(z, yj)
        }
        lab <- c(lab, if(binary) ''
                      else lev[j])
        vnd   <- c(vnd, nam)
        vnamd <- c(vnamd, varNames[i])
      }
      if(any(prtest != 'none')) {
        fts <- formatTestStats(test[[varNames[i]]], type[i] == 3,
                               if(type[i] == 1)1
                               else 1 : nr,
                               prtest=prtest,
                               plotmath=TRUE,
                               pdig=pdig, eps=eps)
        ## pad with empty expressions so the aux column lines up with rows
        ftstats <- c(ftstats, fts,
                     if(type[i] == 1 && nr - exc - 1 > 0)
                       rep(expression(''),
                           nr - exc - 1))
      }
    }
    dimnames(z) <- list(lab, dimnames(z)[[2]])
    ## one dotchart2 pass per stratum; test statistics only on first pass
    for(i in 1 : nw) {
      zi <- z[,i]
      if(any(prtest == 'none') || i > 1)
        dotchart2(zi, groups=vnd, xlab=xlab, xlim=xlim,
                  sort.=FALSE, pch=pch[i],
                  dotfont=dotfont[i],
                  add=i > 1, ...)
      else
        dotchart2(zi, groups=vnd, auxdata=ftstats,
                  xlab=xlab, xlim=xlim, sort.=FALSE,
                  pch=pch[i], dotfont=dotfont[i],
                  add=i > 1, ...)
    }
    if(main != '')
      title(main)
    npages <- npages + 1
    setParNro(opar)
    ## Dummy key if only one column, so won't use another Key from an
    ## earlier run
    if(nw < 2) {
      Key1 <- function(...)invisible(NULL)
      .setKey(Key1)
    } else { ##set up for key() if > 1 column
      Key3 <- function(x=NULL, y=NULL, lev, pch) {
        oldpar <- par('usr', 'xpd')
        par(usr=c(0,1,0,1),xpd=NA)
        on.exit(par(oldpar))
        if(is.list(x)) {
          y <- x$y;
          x <- x$x
        }
        ## Even though par('usr') shows 0,1,0,1 after lattice draws
        ## its plot, it still needs resetting
        if(!length(x))
          x <- 0
        if(!length(y))
          y <- 1 ## because of formals()
        rlegend(x, y, legend=lev, pch=pch, ...)
        invisible()
      }
      ## bake current stratum levels/symbols into the key's defaults
      formals(Key3) <- list(x=NULL,y=NULL,lev=names(obj$group.freq),
                            pch=pch)
      .setKey(Key3)
    }
  }
  ncont <- sum(type == 2)
  if(which != 'categorical' && ncont) {
    mf <- par('mfrow')
    if(length(mf) == 0)
      mf <- c(1,1)
    ## choose a multi-panel layout only if the user hasn't already set one
    if(ncont > 1 & max(mf) == 1) {
      mf <- if(ncont <= 4)c(2,2)
            else if(ncont <= 6)c(2,3)
            else if(ncont <= 9)c(3,3)
            else c(4,3)
      ## if(ncont <= 12)c(4,3) else if(ncont <= 16) c(4,4) else c(5,4)
      nr <- mf[1]
      m <- par('mar')
      par(mfrow=mf)
    }
    npages <- npages + ceiling(sum(type == 2) / prod(mf))
    for(i in (1:length(type))[type == 2]) {
      nam <- labelPlotmath(vn[i], Units[i])
      st <- obj$stats[[i]]
      if(nw == 1)
        st <- as.matrix(st)
      if(conType == 'dot') {
        ## keep only quantile columns; locate Q1/median/Q3 by numeric
        ## column names with a small tolerance
        quantile.columns <- dimnames(st)[[2]] %nin% c('Mean','SD')
        st <- st[,quantile.columns,drop=FALSE]
        xlim <- range(st)
        ns  <- as.numeric(dimnames(st)[[2]])
        l   <- 1:length(ns)
        q1  <- l[abs(ns-.25) < .001]
        med <- l[abs(ns-.5)  < .001]
        q3  <- l[abs(ns-.75) < .001]
        st  <- st[,c(q1,med,q3),drop=FALSE]
        ## pch 91/93 are '[' and ']' for the outer quantiles
        for(j in 1:3) {
          stj <- st[,j]
          if(nw == 1)
            names(stj) <- ''
          dotchart2(stj, xlab=nam, xlim=xlim, sort.=FALSE,
                    pch=c(91,
                          if(FALSE)183
                          else 16,
                          93)[j],
                    dotfont=dotfont[1],
                    add=j > 1, ...)
        }
        Key2 <- function(x=NULL, y=NULL, quant, ...)
        {
          quant <- format(quant)
          txt <- paste0('(',quant[2],',',quant[3],',',quant[4],
                        ') quantiles shown\nx-axes scaled to (',quant[1],',',
                        quant[5],') quantiles')
          if(length(x)) {
            if(is.list(x)) {
              y <- x$y;
              x <- x$x
            }
            text(x,y,txt, cex=.8, adj=0, ...)
          } else
            mtitle(lr=txt, cex.l=.8, line=1, ...)
          invisible()
        }
        formals(Key2) <- list(x=NULL,y=NULL,quant=obj$quant)
        .setKey2(Key2)
      } else if(conType == 'bp')
        bpplt(st, xlab=nam, cex.points=cex.means)
      else
        stripChart(obj$data[[i]], xlab=nam)
      if(all(prtest != 'none')) {
        fts <- formatTestStats(test[[varNames[i]]], prtest=prtest,
                               plotmath=TRUE,
                               pdig=pdig, eps=eps)
        title(fts, line=.5)
      }
    }
  }
  invisible(npages)
}
#This version of the standard dotchart function allows a vector of values
#to be specified (typically cell sizes) that are written to the right
#of horizontal (only) dot charts. New vectors auxdata and auxgdata, and
#a label for auxdata, auxtitle.
#Also added: sort. parameter, to allow suppression of rearrangements of data,
#and added the parameter `add'. Reference lines are always drawn with lwd=1.
#There's also a new parameter, groupfont, which specifies a font number for
#group headings.
#cex.labels is a cex to be used only for category labels. Default is cex.
#Added reset.par - set to TRUE to reset par() after making plot. You will
#need to set reset.par to TRUE for the last call in a sequence.
## Enhanced dot chart.
##
## data      : numeric vector of values to plot (one dot per element)
## labels    : category labels; default is names(data), else "# 1", "# 2", ...
## groups    : optional grouping vector; group headings are drawn and
##             rows are laid out group by group
## gdata     : optional values plotted on group-heading rows
## horizontal: TRUE for horizontal dot chart (values on x axis)
## auxdata/auxgdata/auxtitle: extra column of text written in the right
##             margin (horizontal charts only)
## sort.     : if FALSE, data are assumed already ordered within groups
## add       : add points to an existing dotchart2 plot
## reset.par : restore par(mai, cex) on exit (set TRUE on the last call
##             of a sequence of add=TRUE calls)
## leavepar  : if TRUE, do not touch par(mai) at all
## Remaining arguments control symbols, fonts, sizes, reference lines and
## axis construction (see comment header above this function).
## Side effects only (draws on current device); returns invisible(NULL).
dotchart2 <-
  function(data, labels, groups = NULL, gdata = NA, horizontal = TRUE,
           pch = 16,
           xlab = "", ylab="", xlim=NULL, auxdata, auxgdata=NULL, auxtitle,
           lty = 1,
           lines = TRUE, dotsize = .8, cex = par("cex"),
           cex.labels = cex, cex.group.labels = cex.labels*1.25, sort.=TRUE,
           add=FALSE, dotfont=par('font'),
           groupfont=2, reset.par=add, xaxis=TRUE,
           width.factor=1.1, lcolor='gray',
           leavepar=FALSE, axisat=NULL, axislabels=NULL,
           ...)
{
  if(!add) {
    plot.new()   ## needed so strwidth() can be used before plot()
    par(new=TRUE)
  }
  ieaux <- if(missing(auxdata)) FALSE else is.expression(auxdata)
  ## mtext wrapper that swallows srt (mtext does not accept it) and forces
  ## horizontal label orientation
  mtextsrt <- function(..., srt=0) mtext(..., las=1)
  ndata <- length(data)
  if(missing(labels)) {
    if(!is.null(names(data)))
      labels <- names(data)
    else labels <- paste("#", seq(along = data))
    ## bug fix: was seq(along = ndata), which always yielded the single
    ## label "# 1" because ndata is a scalar length, not the data vector
  }
  else
    labels <- rep(as.character(labels), length = ndata)
  if(missing(groups)) {
    glabels <- NULL
    gdata <- NULL
  }
  else {
    if(!sort.) {
      ##assume data sorted in groups, but re-number groups
      ##to be as if groups given in order 1,2,3,...
      ug <- unique(as.character(groups))
      groups <- factor(as.character(groups),levels=ug)
    }
    groups <- unclass(groups)
    glabels <- levels(groups)
    gdata <- rep(gdata, length = length(glabels))
    ## stable sort by group, preserving within-group order
    ord <- order(groups, seq(along = groups))
    groups <- groups[ord]
    data <- data[ord]
    labels <- labels[ord]
    if(!missing(auxdata)) auxdata <- auxdata[ord]
  }
  alldat <- c(data, gdata)
  if(!missing(auxdata)) {
    auxdata <- c(auxdata, auxgdata)
    if(!ieaux) auxdata <- format(auxdata)
  }
  alllab <- paste(c(labels, glabels),'')
  ## set up margins and user coordinates, draw box
  tcex <- par('cex')
  tmai <- par("mai")
  oldplt <- par("plt")
  if(reset.par && !leavepar)
    on.exit(par(mai = tmai, cex = tcex))
  par(cex = cex)
  ## left-margin width sized to the widest category/group label
  mxlab <- .1+max(strwidth(labels, units='inches',cex=cex.labels),
                  if(length(glabels))
                    strwidth(glabels,units='inches',cex=cex.group.labels))*
          width.factor
  if(horizontal) {
    tmai2 <- tmai[3:4]
    ## widen right margin to hold the auxiliary text column
    if(!missing(auxdata))
      tmai2[2] <- .2+width.factor*
        max(strwidth(if(ieaux) auxdata else format(auxdata),
                     units='inches',cex=cex.labels))
    if(!leavepar) par(mai = c(tmai[1], mxlab, tmai2))
    if(!add)
      plot(alldat, seq(along = alldat), type = "n",
           ylab = '', axes = FALSE, xlab = '', xlim=xlim, ...)
    logax <- par("xaxt") == "l"
  }
  else {
    if(!leavepar) par(mai = c(mxlab, tmai[2:4]))
    if(!add)
      plot(seq(along = alldat), alldat, type = "n",
           xlab = "", axes = FALSE, ylab = '', ...)
    logax <- par("yaxt") == "l"
  }
  tusr <- par("usr")
  ## frame the plot region; with a log axis usr holds log10 coordinates
  if(!add && logax) {
    if(horizontal)
      abline(v = 10^tusr[1:2], h = tusr[3:4])
    else abline(v = tusr[1:2], h = 10^tusr[3:4])
  }
  else if(!add) abline(v = tusr[1:2], h = tusr[3:4])
  ## vertical slots: one per data point plus two per group heading
  den <- ndata + 2 * length(glabels) + 1
  if(horizontal) {
    if(!add && xaxis)
      mgp.axis(1, axistitle=xlab, at=axisat, labels=axislabels)
    delt <- ( - (tusr[4] - tusr[3]))/den
    ypos <- seq(tusr[4], by = delt, length = ndata)
  }
  else {
    if(!add)
      mgp.axis(2, axistitle=xlab, at=axisat, labels=axislabels)
    delt <- (tusr[2] - tusr[1])/den
    ypos <- seq(tusr[1], by = delt, length = ndata)
  }
  if(!missing(groups)) {
    ## shift rows down by two slots at every group change, then derive the
    ## heading positions (ypos2) from the gaps so created
    ypos1 <- ypos + 2 * delt *
      (if(length(groups)>1) cumsum(c(1, diff(groups) > 0)) else 1)
    diff2 <- c(3 * delt, diff(ypos1))
    ypos2 <- ypos1[abs(diff2 - 3 * delt) < abs(0.001 * delt)] -
      delt
    ypos <- c(ypos1, ypos2) - delt
  }
  ##put on labels and data
  ypos <- ypos + delt
  nongrp <- 1:ndata
  if(horizontal) {
    xmin <- par('usr')[1]
    if(!add && lines)
      abline(h = ypos[nongrp], lty = lty, lwd=1, col=lcolor)
    points(alldat, ypos, pch = pch, cex = dotsize * cex, font=dotfont, ...)
    if(!add && !missing(auxdata)) {
      faux <- if(ieaux) auxdata else format(auxdata)
      upedge <- par('usr')[4]
      outerText(faux, ypos[nongrp], cex=cex.labels)
      if(!missing(auxtitle))
        outerText(auxtitle,
                  upedge+strheight(auxtitle,cex=cex.labels)/2,
                  cex=cex.labels)
    }
    if(!add) {
      labng <- alllab[nongrp]
      ## Bug in sending character strings to mtext or text containing
      ## [ or ] - they don't right-justify in S+
      bracket <- substring(labng,1,1) == '[' |
        substring(labng,nchar(labng),nchar(labng)) == ']'
      yposng <- ypos[nongrp]
      s <- !bracket
      if(!is.na(any(s)) && any(s))
        mtextsrt(paste(labng[s],''), 2, 0, at=yposng[s],
                 srt=0, adj=1, cex=cex.labels)
      s <- bracket
      if(!is.na(any(s)) && any(s))
        text(rep(par('usr')[1],sum(s)),
             yposng[s], labng[s], adj=1,
             cex=cex.labels, srt=0,xpd=NA)
      if(!missing(groups))
        mtextsrt(paste(alllab[ - nongrp],''), 2, 0, at = ypos[ - nongrp],
                 srt = 0, adj = 1, cex = cex.group.labels, font=groupfont)
    }
  }
  else {
    if(!add && lines)
      abline(v = ypos[nongrp], lty = lty, lwd=1, col=lcolor)
    points(ypos, alldat, pch = pch, cex = dotsize * cex, font=dotfont, ...)
    if(!add) mtextsrt(alllab[nongrp], 1, 0,
                      at = ypos[nongrp], srt = 90, adj = 1,
                      cex = cex.labels)
    if(!add && !missing(groups))
      mtextsrt(alllab[ - nongrp], 1, 0, at = ypos[ - nongrp],
               srt = 90, adj = 1, cex = cex.group.labels, font=groupfont)
  }
  plt <- par("plt")
  ## recompute usr relative to the original plot region (value retained for
  ## historical reasons; function returns nothing)
  if(horizontal) {
    frac <- (oldplt[2] - oldplt[1])/(oldplt[2] - plt[1])
    umin <- tusr[2] - (tusr[2] - tusr[1]) * frac
    tusr <- c(umin, tusr[2:4])
  }
  else {
    frac <- (oldplt[4] - oldplt[3])/(oldplt[4] - plt[3])
    umin <- tusr[4] - (tusr[4] - tusr[3]) * frac
    tusr <- c(tusr[1:2], umin, tusr[4])
  }
  invisible()
}
## Print method for summary.formula objects created with method='reverse'.
## Builds a character matrix of formatted descriptive statistics (one row
## group per variable, one column per stratum, plus optional N and test
## columns) and prints it via print.char.matrix.
## Returns the character matrix invisibly.
##
## prn   : print the column of per-variable N (default: when any n differs
##         from the overall N)
## npct  : how to show counts next to percents for categorical variables
## prtest: which test-statistic components to print (if tests stored)
## prmsd : also print mean +/- SD for continuous variables
print.summary.formula.reverse <-
  function(x, digits, prn=any(n != N), pctdig=0,
           what=c('%', 'proportion'),
           npct=c('numerator','both','denominator','none'),
           exclude1=TRUE, vnames=c("labels","names"), prUnits=TRUE,
           sep="/", abbreviate.dimnames=FALSE,
           prefix.width=max(nchar(lab)),
           min.colwidth, formatArgs=NULL, round=NULL,
           prtest=c('P','stat','df','name'), prmsd=FALSE, long=FALSE,
           pdig=3, eps=0.001, ...)
{
  npct   <- match.arg(npct)
  vnames <- match.arg(vnames)
  what   <- match.arg(what)
  ## prtest=FALSE suppresses all test output
  if(is.logical(prtest) && !prtest)
    prtest <- 'none'
  stats <- x$stats
  nv <- length(stats)
  cstats <- lab <- character(0)
  nn <- integer(0)
  type <- x$type
  n <- x$n
  N <- x$N
  nams <- names(stats)
  labels <- x$labels
  Units <- x$units
  test <- x$testresults
  if(! length(test)) prtest <- 'none'
  ## nw = number of strata; 1 when no grouping variable
  nw <- if(lg <- length(x$group.freq)) lg else 1
  gnames <- names(x$group.freq)
  if(!missing(digits)) {
    ## temporarily honor a digits= request through options()
    oldopt <- options('digits')
    options(digits=digits)
    on.exit(options(oldopt))
  }
  cstats <- NULL
  for(i in 1:nv) {
    nn <- c(nn, n[i])
    nam <- if(vnames == "names") nams[i] else labels[i]
    if(prUnits && nchar(Units[i]))
      nam <- paste0(nam,' [', gsub('*',' ', Units[i], fixed=TRUE),']')
    tr <- if(length(test) && all(prtest != 'none')) test[[nams[i]]]
          else NULL
    ## categorical / multiple-choice vs continuous formatting
    if(type[i] == 1 || type[i] == 3) {
      cs <- formatCats(stats[[i]], nam, tr, type[i],
                       if(length(x$group.freq)) x$group.freq else x$n[i],
                       what, npct, pctdig, exclude1, long, prtest,
                       pdig=pdig, eps=eps)
      ## N applies only to the first row of a multi-row category block
      nn <- c(nn, rep(NA, nrow(cs) - 1))
    } else cs <- formatCons(stats[[i]], nam, tr, x$group.freq, prmsd,
                            sep, formatArgs, round, prtest,
                            pdig=pdig, eps=eps)
    cstats <- rbind(cstats, cs)
  }
  lab <- dimnames(cstats)[[1]]
  gl <- names(x$group.freq)
  gl <- if(length(gl)) paste0(gl," \n(N=",x$group.freq,")")
        else ""
  ## append the test-statistic column heading if tests are printed
  if(length(test) && ! all(prtest == 'none'))
    gl <- c(gl,
            if(length(prtest) == 1 && prtest != 'stat')
              if(prtest == 'P')'P-value'
              else prtest
            else ' Test\nStatistic')
  nc <- nchar(cstats)
  spaces <- substring("                                                        ",
                      1, (max(nc)-nc+1)/2)   # center strings
  dc <- dim(cstats)
  cstats <- paste0(spaces, cstats)
  dim(cstats) <- dc
  if(prn) {
    cnn <- format(nn)
    cnn[is.na(nn)] <- ''
    cstats <- cbind(cnn, cstats)
    gl <- c('N', gl)
  }
  cstats <- rbind(gl, cstats)
  dimnames(cstats) <- list(c('',lab), NULL)
  cat("\n\nDescriptive Statistics",
      if(length(x$group.label))
        paste(" by",x$group.label)
      else
        paste0("  (N=",x$N,")"),"\n\n", sep="")
  if(missing(min.colwidth))
    min.colwidth <- max(min(nchar(gl)),min(nc[nc>0]))
  print.char.matrix(cstats, col.names=FALSE,
                    col.txt.align='left', ...)
  invisible(cstats)
}
## Function to format subtable for categorical var, for method='reverse'
formatCats <- function(tab, nam, tr, type, group.freq,
what=c('%', 'proportion'),
npct, pctdig, exclude1, long, prtest,
lang='plain', testUsed=character(0),
npct.size='scriptsize', pdig=3, eps=.001,
footnoteTest=TRUE, dotchart=FALSE, mspecs=markupSpecs)
{
what <- match.arg(what)
gnames <- names(group.freq)
nr <- nrow(tab)
specs <- mspecs[[lang]]
spc <- specs$space
sspc <- if(lang == 'plain') '' else specs$sspace
lspc <- specs$lspace
bold <- specs$bold
frac <- specs$frac
if(lang != 'latex' && ! is.function(npct.size))
npct.size <- function(x) x
else if(! is.function(npct.size)) {
npctsize <- npct.size
npct.size <- function(x) paste0('{\\', npctsize, ' ', x, '}')
}
## If there was a missing column of tab because e.g. the variable was
## always NA for one (or more) of the groups, add columns of NAs
if(ncol(tab) < length(group.freq)) {
tabfull <- matrix(NA, nrow=nr, ncol=length(group.freq),
dimnames=list(dimnames(tab)[[1]], gnames))
tabfull[,dimnames(tab)[[2]]] <- tab
tab <- tabfull
}
denom <- if(type == 1) apply(tab, 2, sum)
else group.freq
pct <- if(ncol(tab) > 1) sweep(tab, 2, denom, FUN='/') else tab / denom
pct <- pct * (if(what == '%') 100 else 1)
cpct <- paste0(format(round(pct, pctdig)),
if(lang == 'latex' && what == '%') '\\%'
else if(what == '%') "%")
denom.rep <- matrix(rep(format(denom), nr), nrow=nr, byrow=TRUE)
if(npct != 'none')
cpct <-
paste(cpct,
switch(npct,
numerator = npct.size(paste0(' (', format(tab), ')')),
denominator = npct.size(paste0(' of ', denom.rep)),
both = npct.size(paste0(frac(format(tab),
denom.rep))),
slash = npct.size(paste0(spc, format(tab),
sspc, '/', sspc, denom.rep))
) )
if(lang == 'latex') cpct <- sedit(cpct, ' ', spc)
dim(cpct) <- dim(pct)
dimnames(cpct) <- dimnames(pct)
cpct[is.na(pct)] <- ""
lev <- dimnames(pct)[[1]]
exc <- exclude1 && (nr == 2) && (type == 1)
rl <- casefold(dimnames(pct)[[1]])
binary <- type == 1 && exc &&
(all(rl %in% c("0","1")) | all(rl %in% c("false","true")) |
all(rl %in% c("absent","present")))
if(binary) long <- FALSE
jstart <- if(exc) 2 else 1
nw <- if(lg <- length(group.freq)) lg else 1
lab <- if(binary) nam
else if(long) c(nam, paste(spc, spc, lev[jstart : nr]))
else c(paste(nam, ':', lev[jstart]),
if(nr > jstart) paste(lspc, lev[(jstart + 1) : nr]))
cs <- matrix('', nrow=long + (if(exc) nr - 1
else nr),
ncol = nw + (length(tr) > 0),
dimnames = list(lab, c(gnames,
if(length(tr)) ''
else NULL)))
if(nw == 1)
cs[(long + 1) : nrow(cs), 1] <- cpct[jstart : nr, ]
else
cs[(long + 1) : nrow(cs), 1 : nw] <- cpct[jstart : nrow(cpct), gnames]
if(lang == 'latex' && dotchart && ncol(pct) <= 3) {
locs <- c(3,-3,5,-5,7,-7,9,-9)
points <- c("\\circle*{4}","\\circle{4}","\\drawline(0,2)(-1.414213562,-1)(1.414213562,-1)(0,2)")
point.loc <-
sapply(jstart:nrow(pct),
function(i) {
paste(ifelse(is.na(pct[i,]), "",
paste0("\\put(", pct[i,], ",0){",
points[1:ncol(pct)],"}")),
collapse='')
})
error.loc <- character(nrow(tab) - exc)
k <- 0
for(i in jstart:ncol(tab)) {
if(i > jstart) {
p1prime <- (tab[,i] + 1)/(denom[i] + 2)
d1 <- p1prime*(1-p1prime)/denom[i]
for(j in jstart:(i-1)) {
k <- k + 1
p2prime <- (tab[,j] + 1)/(denom[j] + 2)
error <- 196 * sqrt(d1 + p2prime * (1 - p2prime)/denom[j])
bar <- ifelse(is.na(error), "",
paste0("\\put(", (pct[,i] + pct[,j])/2 - error, ",",
locs[k],"){\\line(1,0){",error*2,"}}"))
error.loc <- paste0(error.loc, bar)
}
}
}
scale <- character(nrow(tab) - exc)
scale[1] <- "\\multiput(0,2)(25,0){5}{\\color[gray]{0.5}\\line(0,-1){4}}\\put(-5,0){\\makebox(0,0){\\tiny 0}}\\put(108,0){\\makebox(0,0){\\tiny 1}}"
cl <- paste0("\\setlength\\unitlength{1in/100}\\begin{picture}(100,10)(0,-5)",
scale,"\\put(0,0){\\color[gray]{0.5}\\line(1,0){100}}",
point.loc, error.loc,
"\\end{picture}")
cs[(long + 1) : nrow(cs), ncol(cs)] <- cl
}
if(length(tr)) {
ct <- formatTestStats(tr, type == 3,
if(type == 1) 1
else 1 : nr,
prtest, lang=lang, testUsed=testUsed,
pdig=pdig, eps=eps, footnoteTest=footnoteTest,
mspecs=mspecs)
if(length(ct) == 1)
cs[1, ncol(cs)] <- ct
else
cs[(long + 1) : nrow(cs), ncol(cs)] <- ct
}
cs
}
## Function to format subtable for continuous var, for method='reverse'
formatCons <- function(stats, nam, tr, group.freq, prmsd, sep='/',
formatArgs=NULL, round=NULL, prtest,
lang='plain', testUsed=character(0),
middle.bold=FALSE, outer.size=NULL, msdsize=NULL,
brmsd=FALSE, pdig=3, eps=.001, footnoteTest=TRUE,
prob=c(0.25, 0.5, 0.75), prN=FALSE, mspecs=markupSpecs)
{
specs <- mspecs[[lang]]
spc <- specs$space
bold <- if(middle.bold) specs$bold else function(x) x
lspc <- specs$lspace
sup <- specs$sup
br <- specs$br
plminus <- specs$plminus
math <- specs$math
if(lang == 'plain' || ! length(msdsize)) msdsize <- function(x) x
if(! is.function(msdsize)) {
Msdsize <- msdsize
if(lang == 'latex')
msdsize <- function(x) paste0('{\\', Msdsize, ' ', x, '}')
}
if(lang == 'plain') outer.size <- function(x) x
if(! is.function(outer.size)) {
Outer.size <- outer.size
outer.size <- function(x) paste0('{\\', Outer.size, ' ', x, '}')
}
nw <- if(lg <- length(group.freq)) lg else 1
ns <- dimnames(stats)[[2]]
ns <- ifelse(ns %in% c('Mean','SD','N'), '-1', ns)
ns <- as.numeric(ns)
l <- 1:length(ns)
if(length(prob) == 3) {
qs <- numeric(3)
for(i in seq_along(qs)) {
qs[i] <- l[abs(ns - prob[i]) < .001]
}
} else {
q1 <- l[abs(ns - .25) < .001]
med <- l[abs(ns - .5 ) < .001]
q3 <- l[abs(ns - .75) < .001]
qs <- c(q1, med, q3)
}
qu <- stats[,qs,drop=FALSE]
if(prmsd)
qu <- cbind(qu, stats[, c('Mean', 'SD'), drop=FALSE])
if(length(round) && round == 'auto') {
r <- max(abs(stats[, colnames(stats) %nin% c('N', 'SD')]), na.rm=TRUE)
round <- if(r == 0) 2
else max(0, min(5, 3 - round(log10(r))))
}
if(length(round)) qu <- round(qu, round)
ww <- c(list(qu), formatArgs)
cqu <- do.call('format', ww)
if(prN)
cqu <- cbind(cqu,stats[,'N',drop=FALSE])
cqu[is.na(qu)] <- ''
if(lang != 'plain') {
st <- character(nrow(cqu))
names(st) <- dimnames(qu)[[1]]
for(j in 1:nrow(cqu)) {
st[j] <-
paste0(outer.size(cqu[j, 1]), ' ',
bold( cqu[j, 2]), ' ',
outer.size(cqu[j, 3]))
if(prmsd) {
z <- if(brmsd) paste0(br, msdsize(paste0(cqu[j, 4], spc,
plminus, spc, cqu[j, 5])))
else paste0(spc, spc, msdsize(paste0('(', cqu[j, 4], spc,
plminus, cqu[j, 5], ')')))
st[j] <- paste0(st[j], z)
}
if(prN)
st[j] <-
paste0(st[j], outer.size(paste0(spc, math(paste0('N=', cqu[j, ncol(cqu)])))))
}
} else {
if(prmsd) {
st <- apply(cqu, 1,
function(x,sep) paste(x[1], sep, x[2], sep,x[3], ' ',
x[4], '+/-', x[5], sep=''), sep=sep)
} else {
st <- apply(cqu[,seq(3),drop=FALSE], 1, paste, collapse=sep)
}
if(prN) {
st <- setNames(sprintf("%s N=%s", st, cqu[, ncol(cqu), drop=FALSE]),
names(st))
}
}
### if(any(is.na(qu))) st <- "" # Why was this here?
if(nw == 1) yj <- st
else {
yj <- rep('', nw)
names(yj) <- names(group.freq)
yj[names(st)] <- st
}
if(length(tr)) {
ct <- formatTestStats(tr, prtest=prtest, lang=lang,
testUsed=testUsed, pdig=pdig, eps=eps,
footnoteTest=footnoteTest, mspecs=mspecs)
yj <- c(yj, ct)
}
matrix(yj, nrow=1, dimnames=list(nam,names(yj)))
}
## Format test statistics (P-value, statistic, d.f., test name) for display
## in printed, LaTeX/HTML, or plotmath output.
##
## tr        : an element of testresults (created by summary.formula
##             method='reverse') or summaryM; a list with components P,
##             stat, df, testname, statname, latexstat, plotmathstat,
##             namefun
## multchoice: TRUE for multiple-choice variables (per-level tests)
## i         : which element(s) of tr$P / tr$stat to use
## prtest    : any of 'P','stat','df','name' selecting what to show
## lang      : 'plain', 'latex', or 'html' (markup taken from mspecs)
## plotmath  : return a parsed plotmath expression instead of plain text
## footnoteTest / testUsed : add a superscript index identifying the test
## Returns a character vector (or a plotmath expression when plotmath=TRUE).
formatTestStats <- function(tr, multchoice=FALSE,
                            i=if(multchoice) NA else 1,
                            prtest, lang='plain',
                            testUsed=character(0),
                            pdig=3, eps=.001,
                            plotmath=FALSE, footnoteTest=TRUE,
                            mspecs=markupSpecs)
{
  ## tr=an element of testresults (created by summary.formula method='reverse')
  ## or summaryM
  if(any(i > 1) && ! multchoice) stop('logic error')
  ## was i > 1; length mismatch
  specs <- mspecs[[lang]]
  spc  <- specs$space
  sup  <- specs$sup
  math <- specs$math
  pval     <- tr$P[i]
  teststat <- tr$stat[i]
  testname <- tr$testname
  ## no test available: return blanks (with footnote marker for LaTeX)
  if(any(is.na(pval)) || any(is.na(teststat))) {
    res <- rep('', length(pval))
    if(lang == 'latex' && length(testUsed))
      res <-
        if(footnoteTest)
          rep(paste0(sup(match(testname, testUsed))), length(pval))
        else rep('', length(pval))
    return(res)
  }
  ## Note: multchoice tests always have only one type of d.f.
  deg <- if(multchoice) tr$df[i] else tr$df
  dof <- if(multchoice) as.character(deg) else paste(deg, collapse=',')
  namefun <- specs[[tr$namefun]] ## function for typesetting stat name
  statmarkup <-
    if(lang == 'latex') tr$latexstat
    else if(plotmath) tr$plotmathstat
    else tr$statname
  if(length(prtest) > 1 && 'stat' %in% prtest &&
     (lang != 'plain' || plotmath)) {
    if(plotmath) {
      ## replace "df" inside statmarkup with actual d.f.
      if(length(grep('df', statmarkup)))
        statmarkup <- sedit(statmarkup, 'df',
                            if(lang == 'latex' || length(deg) == 1) dof
                            else paste0('list(', dof, ')'))
    } else statmarkup <- namefun(deg)
  }
  pval <- format.pval(pval, digits=pdig, eps=eps)
  plt <- substring(pval,1,1) == '<'
  if(any(plt) && lang == 'latex')
    pval <- sub('<', '\\\\textless ', pval)
  if(lang != 'plain') {
    if(length(prtest) == 1)
      paste0(
        switch(prtest,
               P    = pval,
               stat = format(round(teststat, 2)),
               df   = dof,
               name = statmarkup),
        if(footnoteTest && length(testUsed))
          paste0(sup(match(testname, testUsed))))
    else paste0(
           if('stat' %in% prtest)
             paste0(statmarkup, '=', format(round(teststat, 2))),
           if(all(c('stat', 'P') %in% prtest))
             (if(lang == 'html') ', ' else paste0(',', spc)),
           if('P' %in% prtest) paste0('P',ifelse(plt,'','='), pval),
           if(footnoteTest && length(testUsed))
             paste0(sup(match(testname, testUsed))))
  } else if(plotmath) {
    if(length(prtest) == 1)
      parse(text=switch(prtest,
                        P    = ifelse(plt, paste0('~', 'P', pval),
                                      paste0('~', 'P==', pval)),
                        stat = format(round(teststat, 2)),
                        ## bug fix: key was 'dof', which can never match a
                        ## prtest value ('df'); switch() then returned NULL
                        df   = format(dof),
                        name = statmarkup))
    else
      parse(text=paste(if('stat' %in% prtest)
                         paste0('~list(',statmarkup,'==',
                                format(round(teststat,2))),
                       if(all(c('stat','P') %in% prtest)) ', ',
                       if('P' %in% prtest)paste0(ifelse(plt,'~P','~P=='),
                                                 pval,')')))
  } else {
    if(length(prtest) == 1)
      switch(prtest,
             P    = pval,
             stat = format(round(teststat, 2)),
             df   = dof,
             name = statmarkup)
    else
      paste(if('stat' %in% prtest)
              paste0(statmarkup, '=', format(round(teststat, 2))),
            if('df' %in% prtest) paste0('d.f.=', dof),
            if('P' %in% prtest) paste0('P', ifelse(plt,'','='), pval))
  }
}
## LaTeX method for summary.formula objects created with method='reverse'.
## Assembles the same table as print.summary.formula.reverse but with LaTeX
## markup (via formatCats/formatCons with lang='latex'), adds an explanatory
## legend and test footnotes, and passes everything to latex.default.
## Returns the latex.default result (with extra style packages added when
## dotchart=TRUE).
##
## npct.size / Nsize / outer.size : LaTeX size macros for counts, column Ns
##   and outer quantiles
## auxCol   : optional named list giving one extra left-hand column
## dotchart : include inline LaTeX picture dot charts for categorical vars
latex.summary.formula.reverse <-
  function(object, title=first.word(deparse(substitute(object))),
           digits, prn = any(n != N), pctdig=0,
           what=c('%', 'proportion'),
           npct=c('numerator','both','denominator','slash','none'),
           npct.size='scriptsize', Nsize='scriptsize',
           exclude1=TRUE,  vnames=c("labels","names"), prUnits=TRUE,
           middle.bold=FALSE, outer.size="scriptsize",
           caption, rowlabel="",
           insert.bottom=TRUE, dcolumn=FALSE, formatArgs=NULL, round=NULL,
           prtest=c('P','stat','df','name'), prmsd=FALSE, msdsize=NULL,
           long=dotchart, pdig=3, eps=.001, auxCol=NULL, dotchart=FALSE, ...)
{
  x <- object
  npct   <- match.arg(npct)
  vnames <- match.arg(vnames)
  what   <- match.arg(what)
  ## prtest=FALSE suppresses all test output
  if(is.logical(prtest) && ! prtest) prtest <- 'none'
  stats <- x$stats
  nv <- length(stats)
  cstats <- lab <- character(0)
  nn <- integer(0)
  type <- x$type
  n <- x$n
  N <- x$N
  nams <- names(stats)
  labels <- x$labels
  Units <- x$units
  nw <- if(lg <- length(x$group.freq)) lg else 1
  gnames <- names(x$group.freq)
  test   <- x$testresults
  if(! length(test)) prtest <- 'none'
  mspecs <- markupSpecs
  ## footnote test names only when more than one kind of test was used
  gt1.test <-
    if(all(prtest == 'none'))
      FALSE
    else
      length(unique(sapply(test, function(a)a$testname))) > 1
  if(!missing(digits)) {
    ## temporarily honor a digits= request through options()
    oldopt <- options('digits')
    options(digits=digits)
    on.exit(options(oldopt))
  }
  if(missing(caption))
    caption <- paste0("Descriptive Statistics",
                      if(length(x$group.label))
                        paste(" by",x$group.label)
                      else
                        paste0(" $(N=",x$N,")$"))
  bld <- if(middle.bold) '\\bf '
         else ''
  cstats <- NULL
  testUsed <- auxc <- character(0)
  for(i in 1:nv) {
    if(length(auxCol))
      auxc <- c(auxc, auxCol[[1]][i])
    nn <- c(nn, n[i])
    nam <- if(vnames == "names") nams[i]
           else labels[i]
    if(prUnits && nchar(Units[i]) > 0)
      nam <- paste0(nam, '~\\hfill\\tiny{',
                    gsub('*',' ', Units[i], fixed=TRUE),'}')
    tr  <- if(length(test) && all(prtest != 'none')) test[[nams[i]]]
           else NULL
    if(length(test) && all(prtest != 'none'))
      testUsed <- unique(c(testUsed, tr$testname))
    ## categorical / multiple-choice vs continuous formatting
    if(type[i] %in% c(1, 3)) {
      cs <- formatCats(stats[[i]], nam, tr, type[i],
                       if(length(x$group.freq)) x$group.freq else x$n[i],
                       what, npct, pctdig, exclude1, long, prtest,
                       lang='latex', testUsed=testUsed,
                       npct.size=npct.size,
                       pdig=pdig, eps=eps,
                       footnoteTest=gt1.test, dotchart=dotchart, mspecs=mspecs)
      nn <- c(nn, rep(NA, nrow(cs)-1))
    } else cs <- formatCons(stats[[i]], nam, tr, x$group.freq, prmsd,
                            prtest=prtest, formatArgs=formatArgs, round=round,
                            lang='latex', testUsed=testUsed,
                            middle.bold=middle.bold,
                            outer.size=outer.size, msdsize=msdsize,
                            pdig=pdig, eps=eps, footnoteTest=gt1.test,
                            mspecs=mspecs)
    cstats <- rbind(cstats, cs)
    if(length(auxc) && nrow(cstats) > 1)
      auxc <- c(auxc, rep(NA, nrow(cs)-1))
  }
  lab <- dimnames(cstats)[[1]]
  gl <- names(x$group.freq)
  if(!length(gl))
    gl <- " "
  lab <- latexTranslate(lab, c(" "), c("~"), greek=TRUE)
  gl  <- latexTranslate(gl, greek=TRUE)
  ## second header row with stratum sizes
  extracolheads <-
    if(any(gl != " "))
      c(if(prn)'', paste0('$N=', x$group.freq, '$'))
    else NULL # 21jan03
  if(length(test) && !all(prtest == 'none')) {
    gl <- c(gl,
            if(length(prtest) == 1 && prtest != 'stat')
              if(prtest == 'P') 'P-value'
              else prtest
            else 'Test Statistic')
    if(length(extracolheads)) extracolheads <- c(extracolheads,'') # 21jan03
  }
  dimnames(cstats) <- list(NULL,gl)
  cstats <- data.frame(cstats, check.names=FALSE, stringsAsFactors=FALSE)
  col.just <- rep("c",length(gl))
  ## decimal alignment for the test column when dcolumn is in use
  if(dcolumn && all(prtest != 'none') &&
     gl[length(gl)] %in% c('P-value','Test Statistic'))
    col.just[length(col.just)] <- '.'
  if(prn) {
    cstats <- data.frame(N=nn, cstats, check.names=FALSE,
                         stringsAsFactors=FALSE)
    col.just <- c("r",col.just)
  }
  ## build the explanatory legend placed below the table
  if(!insert.bottom)
    legend <- NULL
  else {
    legend <- character()
    if(any(type == 2)) {
      legend <- paste0("\\noindent {\\", outer.size, " $a$\\ }{", bld,
                       "$b$\\ }{\\", outer.size,
                       " $c$\\ } represent the lower quartile $a$, the median $b$, and the upper quartile $c$\\ for continuous variables.",
                       if(prmsd) '~~$x\\pm s$ represents $\\bar{X}\\pm 1$ SD.'
                       else '')
    }
    if(prn) {
      legend <- c(legend, '$N$\\ is the number of non--missing values.')
    }
    if(any(type == 1) && npct == 'numerator') {
      legend <- c(legend, 'Numbers after percents are frequencies.')
    }
    if(length(testUsed))
      legend <-c(legend,
                 if(length(testUsed) == 1)'\\noindent Test used:'
                 else '\\indent Tests used:',
                 if(length(testUsed) == 1) paste(testUsed,'test')
                 else paste(paste0('\\textsuperscript{\\normalfont ',
                                   1:length(testUsed),'}',testUsed,
                                   ' test'),collapse='; '))
  }
  ## optional extra left-hand column supplied through auxCol
  if(length(auxc)) {
    if(length(auxc) != nrow(cstats))
      stop(paste0('length of auxCol (',length(auxCol[[1]]),
                  ') is not equal to number or variables in table (',
                  nv,').'))
    auxcc <- format(auxc)
    auxcc[is.na(auxc)] <- ''
    cstats <- cbind(auxcc, cstats)
    nax <- names(auxCol)
    heads <- get2rowHeads(nax)
    names(cstats)[1] <- heads[[1]]
    if(length(col.just)) col.just <- c('r', col.just)
    if(length(extracolheads)) extracolheads <- c(heads[2], extracolheads)
  }
  resp <- latex.default(cstats, title=title, caption=caption, rowlabel=rowlabel,
                        col.just=col.just, numeric.dollar=FALSE,
                        insert.bottom=legend, rowname=lab, dcolumn=dcolumn,
                        extracolheads=extracolheads, extracolsize=Nsize,
                        ...)
  ## inline dot charts need these LaTeX packages
  if(dotchart)
    resp$style <- unique(c(resp$style, 'calc', 'epic', 'color'))
  resp
}
## Print method for summary.formula objects created with method='cross'.
## With exactly two stratification variables (twoway), prints a two-way
## layout via print.char.matrix with one cell per combination of levels;
## otherwise prints the statistics as an ordinary data frame.
## Returns invisibly.
##
## prnmiss: print the Missing column (default: when any missings present)
## prn    : print the N column
print.summary.formula.cross <- function(x, twoway=nvar == 2,
                                        prnmiss=any(stats$Missing>0), prn=TRUE,
                                        abbreviate.dimnames=FALSE,
                                        prefix.width=max(nchar(v)),
                                        min.colwidth, formatArgs=NULL,
                                        ...)
{
  stats <- x
  a <- attributes(stats)
  cat("\n",a$heading,"\n\n")
  attr(stats,'class') <- NULL
  ylab <- attr(stats$S,"label")
  nvar <- length(a$Levels)
  vnames <- names(a$Levels)
  nam <- c(vnames, if(prn)"N", if(prnmiss) "Missing", "S")
  stats <- stats[nam]
  S <- stats$S
  ars <- length(dim(S))   # may always be TRUE
  attr(stats,"row.names") <- rep("",length(a$row.names))
  if(twoway && nvar == 2) {
    ## two-way layout: rows = levels of first variable, columns = levels of
    ## second; each cell stacks N / Missing / statistics
    V <- stats[[vnames[1]]]
    H <- stats[[vnames[2]]]
    v <- levels(V)
    h <- levels(H)
    z <- dimnames(stats$S)[[2]]
    if(!length(z))
      z <- ylab
    z <- c(if(prn)"N",
           if(prnmiss)"Missing",
           z)   # 5Oct00
    header <- matrix(paste(z,collapse="\n"),1,1)
    print.char.matrix(header, col.names=FALSE)
    d <- c(length(v),length(h),length(z))
    st <- array(NA, dim=d, dimnames=list(v,h,z))
    cstats <- array("", dim=d, dimnames=list(v,h,z))
    ## fill the 3-d array cell by cell from the long-format stats
    for(i in 1:length(V)) {
      j <- V == V[i,drop=FALSE] & H == H[i,drop=FALSE]
      st[V[i,drop=FALSE],H[i,drop=FALSE],] <-
        c(if(prn)stats$N[j],
          if(prnmiss)stats$Missing[j],
          if(ars)S[j,]
          else S[j])
    }
    for(k in 1:d[3]) {
      ww <- c(list(st[,,k]), formatArgs)
      cstats[,,k] <- ifelse(is.na(st[,,k]),"",do.call('format',ww))
    }
    ## collapse the statistics dimension into multi-line cell text
    dimn <- dimnames(cstats)[1:2]
    names(dimn) <- vnames
    cstats2 <- matrix("", nrow=d[1], ncol=d[2], dimnames=dimn)
    for(i in 1:d[1]) {
      for(j in 1:d[2]) {
        cstats2[i,j] <- paste(cstats[i,j,], collapse="\n")
      }
    }
    if(missing(min.colwidth))
      min.colwidth <-
        max(min(nchar(dimnames(cstats2)[[2]])),
            min(nchar(cstats)[nchar(cstats)>0]))
    return(invisible(print.char.matrix(cstats2, col.names=TRUE, ...)))
  }
  ##print.data.frame messes up matrix names (here prefixing by S)
  if(ars) {
    stats$S <- NULL
    snam <- dimnames(S)[[2]]
    for(i in 1:ncol(S))
      stats[[snam[i]]] <- S[,i]
  } else names(stats)[length(stats)] <- ylab
  stats <- as.data.frame(stats, stringsAsFactors=FALSE)
  invisible(print(stats, ...))
}
## LaTeX method for summary.formula objects created with method='cross'.
## With exactly two stratification variables (twoway), produces a two-way
## LaTeX table (levels of the second variable as column groups); otherwise
## typesets the long-format statistics as a simple table.
## Special tokens in captions/levels ('cbind', 'c(', 'ALL', 'NA') are
## translated to reader-friendly text before LaTeX escaping.
latex.summary.formula.cross <-
  function(object,
           title=first.word(deparse(substitute(object))),
           twoway=nvar == 2,
           prnmiss=TRUE, prn=TRUE,
           caption=attr(object,"heading"), vnames=c('labels','names'),
           rowlabel="", ...)
{
  stats <- object
  vnames <- match.arg(vnames)
  ul <- vnames == 'labels'
  stats <- unclass(stats)
  a <- attributes(stats)
  ylab <- attr(stats$S,"label")
  nvar <- length(a$Levels)
  nam <- c(names(a$Levels),
           if(prn)"N",
           if(prnmiss)"Missing",
           "S")
  ##Force lazy evaluation since stats about to change
  caption <- caption;
  title <- title
  stats <- stats[nam]
  S <- stats$S
  ars <- length(dim(S))
  inn <- c('cbind','c(','ALL',  'NA')
  out <- c('',     '(' ,'Total','Missing')
  caption <- latexTranslate(caption, inn, out, pb=TRUE, greek=TRUE)
  if(twoway)
    rowlab <-
      if(ul)
        latexTranslate(a$labels[1],inn,out,pb=TRUE,greek=TRUE)
      else
        names(stats)[1]
  rvar <- stats[[1]]
  cvar <- stats[[2]]
  lev1 <- levels(rvar)
  lev2 <- levels(cvar)
  if(!twoway) {
    ## long format: one row per stratum combination
    for(i in 1:nvar)
      stats[[i]] <- latexTranslate(as.character(stats[[i]]),inn,
                                   out,pb=TRUE,greek=TRUE)
    ## expand a matrix S into one column per statistic
    if(ars) {
      stats$S <- NULL
      snam <- latexTranslate(dimnames(S)[[2]],inn,out,pb=TRUE,greek=TRUE)
      for(i in 1:ncol(S))
        stats[[snam[i]]] <- S[,i]
    } else names(stats)[length(stats)] <- ylab
    stats <- structure(stats, row.names=rep("",length(stats$N)),
                       class="data.frame")
    ## honor a user-supplied col.just; otherwise right-justify statistics
    if(hasArg("col.just")) {
      return(latex(stats, title=title, caption=caption, rowlabel=rowlabel, ...))
    } else return(latex(stats, title=title, caption=caption, rowlabel=rowlabel,
                        col.just=c("l","l",rep("r",length(stats)-2)), ...))
  }
  ##Two-way
  S <- cbind(N=if(prn)stats$N,
             Missing=if(prnmiss && any(stats$Missing)) stats$Missing,  #5Oct00
             stats$S)
  nr <- length(lev1)
  nc <- length(lev2)
  ns <- ncol(S)
  snam <- dimnames(S)[[2]]
  snam <- latexTranslate(snam, inn, out, pb=TRUE,greek=TRUE)
  ## with >1 statistic, repeat statistic names under each column group;
  ## with 1 statistic the column heads are the levels themselves
  dn <-
    if(ns > 1)
      rep(snam, nc)
    else
      latexTranslate(lev2,inn,out,pb=TRUE,greek=TRUE)   # 5Oct00
  st <- matrix(NA, nrow=nr, ncol=nc*ns, dimnames=list(NULL,dn))
  for(i in 1:nr) {
    l <- 0
    for(j in 1:nc) {
      w <- rvar == lev1[i] & cvar == lev2[j]
      if(any(w))
        for(k in 1:ns) {
          l <- l+1
          st[i,l] <- S[w,k]
        }
    }
  }
  latex(st, title=title, caption=caption,
        rowlabel=if(rowlabel == '') rowlab else rowlabel,
        n.rgroup=c(nrow(st)-1,1),
        n.cgroup=if(ns>1) rep(ns,nc),  # ns>1 5Oct00
        cgroup  =if(ns>1) latexTranslate(lev2,inn,out,pb=TRUE,greek=TRUE),
        check.names=FALSE,
        rowname=latexTranslate(lev1,inn,out,pb=TRUE,greek=TRUE), ...)
}
##stratify is a modification of Therneau's survival4 strata function
##Saves the label attribute and defaults shortlabel to TRUE
## Combine one or more categorical variables into a single stratification
## factor whose levels are all observed combinations (joined with ", ").
## Modeled on the survival package's strata(); additionally preserves
## valueLabel/valueName attributes combined with ' and '.
##
## na.group  : if TRUE, treat NA as its own level "NA" instead of dropping
## shortlabel: if TRUE (default), labels are just the level values; if
##             FALSE they are "varname=level"
## Returns a factor with valueLabel and valueName attributes set.
stratify <- function(..., na.group = FALSE, shortlabel = TRUE)
{
  ## argument expressions, used as fallback variable names
  words <- as.list((match.call())[-1])
  if(!missing(na.group))
    words$na.group <- NULL
  if(!missing(shortlabel))
    words$shortlabel <- NULL
  allf <- list(...)
  ## a single list argument supplies both the variables and their names
  if(length(allf) == 1 && is.list(ttt <- unclass(allf[[1]]))) {
    allf <- ttt
    words <- names(ttt)
  }
  ## combined label/name: per-variable labels (or names when unlabeled)
  ## joined with ' and '
  xlab  <- sapply(allf, function(x){lab <- valueLabel(x); if(is.null(lab)) NA else lab})
  xname <- sapply(allf, function(x){name <- valueName(x); if(is.null(name)) NA else name})
  xname <- ifelse(is.na(xname), words, xname)
  xlab  <- paste(ifelse(is.na(xlab), xname, xlab), collapse=' and ')
  xname <- paste(xname, collapse = ' and ')
  nterms <- length(allf)
  ## seed codes/labels from the first variable (0-based level codes)
  what <- allf[[1]]
  if(is.null(levels(what)))
    what <- factor(what)
  levs <- unclass(what) - 1
  wlab <- levels(what)
  if(na.group && any(is.na(what))) {
    levs[is.na(levs)] <- length(wlab)
    wlab <- c(wlab, "NA")
  }
  if(shortlabel)
    labs <- wlab
  else labs <- paste(words[1], wlab, sep = "=")
  ## fold in each remaining variable: codes combine positionally
  ## (levs = wlev + levs * nlevels) and labels combine as the cross product
  for(i in (1:nterms)[-1]) {
    what <- allf[[i]]
    if(is.null(levels(what)))
      what <- factor(what)
    wlab <- levels(what)
    wlev <- unclass(what) - 1
    if(na.group && any(is.na(wlev))) {
      wlev[is.na(wlev)] <- length(wlab)
      wlab <- c(wlab, "NA")
    }
    if(!shortlabel)
      wlab <- format(paste(words[i], wlab, sep = "="))
    levs <- wlev + levs * (length(wlab))
    labs <- paste(rep(labs, rep(length(wlab), length(labs))),
                  rep(wlab, length(labs)), sep = ", ")
  }
  ## keep only combinations that actually occur, renumbered 1..k; the
  ## levels()<- / class()<- order matters: assign levels first, then make
  ## the integer vector a factor
  levs <- levs + 1
  ulevs <- sort(unique(levs[!is.na(levs)]))
  levs <- match(levs, ulevs)
  labs <- labs[ulevs]
  levels(levs) <- labs
  class(levs) <- "factor"
  if(length(xlab))
    valueLabel(levs) <- xlab  #FEH 2Jun95
  if(length(xname))
    valueName(levs) <- xname
  levs
}
## Subscript method for summary.formula(method='response') objects.
## i selects variables (by position or by name via the vname attribute);
## j selects statistic columns.  Because each variable occupies one row
## per level, selecting variables expands i into the corresponding row
## indices, and the per-row attributes (vname, vlabel) and per-variable
## attributes (nlevels, labels) are subset to match.
'[.summary.formula.response' <- function(x,i,j,drop=FALSE)
{
  at <- attributes(x)
  ## dim/dimnames are regenerated by the subscripting below
  at$dim <- at$dimnames <- NULL
  if(!missing(j)) {
    x <- unclass(x)[,j,drop=FALSE]
    at$ycolname <- at$ycolname[j]
    attributes(x) <- c(attributes(x), at)
  }
  if(missing(i))
    return(x)
  ## character i: translate variable names to positions
  if(is.character(i)) {
    vn <- at$vname[at$vname != '']
    k <- match(i, vn, nomatch=0)
    if(any(k == 0))
      stop(paste('requested variables not in object:',
                 paste(i[k == 0],collapse=' ')))
    i <- k
  }
  ## expand selected variables into their row index ranges; variable m
  ## occupies nlevels[m] consecutive rows
  j <- integer(0)
  nl <- at$nlevels
  is <- 1
  for(m in 1:length(nl)) {
    ie <- is+nl[m]-1
    if(any(i == m))
      j <- c(j,is:ie)
    is <- ie+1
  }
  at$vname   <- at$vname[j]
  at$vlabel  <- at$vlabel[j]
  at$nlevels <- at$nlevels[i]
  at$labels  <- at$labels[i]
  x <- unclass(x)[j,,drop=FALSE]
  attributes(x) <- c(attributes(x), at)
  x
}
## Expand a categorical variable into a matrix of cumulative 0/1 indicator
## columns, one per level above the lowest: column '>=lev[k]' is 1 when the
## observation's level is at or above lev[k].  Useful for proportional-odds
## style summaries of ordinal variables.
##
## y: a factor (or anything coercible to one via factor())
## Returns an integer matrix with length(y) rows and nlevels(y)-1 columns
## (a zero-column matrix when y has fewer than two levels; the previous
## version failed in that case because 2:length(lev) iterated backwards).
cumcategory <- function(y)
{
  if(!is.factor(y))
    y <- factor(y)
  lev <- levels(y)
  y <- unclass(y)   # integer level codes
  Y <- matrix(NA, nrow=length(y), ncol=length(lev)-1,
              dimnames=list(NULL, paste0('>=', lev[-1])))
  storage.mode(Y) <- 'integer'
  ## seq_len guards the degenerate single-level case, where 2:length(lev)
  ## would wrongly produce c(2, 1)
  for(i in seq_len(length(lev) - 1L))
    Y[, i] <- 1L * (y >= i + 1L)
  Y
}
## Apply FUN to X within groups defined by `by`, returning a data frame
## with one row per observed group combination, sorted by the by-variables.
## FUN may return a vector of statistics; with type='matrix' (or a scalar
## FUN) the result is stored as a single (possibly matrix) column named
## stat.name, otherwise as one column per statistic.  Uses the Hmisc
## helpers label() and mApply().
summarize <- function(X, by, FUN, ...,
                      stat.name=deparse(substitute(X)),
                      type=c('variables','matrix'), subset=TRUE,
                      keepcolnames=FALSE)
{
type <- match.arg(type)
if(missing(stat.name) && length(stat.name) > 1) stat.name <- 'X'
## Normalize `by` to a named list of grouping vectors, applying `subset`
if(!is.list(by)) {
nameby <- deparse(substitute(by))
bylabel <- label(by)
by <- list(by[subset])
names(by) <- if(length(nameby) == 1) nameby
else 'by'
} else {
bylabel <- sapply(by, label)
if(!missing(subset))
by <- lapply(by, function(y, subset) y[subset],
subset=subset)
}
nby <- length(by)
bylabel <- ifelse(bylabel == '', names(by), bylabel)
## Run FUN once to learn how many statistics it returns
typical.computation <- FUN(X, ...)
nc <- length(typical.computation)
xlabel <- deparse(substitute(X))
if(length(xlabel) != 1) xlabel <- 'X'
if(length(xlab <- attr(X,'label'))) xlabel <- xlab
if(!missing(subset))
X <- if(is.matrix(X)) X[subset,,drop=FALSE]
else X[subset]
## Collapse the grouping variables into a single '|'-separated key
byc <- do.call('paste', c(by, sep='|'))
## split does not handle matrices
## msplit <- function(x, group) {
## if(is.matrix(x)) {
## group <- as.factor(group)
## l <- levels(group)
## res <- vector('list', length(l))
## names(res) <- l
## for(j in l) res[[j]] <- x[group==j,,drop=FALSE]
## res
## } else split(x, group)
## }
## Following was streamlined 10oct02 using the new mApply
## if(nc==1) r <- sapply(msplit(X, byc), FUN, ..., simplify=TRUE) else {
## r <- sapply(msplit(X, byc), FUN, ..., simplify=TRUE)
## r <- matrix(unlist(r), nrow=nc, dimnames=dimnames(r))
## 2Mar00: added unlist because sapply was creating an array of
## lists in S+2000
## }
r <- mApply(X, byc, FUN, ..., keepmatrix=nc > 1)
rdimn <- dimnames(r)[[1]]
## someday can use unpaste defined in Misc.s
## Split the composite keys back into one component per by-variable
ans <- strsplit(if(nc == 1) names(r) else rdimn, '\\|')
ans <- sapply(ans, function(x)if(length(x)) x else '')
## strsplit returns list "transpose" of unpaste
bb <- matrix(unlist(ans), nrow=nby)
ans <- vector('list', nby)
for(jj in 1:nby) ans[[jj]] <- bb[jj,]
names(ans) <- names(by)
if(nc>1 && (nc != ncol(r))) stop('program logic error')
snames <- names(typical.computation)
if(! length(snames)) snames <- paste0(stat.name, 1 : nc)
if(! keepcolnames) {
if(length(stat.name) == 1) snames[1] <- stat.name
else if(length(stat.name)) snames <- stat.name
}
## Suppress coercion warnings while restoring by-variable types; restored
## on exit
oldopt <- options('warn')
options(warn = -1)
on.exit(options(oldopt))
notna <- rep(TRUE, length(ans[[1]]))
for(i in 1:length(by)) {
byi <- by[[i]]
ansi <- ans[[i]]
## Give each reconstructed by-column the type/levels of the original
if(is.factor(byi)) {
if(!is.character(ansi))
stop('program logic error:ansi not character')
ansi <- factor(ansi, levels(byi))
}
else if(is.numeric(byi))
ansi <- as.numeric(ansi)
names(ansi) <- NULL
label(ansi) <- bylabel[i]
ans[[i]] <- ansi
notna <- notna & !is.na(ansi)
}
if(type == 'matrix' || nc == 1) {
ans[[stat.name]] <-
if(nc == 1)
structure(r, names=NULL)
else
structure(r, dimnames=list(NULL, snames), names=NULL)
label(ans[[stat.name]]) <- xlabel
} else {
snames <- make.names(snames)
for(i in 1:length(snames)) {
ans[[snames[i]]] <- structure(r[, i], names=NULL)
label(ans[[snames[i]]]) <- xlabel
}
}
notna <- notna & !is.na(if(nc == 1) r
else (r %*% rep(1,nc)))
ans <- structure(ans, class='data.frame',
row.names=1 : length(ans[[1]]))
## removed [notna,] from end of above line; not sure why this was needed
## Sort rows by the by-variables (names stripped so order() accepts them)
iorder <- do.call('order', structure(unclass(ans)[1 : nby], names=NULL))
## order can bomb if data frame given (preserves names)
ans[iorder,]
}
##Following code is based on tapply instead
## NOTE(review): dead experimental code — the if(FALSE) guard means it never
## executes.  It refers to x/by/FUN/stat.name/type from summarize()'s scope
## and would not run standalone; kept only as historical reference.
if(FALSE) {
r <- as.array(tapply(x, by, FUN, ...))
dn <- dimnames(r)
wrn <- .Options$warn
.Options$warn <- -1
for(i in 1:length(by)) {
byi <- by[[i]]
if(is.numeric(byi) && !is.factor(byi)) dn[[i]] <- as.numeric(dn[[i]])
}
.Options$warn <- wrn
names(dn) <- names(by)
ans <- expand.grid(dn)
typical.computation <- FUN(x, ...)
nc <- length(typical.computation)
snames <- names(typical.computation)
if(length(snames)) snames <- paste(stat.name, snames) else
snames <- if(nc == 1) stat.name else paste(stat.name, 1 : nc)
for(i in 1 : length(r)) if(!length(r[[i]])) r[[i]] <- rep(NA, nc)
## unlist will skip positions where calculations not done (NULLs)
S <- matrix(unlist(r), ncol=length(snames),
dimnames=list(NULL, snames), byrow=TRUE)
if(type == 'matrix') {
ans$S <- S
if(stat.name != 'S') names(ans)[length(ans)] <- stat.name
} else ans <- cbind(ans, S)
ans
}
smean.cl.normal <- function(x, mult=qt((1+conf.int)/2,n-1),
                            conf.int=.95, na.rm=TRUE)
{
  ## Mean with two-sided normal-theory (t-based) confidence limits.
  ## The default `mult` is evaluated lazily, after n and conf.int exist in
  ## this frame, so it becomes the appropriate t quantile.
  ## Returns c(Mean=, Lower=, Upper=); limits are NA when fewer than two
  ## non-missing values remain.
  if(na.rm)
    x <- x[! is.na(x)]
  n <- length(x)
  if(n < 2)
    return(c(Mean=mean(x), Lower=NA, Upper=NA))
  xbar <- sum(x) / n
  sem  <- sqrt(sum((x - xbar)^2) / (n - 1) / n)
  half <- mult * sem
  c(Mean=xbar, Lower=xbar - half, Upper=xbar + half)
}
smean.sd <- function(x, na.rm=TRUE)
{
  ## Mean and sample standard deviation, returned as c(Mean=, SD=).
  ## Returns NAs for empty input (after NA removal); a single value yields
  ## SD = NaN (0/0), matching the explicit n-1 denominator formula.
  if(na.rm)
    x <- x[! is.na(x)]
  n <- length(x)
  if(n == 0)
    return(c(Mean=NA, SD=NA))
  m <- sum(x) / n
  c(Mean=m, SD=sqrt(sum((x - m)^2) / (n - 1)))
}
smean.sdl <- function(x, mult=2, na.rm=TRUE)
{
  ## Mean plus/minus `mult` standard deviations,
  ## returned as c(Mean=, Lower=, Upper=).  All NAs for empty input.
  if(na.rm)
    x <- x[! is.na(x)]
  n <- length(x)
  if(n == 0)
    return(c(Mean=NA, Lower=NA, Upper=NA))
  m <- sum(x) / n
  spread <- mult * sqrt(sum((x - m)^2) / (n - 1))
  c(Mean=m, Lower=m - spread, Upper=m + spread)
}
#S-Plus gives a parse error for R's .Internal()
#Might try not using an else to see if S still parses
smean.cl.boot <- function(x, conf.int=0.95, B=1000, na.rm=TRUE, reps=FALSE) {
  ## Nonparametric bootstrap percentile confidence limits for the mean,
  ## based on B resamples.  With reps=TRUE the B bootstrap means are
  ## attached as attribute "reps".  Returns c(Mean=, Lower=, Upper=);
  ## limits are NA when fewer than two non-missing values remain.
  if(na.rm)
    x <- x[! is.na(x)]
  n <- length(x)
  xbar <- mean(x)
  if(n < 2L)
    return(c(Mean=xbar, Lower=NA, Upper=NA))
  ## One bootstrap sum per replicate; division by n afterwards gives means.
  ## The sample.int() calls occur in the same order as before, so results
  ## are reproducible under a fixed RNG seed.
  boot.sum <- function(i, x, N) sum(x[sample.int(N, N, TRUE, NULL)])
  z <- vapply(seq_len(B), boot.sum, numeric(1), x=x, N=n) / n
  tails <- c((1 - conf.int) / 2, (1 + conf.int) / 2)
  quant <- unname(quantile(z, tails))
  res <- c(Mean=xbar, Lower=quant[1L], Upper=quant[2L])
  if(reps)
    attr(res, "reps") <- z
  res
}
smedian.hilow <- function(x, conf.int=.95, na.rm=TRUE)
{
  ## Median with symmetric outer quantiles spanning a coverage of conf.int
  ## (default: 2.5th and 97.5th percentiles).
  ## Returns c(Median=, Lower=, Upper=).
  lo <- (1 - conf.int) / 2
  hi <- (1 + conf.int) / 2
  result <- quantile(x, probs=c(0.5, lo, hi), na.rm=na.rm)
  names(result) <- c('Median', 'Lower', 'Upper')
  result
}
## Convert a data frame (or list of equal-length columns) to a single
## numeric matrix, saving each column's original attributes (plus its
## storage mode under '.type.') in the matrix attribute 'origAttributes' so
## that matrix2dataFrame() can reverse the conversion.  Character columns
## are first converted to factors and stored as their integer codes.
asNumericMatrix <- function(x)
{
a <- attributes(x)
k <- length(a$names)
at <- vector('list', k); names(at) <- a$names
for(i in 1:k) {
xi <- x[[i]]
type <- storage.mode(xi)
A <- attributes(xi)
if(type == 'character') {
## Store the factor's levels alongside the original attributes so the
## character values can be reconstructed later
xi <- factor(xi)
A <- c(A, attributes(xi))
x[[i]] <- xi
}
## dim/names/dimnames would conflict with the matrix layout; drop them
A$dim <- A$names <- A$dimnames <- NULL
A$.type. <- type
at[[i]] <- A
}
resp <- matrix(unlist(x), ncol=k,
dimnames=list(a$row.names, a$names))
attr(resp, 'origAttributes') <- at
resp
}
## Inverse of asNumericMatrix(): rebuild a data frame from a numeric matrix
## using the saved 'origAttributes'.  With restoreAll=TRUE every saved
## attribute (class, levels, label, units, ...) is reinstated; otherwise
## only label, units, and factor levels are restored.
matrix2dataFrame <- function(x, at=attr(x, 'origAttributes'), restoreAll=TRUE)
{
d <- dimnames(x)
k <- length(d[[2]])
w <- vector('list',k)
nam <- names(w) <- d[[2]]
for(i in 1 : k) {
a <- at[[nam[i]]]
type <- a$.type.
a$.type. <- NULL
xi <- x[, i]
names(xi) <- NULL
lev <- a$levels
if(restoreAll) {
if(type == 'character') {
## Integer factor codes back to the original character values
xi <- as.character(factor(xi, 1 : length(lev), lev))
a$levels <- NULL
if(length(a$class)) a$class <- setdiff(a$class, 'factor')
}
storage.mode(xi) <- type
## R won't let something be assigned class factor by brute
## force unless it's an integer object
attributes(xi) <- a
} else {
if(length(l <- a$label)) label(xi) <- l
if(length(u <- a$units)) units(xi) <- u
if(length(lev)) {
xi <- factor(xi, 1 : length(lev), lev)
if(type == 'character') xi <- as.character(xi)
}
}
w[[i]] <- xi
}
rn <- d[[1]]
## NOTE(review): the fallback row names use the length of the *last*
## column processed (xi); presumably all columns share nrow(x) — confirm
if(! length(rn)) rn <- as.character(seq(along=xi))
structure(w, class='data.frame', row.names=rn)
}
## Draw a one-dimensional strip chart: one horizontal line per group (x is
## a named list of numeric vectors), with the group's points plotted on it.
## The left margin is widened to fit the group labels.  With grid=TRUE the
## base graphics primitives are replaced by their lattice equivalents.
stripChart <- function(x, xlim, xlab='', pch=1,
                       cex.labels=par('cex'), cex.points=.5,
                       lcolor='gray',
                       grid=FALSE)
{
if(grid) sRequire('lattice')
groups <- names(x)
if(missing(xlim))
xlim <- range(unlist(x),na.rm=TRUE)
## NOTE(review): `i` appears unused below — confirm before removing
i <- integer(0)
## Shadow the base primitives with lattice versions when drawing on a grid
if(grid) {
lines <- lattice::llines
points <- lattice::lpoints
segments <- lattice::lsegments
}
plot.new()
## Widen the left margin to fit the longest group label; restored on exit
mai <- omai <- par('mai')
on.exit(par(mai=omai))
mxlab <- .3+max(strwidth(groups, units='inches', cex=cex.labels))
mai[2] <- mxlab
par(mai=mai, new=TRUE)
plot(xlim, c(.5,length(groups)+.5), xlim=xlim, xlab='', ylab='',
axes=FALSE, type='n')
box()
mgp.axis(1, axistitle=xlab)
mtext(paste(groups,''), 2, 0, at=length(groups):1,
adj=1, las=1, cex=cex.labels)
y <- 0
abline(h = 1:length(groups), lty = 1, lwd=1, col=lcolor)
## Groups are drawn top-to-bottom so the first list element appears on top
for(Y in length(groups):1) {
y <- y + 1
X <- x[[y]]
if(length(X))
points(X, rep(Y, length(X)), pch=pch)
}
}
conTestkw <- function(group,x) {
  ## Kruskal-Wallis (or, for two groups, Wilcoxon) test of x across groups,
  ## computed through the Hmisc spearman2() F approximation.  Returns the
  ## list format expected by the summary.formula/summaryM test machinery.
  st <- spearman2(group,x)
  tname <- if(st['df1'] == 1) 'Wilcoxon' else 'Kruskal-Wallis'
  list(P            = st['P'],
       stat         = st['F'],
       df           = st[c('df1','df2')],
       testname     = tname,
       statname     = 'F',
       namefun      = 'fstat',
       latexstat    = 'F_{df}',
       plotmathstat = 'F[df]')
}
catTestchisq=function(tab) {
  ## Pearson chi-square test (no continuity correction) on a frequency
  ## table, in the list format expected by the summary.formula/summaryM
  ## test machinery.  Degenerate tables — fewer than 2 rows or columns, or
  ## a table that collapses to a vector after all-zero rows are removed —
  ## yield NA results.
  st <- list(p.value=NA, statistic=NA, parameter=NA)
  if(is.matrix(tab) && nrow(tab) >= 2 && ncol(tab) >= 2) {
    tab <- tab[rowSums(tab) > 0, ]   # drop rows with zero total
    if(is.matrix(tab))
      st <- chisq.test(tab, correct=FALSE)
  }
  list(P            = st$p.value,
       stat         = st$statistic,
       df           = st$parameter,
       testname     = 'Pearson',
       statname     = 'Chi-square',
       namefun      = 'chisq',
       latexstat    = '\\chi^{2}_{df}',
       plotmathstat = 'chi[df]^2')
}
ordTestpo=function(group, x) {
  ## Proportional-odds ordinal logistic likelihood-ratio test of x across
  ## groups, fitted with rms::lrm().  Returns the list format expected by
  ## the summary.formula/summaryM test machinery.
  if (!requireNamespace("rms", quietly = TRUE))
    stop("This function requires the 'rms' package.")
  fitstats <- rms::lrm(x ~ group)$stats
  list(P            = fitstats['P'],
       stat         = fitstats['Model L.R.'],
       df           = fitstats['d.f.'],
       testname     = 'Proportional odds likelihood ratio',
       statname     = 'Chi-square',
       namefun      = 'chisq',
       latexstat    = '\\chi^{2}_{df}',
       plotmathstat = 'chi[df]^2')
}
## NOTE(review): the text preceding `reShape <- function` on the next line
## is tar-archive residue from the package dump, not R code.
##
## reShape: reshape data between wide and long/matrix layouts.
##  * With `base` given: x is a list/data frame with repeated columns named
##    base1..baseN; the result is a long-format list/data frame with a
##    sequence variable named `timevar`.
##  * With x a matrix: returns a list of row/column identifiers plus the
##    matrix values as a vector.
##  * Otherwise: spreads x (and any extra vectors in ...) into matrices
##    indexed by `id` (rows) and `colvar` (columns).
## Uses Hmisc helpers %nin% and all.is.numeric.
Hmisc/R/reShape.s 0000644 0001762 0000144 00000005412 12243661443 013300 0 ustar ligges users reShape <- function(x, ..., id, colvar, base, reps,
                    times=1:reps, timevar='seqno', constant=NULL)
{
if(!missing(base)) {
if(!is.list(x))
stop('x must be a list or data frame when base is given')
## All wide-format column names of the form <base><1..reps>
repvars <- as.vector(outer(base,1:reps,paste,sep=''))
nam <- names(x)
nonrep <- nam[nam %nin% repvars]
res <- vector('list', 1+length(nonrep)+length(base))
names(res) <- c(timevar, nonrep, base)
x1 <- x[[1]]
n <- if(is.matrix(x1)) nrow(x1)
else length(x1)
res[[1]] <- rep(times[1:reps], n)
## Non-repeated variables are replicated once per time point
for(i in nonrep) res[[i]] <- rep(x[[i]], rep(reps,n))
## Get indexes that will put unlist() in right order
k <- as.vector(matrix(1:(reps*n), nrow=reps, byrow=TRUE))
for(i in base) {
bn <- paste(i, 1:reps, sep='')
x1 <- x[[bn[1]]]
## Preserve attributes (label, units, ...) of the first repeat column
at <- attributes(x1)
at$names <- NULL
x1 <- unlist(x[bn])[k]
if(length(at)) attributes(x1) <- at
res[[i]] <- x1
}
if(is.data.frame(x)) {
rn <- attr(x,'row.names')
ln <- length(rn)
if(ln) {
## R calls data.frame even if specify structure, and R does
## not have dup.row.names argument to data.frame as does S+
return(data.frame(res,
row.names=paste(rep(rn,rep(reps,ln)),
rep(1:reps,n))))
}
}
return(res)
}
if(is.matrix(x)) {
## Matrix input: return row/column identifier vectors plus values
y <- as.vector(x)
v1 <- all.is.numeric(dimnames(x)[[1]][row(x)],'vector')
v2 <- all.is.numeric(dimnames(x)[[2]][col(x)],'vector')
w <- list(v1, v2, y)
names(w) <- c('rowvar','colvar',as.character(substitute(x)))
if(length(nd <- names(dimnames(x))))
names(w)[1:2] <- nd
w
} else {
## Long-to-wide: spread x (and extra ... vectors) into id x colvar matrices
listid <- is.list(id)
i <- as.factor(if(listid) do.call('paste', c(id, sep='~'))
else id)
colvar <- as.factor(colvar)
m <- matrix(NA, nrow=length(levels(i)), ncol=length(levels(colvar)),
dimnames=list(levels(i), levels(colvar)))
dotlist <- list(...)
if(!length(dotlist)) {
m[cbind(i, colvar)] <- x
if(listid) {
j <- match(as.character(dimnames(m)[[1]]), as.character(i))
if(length(constant))
data.frame(id[j,,drop=FALSE], constant[j,,drop=FALSE], m)
else data.frame(id[j,,drop=FALSE], m)
} else m
} else {
## Several value vectors: one matrix per vector, named from the call
res <- vector('list',nx <- 1+length(dotlist))
names(res) <- (as.character(sys.call())[-1])[1:nx]
nam2 <- names(sys.call()[-1])[1:nx]
if(length(nam2))
names(res) <- ifelse(nam2=='',names(res),nam2)
w <- m;
w[cbind(i, colvar)] <- x;
res[[1]] <- w
for(j in 2:nx) {
w <- m;
w[cbind(i, colvar)] <- dotlist[[j-1]]
res[[j]] <- w
}
res
}
}
}
## NOTE(review): the text preceding `spower <- function` on the next line is
## tar-archive residue from the package dump, not R code.
##
## spower: simulate the power of a two-group survival comparison.  On each
## of nsim iterations, control (rcontrol) and intervention (rinterv)
## failure times and common censoring times (rcens) are generated; `test`
## (default: the logrank function in this file) returns a 1-df chi-square
## that is compared to the alpha critical value.  With cox=TRUE a Cox model
## log hazard ratio is also saved per simulation and an object of class
## 'spower' is returned; otherwise the estimated power (a scalar).
Hmisc/R/spower.s 0000644 0001762 0000144 00000027576 14601127715 013246 0 ustar ligges users spower <- function(rcontrol, rinterv, rcens, nc, ni,
                   test=logrank, cox=FALSE, nsim=500, alpha=.05, pr=TRUE)
{
crit <- qchisq(1-alpha, 1)
group <- c(rep(1,nc), rep(2,ni))
nexceed <- 0
if(cox) beta <- numeric(nsim)
maxfail <- 0; maxcens <- 0
for(i in 1:nsim) {
if(pr && i %% 10 == 0) cat(i,'\r')
yc <- rcontrol(nc)
yi <- rinterv(ni)
cens <- rcens(nc+ni)
y <- c(yc, yi)
maxfail <- max(maxfail, max(y))
maxcens <- max(maxcens, max(cens))
## Observed time is min(failure, censoring); event indicator 1 if failed
S <- cbind(pmin(y,cens), 1*(y <= cens))
nexceed <- nexceed + (test(S, group) > crit)
if(cox)
{
fit <- survival::coxph.fit(as.matrix(group), S, strata=NULL,
offset=NULL, init=NULL,
control=survival::coxph.control(iter.max=10, eps=.0001),
method="efron", rownames=NULL)
beta[i] <- fit$coefficients
}
}
cat('\n')
## Sanity check: censoring distribution must extend at least as far as the
## failure-time distribution, else failures pile up at the maximum time
if(maxfail < 0.99*maxcens)
stop(paste('Censoring time distribution defined at later times than\nsurvival time distribution. There will likely be uncensored failure times\nstacked at the maximum allowed survival time.\nMaximum simulated failure time:', max(y),'\nMaximum simulated censoring time:', max(cens)))
power <- nexceed/nsim
if(cox) structure(list(power=power, betas=beta, nc=nc, ni=ni,
alpha=alpha, nsim=nsim), class='spower') else power
}
## Print method for spower(cox=TRUE) results: summarizes the simulated log
## hazard ratios (mean/median HR, SE, percentile confidence limits, and
## fold-change margins of error) and invisibly returns them as a named
## vector.
print.spower <- function(x, conf.int=.95, ...)
{
b <- x$betas
hr <- exp(b)
pp <- (1+conf.int)/2
## Percentile confidence limits from the empirical HR distribution
cl <- quantile(hr, c((1-conf.int)/2, pp))
meanbeta <- mean(b)
medbeta <- median(b)
hrmean <- exp(meanbeta)
hrmed <- exp(medbeta)
## Fold-change margins of error relative to the median hazard ratio
moehi <- cl[2]/hrmed
moelo <- hrmed/cl[1]
g <- function(w) round(w, 4)
mmoe <- max(moehi, moelo)
cat('\nTwo-Group Event Time Comparison Simulation\n\n',
x$nsim,' simulations\talpha: ', x$alpha, '\tpower: ', x$power,
'\t', conf.int, ' confidence interval\n',
'\nHazard ratio from mean beta : ', g(hrmean),
'\nHazard ratio from median beta : ', g(hrmed),
'\nStandard error of log hazard ratio: ', g(sd(b)),
'\nConfidence limits for hazard ratio: ', g(cl[1]), ', ', g(cl[2]),
'\nFold-change margin of error high : ', g(moehi),
'\t(upper CL/median HR)',
'\nFold-change margin of error low : ', g(moelo),
'\t(median HR/lower CL)',
'\nMax fold-change margin of error : ', g(mmoe),'\n\n')
cat('The fold change margin of error of', g(mmoe),
'represents the margin of error\n',
'the study is likely to achieve in estimating the intervention:control\n',
'hazard ratio. It is the ratio of a', conf.int, 'confidence limit on the\n',
'hazard ratio to the median hazard ratio obtained over the', x$nsim, 'simulations.\n',
'The confidence limit was obtained by computing the', pp, 'quantile of the\n',
x$nsim, 'observed hazard ratios. The standard error is the standard deviation\n',
'of the', x$nsim, 'simulated log hazard ratios.\n\n')
res <- c(cl, hrmean, hrmed, sd(b), moelo, moehi, x$power)
names(res) <- c('CLlower','CLupper','HRmean','HRmedian','SE',
'MOElower','MOEupper','Power')
invisible(res)
}
## Build a random failure-time generator for a two-group trial with
## non-compliance.  Starting from the control survival function `scontrol`
## and a hazard-ratio function `hratio`, derives the intervention survival
## curve and "contaminated" curves allowing dropin (controls adopting the
## intervention) and dropout (intervention subjects reverting to control
## hazard).  Returns a function (class 'Quantile2') that draws failure
## times for either group by inverse transform sampling; curves for
## plotting are stored in its 'plot.info' attribute.
Quantile2 <- function(scontrol, hratio,
                      dropin=function(times)0,
                      dropout=function(times)0,
                      m=7500, tmax, qtmax=.001, mplot=200, pr=TRUE,
                      ...)
{
## Solve for tmax such that scontrol(t)=qtmax
dlist <- list(...)
k <- length(dlist) && !is.null(dlist)
f <- if(k) function(x, scontrol, qt, ...) scontrol(x, ...) - qt
else function(x, scontrol, qt) scontrol(x) - qt
if(missing(tmax)) {
if(k) tmax <- uniroot(f, c(0,1e9), scontrol=scontrol, qt=qtmax, ...)$root
else tmax <- uniroot(f, c(0,1e9), scontrol=scontrol, qt=qtmax)$root
}
if(pr)
cat('\nInterval of time for evaluating functions:[0,',
format(tmax),']\n\n')
## Generate sequence of times to use in all approximations and sequence
## to use for plot method
times <- seq(0, tmax, length.out=m)
tim <- seq(0, tmax, length.out=mplot)
tinc <- times[2]
## Approximate hazard function for control group
sc <- scontrol(times, ...)
hc <- diff(-logb(sc))
hc <- c(hc, hc[m-1])/tinc ## to make length=m
## hazard function for intervention group
hr <- rep(hratio(times), length.out=m)
hi <- hc*hr
## hazard for control group with dropin
di <- rep(dropin(times), length.out=m)
hc2 <- (1-di)*hc + di*hi
## hazard for intervention group with dropout
do <- rep(dropout(times), length.out=m)
hi2 <- (1-do)*hi + do*hc
## survival for intervention group
si <- exp(-tinc*cumsum(hi))
## Compute contaminated survival function for control and intervention
sc2 <- if(any(di>0))exp(-tinc*cumsum(hc2))
else sc
si2 <- exp(-tinc*cumsum(hi2))
## Store all functions evaluated at shorter times vector (tim), for
## plotting
asing <- function(x) x
sc.p <- asing(approx(times, sc, xout=tim)$y)
hc.p <- asing(approx(times, hc, xout=tim)$y)
sc2.p <- asing(approx(times, sc2, xout=tim)$y)
hc2.p <- asing(approx(times, hc2, xout=tim)$y)
si.p <- asing(approx(times, si, xout=tim)$y)
hi.p <- asing(approx(times, hi, xout=tim)$y)
si2.p <- asing(approx(times, si2, xout=tim)$y)
hi2.p <- asing(approx(times, hi2, xout=tim)$y)
dropin.p <- asing(approx(times, di, xout=tim)$y)
dropout.p <- asing(approx(times, do, xout=tim)$y)
hratio.p <- asing(approx(times, hr, xout=tim)$y)
hratio2.p <- hi2.p/hc2.p
tim <- asing(tim)
plot.info <- list("C Survival" =list(Time=tim,Survival=sc.p),
"I Survival" =list(Time=tim,Survival=si.p),
"C Survival w/Dropin" =list(Time=tim,Survival=sc2.p),
"I Survival w/Dropout" =list(Time=tim,Survival=si2.p),
"C Hazard" =list(Time=tim,Hazard=hc.p),
"I Hazard" =list(Time=tim,Hazard=hi.p),
"C Hazard w/Dropin" =list(Time=tim,Hazard=hc2.p),
"I Hazard w/Dropout" =list(Time=tim,Hazard=hi2.p),
"Dropin" =list(Time=tim,Probability=dropin.p),
"Dropout" =list(Time=tim,Probability=dropout.p),
"Hazard Ratio" =list(Time=tim,Ratio=hratio.p),
"Hazard Ratio w/Dropin+Dropout"=list(Time=tim,Ratio=hratio2.p))
## Create S-Plus functions for computing random failure times for
## control and intervention subject to dropin, dropout, and hratio
r <- function(n, what=c('control','intervention'),
times, csurvival, isurvival)
{
what <- match.arg(what)
## Inverse transform sampling: interpolate time as a function of survival
approx(if(what=='control')csurvival
else isurvival,
times, xout=runif(n), rule=2)$y
}
asing <- function(x) x
## Bake the computed curves into the generator's default arguments
formals(r) <- list(n=integer(0),
what=c('control','intervention'),
times=asing(times), csurvival=asing(sc2),
isurvival=asing(si2))
structure(r, plot.info=plot.info,
dropin=any(di>0), dropout=any(do>0),
class='Quantile2')
}
print.Quantile2 <- function(x, ...)
{
  ## Print the underlying generator with its attributes (class, plot.info,
  ## dropin/dropout flags) stripped, so only the bare object is shown.
  bare <- x
  attributes(bare) <- NULL
  print(bare)
  invisible()
}
## Plot method for Quantile2 objects: draws the stored survival, hazard,
## dropin/dropout, and hazard-ratio curves (attribute 'plot.info') using
## the Hmisc labcurve() labeler.  `what` selects which panel(s) to draw;
## dropsep=TRUE puts the contaminated (dropin/dropout) curves on a
## separate plot from the pure ones.
plot.Quantile2 <- function(x,
                           what=c('survival','hazard','both','drop','hratio',
                                  'all'), dropsep=FALSE,
                           lty=1:4, col=1, xlim, ylim=NULL,
                           label.curves=NULL, ...)
{
what <- match.arg(what)
pi <- attr(x, 'plot.info')
if(missing(xlim))
xlim <- c(0,max(pi[[1]][[1]]))
dropin <- attr(x, 'dropin')
dropout <- attr(x, 'dropout')
## Indexes into pi: 1-2 are the pure curves, 3-4 the contaminated ones
## (included only when dropin/dropout actually occur)
i <- c(1,2,
if(dropin)3,
if(dropout)4)
if(what %in% c('survival','both','all')) {
if(dropsep && (dropin|dropout)) {
labcurve(pi[1:2], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
opts=label.curves)
labcurve(pi[i[-(1:2)]], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
opts=label.curves)
} else
labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
opts=label.curves)
}
## Hazard curves occupy slots 5-8, hence the 4+ offset
if(what %in% c('hazard','both','all')) {
if(dropsep && (dropin|dropout)) {
labcurve(pi[5:6], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
opts=label.curves)
labcurve(pi[4+i[-(1:2)]], pl=TRUE, lty=lty, col.=col, xlim=xlim,
ylim=ylim, opts=label.curves)
} else
labcurve(pi[4+i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
opts=label.curves)
}
## Dropin/dropout probability curves are slots 9-10
if(what=='drop' || (what=='all' && (dropin | dropout))) {
i <- c(if(dropin)9,
if(dropout)10)
if(length(i)==0)
i <- 10
labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
opts=label.curves)
}
## Hazard-ratio curves are slots 11 (pure) and 12 (contaminated)
if(what %in% c('hratio','all')) {
i <- c(11,
if(dropin|dropout) 12)
labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
opts=label.curves)
}
invisible()
}
## Two-sample log-rank chi-square test.  S is a 2-column matrix: column 1
## follow-up time, column 2 event indicator (1=event, 0=censored); group
## must take exactly two distinct values.  Returns the 1-df chi-square
## statistic with attribute 'hr', the O/E-based hazard ratio estimate
## (second group relative to first).
logrank <- function(S, group)
{
## Drop observations with missing time/status or group
i <- is.na(S) | is.na(group)
if(any(i))
{
i <- ! i
S <- S[i,, drop=FALSE]
group <- group[i]
}
u <- sort(unique(group))
if(length(u) > 2) stop('group must have only 2 distinct values')
## x = 1 for the second (comparison) group, 0 for the first
x <- ifelse(group == u[2], 1, 0)
y <- S[, 1]
event <- S[, 2]
# Sort all data in descending failure time order
i <- order(- y)
y <- y[i]
event <- event[i]
x <- x[i]
## Columns: at-risk indicator per group, then event indicator per group
x <- cbind(1 - x, x, (1 - x) * event, x * event)
## Collapse to one row per distinct time; cumsum over descending times
## gives the numbers at risk in each group at each time
s <- rowsum(x, y, FALSE)
nr1 <- cumsum(s[, 1])
nr2 <- cumsum(s[, 2])
d1 <- s[,3]
d2 <- s[,4]
rd <- d1 + d2
rs <- nr1 + nr2 - rd
n <- nr1 + nr2
## Observed minus expected events in group 1, and hypergeometric variance
oecum <- d1 - rd * nr1/n
vcum <- rd * rs * nr1 * nr2 / n / n / (n - 1)
chisq <- sum(oecum) ^ 2 / sum(vcum, na.rm=TRUE)
o1 <- sum(d1)
o2 <- sum(d2)
e1 <- sum(nr1 * rd / n)
e2 <- sum(nr2 * rd / n)
## Hazard ratio approximated by the ratio of O/E ratios
hr <- (o2 / e2) / (o1 / e1)
structure(chisq, hr=hr)
}
Weibull2 <- function(times, surv)
{
  ## Fit a Weibull survival function S(t) = exp(-alpha * t^gamma) through
  ## two (time, survival) points and return S as a function of time with
  ## alpha and gamma baked in as default arguments.
  h1 <- -logb(surv[1])
  h2 <- -logb(surv[2])
  gamma <- logb(h2 / h1) / logb(times[2] / times[1])
  alpha <- h1 / (times[1] ^ gamma)
  g <- function(times, alpha, gamma) exp(-alpha * (times ^ gamma))
  formals(g) <- list(times=NULL, alpha=alpha, gamma=gamma)
  g
}
# Non-working code where logrank was tried to extend to stratification
## NOTE(review): dead code — the if(FALSE) guard means it never executes.
## It references x/y/strata from a logrank-like scope and would not run
## standalone; kept only as a sketch of a stratified log-rank computation.
if(FALSE) {
OE <- v <- hrn <- hrd <- 0
for(strat in unique(strata))
{
j <- strata==strat
s <- rowsum(x[j,], y[j], FALSE)
nr1 <- cumsum(s[,1])
nr2 <- cumsum(s[,2])
d1 <- s[,3]
d2 <- s[,4]
rd <- d1+d2
rs <- nr1+nr2-rd
n <- nr1+nr2
oecum <- d1 - rd*nr1/n
vcum <- rd * rs * nr1 * nr2 / n / n / (n-1)
OE <- OE + sum(oecum)
v <- v + sum(vcum, na.rm=TRUE)
hrn <- hrn + sum(d1*(nr1-d1)/n)
hrd <- hrd + sum(d2*(nr2-d2)/n)
}
chisq <- OE^2 / v
hr <- hrn/hrd
}
## Function to fit a Gompertz survival distribution to two points
## The function is S(t) = exp[-(1/b)exp(a+bt)]
## Returns a list with components a and b, and a function for
## generating S(t) for a vector of times
Gompertz2 <- function(times, surv)
{
  ## Fit a Gompertz survival function S(t) = exp(-(1/b) exp(a + b t))
  ## through two (time, survival) points and return S as a function of
  ## time with a and b baked in as default arguments.
  u1 <- logb(-logb(surv[1]))
  u2 <- logb(-logb(surv[2]))
  b <- (u2 - u1) / (times[2] - times[1])
  a <- u1 + logb(b) - b * times[1]
  g <- function(times, a, b) exp(-exp(a + b * times) / b)
  formals(g) <- list(times=NULL, a=a, b=b)
  g
}
Lognorm2 <- function(times, surv)
{
  ## Fit a log-normal survival function S(t) = 1 - Phi((log t - mu)/sigma)
  ## through two (time, survival) points and return S as a function of
  ## time with mu and sigma baked in as default arguments.
  z1 <- qnorm(1 - surv[1])
  z2 <- qnorm(1 - surv[2])
  sigma <- logb(times[2] / times[1]) / (z2 - z1)
  mu <- logb(times[1]) - sigma * z1
  g <- function(times, mu, sigma) pnorm(-(logb(times) - mu) / sigma)
  formals(g) <- list(times=NULL, mu=mu, sigma=sigma)
  g
}
## NOTE(review): the text preceding `escapeBS <- function` on the next line
## is tar-archive residue from the package dump, not R code.
Hmisc/R/regexpEscape.s 0000644 0001762 0000144 00000000242 12243661443 014320 0 ustar ligges users escapeBS <- function(string) {
  ## Escape backslashes in `string` for contexts that consume one level of
  ## backslash escaping.
  ## NOTE(review): the replacement literal contains five backslashes (an
  ## odd count); presumably the intent is to double each backslash —
  ## confirm against callers before changing.
gsub('\\\\', '\\\\\\\\\\', string)
}
escapeRegex <- function(string) {
  ## Backslash-escape regular-expression metacharacters so that `string`
  ## can be embedded in a regex and matched literally.
  metachars <- '([.|()\\^{}+$*?]|\\[|\\])'
  gsub(metachars, '\\\\\\1', string)
}
## NOTE(review): the text preceding `showPsfrag <- function` on the next
## line is tar-archive residue from the package dump, not R code.
##
## Preview a PostScript figure that uses psfrag substitutions: writes a
## minimal LaTeX wrapper including <filename>.ps, runs latex/dvips/gv via
## the Hmisc sys() helper, then removes the temporary files.  Side effects
## only (external commands, temp files); returns invisibly.
Hmisc/R/showPsfrag.s 0000644 0001762 0000144 00000001105 12243661443 014027 0 ustar ligges users showPsfrag <- function(filename)
{
## filename is taken unevaluated; '.ps' is appended to its name
file <- paste(as.character(substitute(filename)),'ps',sep='.')
out <- "TEMPltx"
cat('\\documentclass{article}',
'\\usepackage{graphics}',
'\\usepackage[scanall]{psfrag}',
'\\begin{document}',
paste('\\includegraphics{',file,'}',sep=''),
'\\end{document}',sep='\n', file=paste(out,'tex',sep='.'))
sys(paste('latex "\\scrollmode\\input" ',out,';dvips -o ',out,'.ps ',out,
'; gv ',out,'.ps &',
sep=''))
unlink(paste(out,c('tex','log','dvi','ps','aux','pfg'),sep='.'))
invisible()
}
Hmisc/R/spearman.test.s 0000644 0001762 0000144 00000001453 12243661443 014476 0 ustar ligges users ## Spearman correlation test (p=1) or Spearman test extended by adding
## rank(x)^2 to model (p=2)
## F Harrell 30Sep90
spearman.test <- function(x,y,p=1)
{
  ## Spearman rank correlation test.  With p=1, an F test of rank(x)
  ## predicting rank(y) (equivalent to testing Spearman's rho); with p=2,
  ## rank(x)^2 is added to the model to pick up non-monotone association.
  ## Returns c(Rsquare, F, df1, df2, pvalue, n).
  x <- as.numeric(x)
  y <- as.numeric(y)
  if(length(x) != length(y))
    stop("length of x must = length of y")
  ok <- ! is.na(x + y)   # pairwise-complete observations
  n <- sum(ok)
  if(n < 3)
    stop("fewer than 3 non-missing x-y pairs")
  if(!(p == 1 | p == 2))
    stop("p must be 1 or 2")
  rx <- rank(x[ok])
  ry <- rank(y[ok])
  sst <- sum((ry - mean(ry))^2)
  design <- if(p == 2) cbind(rx, rx^2) else rx
  sse <- sum(lsfit(design, ry)$residuals ^ 2)
  rsquare <- 1 - sse/sst
  df2 <- n - p - 1
  fstat <- rsquare / p / ((1 - rsquare) / df2)
  pvalue <- 1 - pf(fstat, p, df2)
  out <- c(rsquare, fstat, p, df2, pvalue, n)
  names(out) <- c("Rsquare","F","df1","df2","pvalue","n")
  out
}
Hmisc/R/qrxcenter.r 0000644 0001762 0000144 00000004053 14717115414 013723 0 ustar ligges users #' Mean-center a data matrix and QR transform it
#'
#' For a numeric matrix `x` (or a numeric vector that is automatically changed to a one-column matrix), computes column means and subtracts them from `x` columns, and passes this matrix to [base::qr()] to orthogonalize columns. Columns of the transformed `x` are negated as needed so that original directions are preserved (which are arbitrary with QR decomposition). Instead of the default `qr` operation for which sums of squares of column values are 1.0, `qrxcenter` makes all the transformed columns have standard deviation of 1.0.
#' @title qrxcenter
#' @param x a numeric matrix or vector with at least 2 rows
#' @param ... passed to [base::qr()]
#'
#' @return a list with components `x` (transformed data matrix), `R` (the matrix that can be used to transform raw `x` and to transform regression coefficients computed on transformed `x` back to the original space), `Ri` (transforms transformed `x` back to original scale except for `xbar`), and `xbar` (vector of means of original `x` columns`)
#' @export
#' @md
#'
#' @examples
#' set.seed(1)
#' age <- 1:10
#' country <- sample(c('Slovenia', 'Italy', 'France'), 10, TRUE)
#' x <- model.matrix(~ age + country)[, -1]
#' x
#' w <- qrxcenter(x)
#' w
#' # Reproduce w$x
#' sweep(x, 2, w$xbar) %*% w$R
#' # Reproduce x from w$x
#' sweep(w$x %*% w$Ri, 2, w$xbar, FUN='+')
#' # See also https://hbiostat.org/r/examples/gtrans/gtrans#sec-splinebasis
qrxcenter <- function(x, ...) {
  ## Mean-center `x` (a numeric matrix, or a vector coerced to a one-column
  ## matrix) and orthogonalize its columns via QR, rescaled so each
  ## transformed column has SD 1 and keeps its original direction.
  ## Extra arguments are passed to qr().  See the roxygen block above.
  ##
  ## Returns list(x=transformed matrix, R=matrix mapping centered raw x to
  ## transformed x (and transformed-space coefficients back to the original
  ## space), Ri=inverse mapping, xbar=column means of the original x).
  ##
  ## Bug fix: the original `x <- is.matrix(x)` assigned FALSE, destroying
  ## vector input; as.matrix() was clearly intended.
  if(! is.matrix(x)) x <- as.matrix(x)
  d <- dim(x)
  n <- d[1]
  p <- d[2]
  ## Was `n <= 2`, contradicting both the message and the documented
  ## "at least 2 rows"; n = 2 is valid (sqrt(n - 1) = 1 below).
  if(n < 2) stop('requires n > 1')
  x <- scale(x, center=TRUE, scale=FALSE)
  xbar <- as.vector(attr(x, 'scaled:center'))
  QR <- qr(x, ...)
  Q  <- qr.Q(QR)
  RR <- qr.R(QR)
  ## QR signs are arbitrary; flip columns so original directions are kept
  sgns <- sign(diag(RR))
  # Each column of Q squared sums to 1; make sum of squares = n - 1
  # This makes SDs 1.0
  sn <- sqrt(n - 1)
  x  <- sweep(Q,  MARGIN = 2, STATS = sgns, FUN = `*`) * sn
  RR <- sweep(RR, MARGIN = 1, STATS = sgns, FUN = `*`) / sn
  list(x=x, R = backsolve(RR, diag(p)), Ri = RR, xbar=xbar)
}
## NOTE(review): the text preceding `ggfreqScatter <- function` on the next
## line is tar-archive residue from the package dump, not R code.
##
## ggplot2 frequency scatterplot: bins x and y (numeric axes are rounded to
## `bins` steps), tabulates the frequency of each (x, y[, by]) combination,
## and displays frequency by color (default), point size (nsize=TRUE), or
## vertical "sticks" (stick=TRUE).  g/cuts control how frequencies are
## grouped for the color legend; xtrans/ytrans allow transformed axes with
## labels in original units.  Requires ggplot2 (and viridis for default
## colors); faceted by `by` when supplied.
Hmisc/R/ggfreqScatter.r 0000644 0001762 0000144 00000011202 14764570213 014510 0 ustar ligges users ggfreqScatter <- function(x, y, by=NULL, bins=50, g=10, cuts=NULL,
                          xtrans = function(x) x,
                          ytrans = function(y) y,
                          xbreaks = pretty(x, 10),
                          ybreaks = pretty(y, 10),
                          xminor = NULL,
                          yminor = NULL,
                          xlab=as.character(substitute(x)),
                          ylab=as.character(substitute(y)),
                          fcolors=viridis::viridis(10),
                          nsize=FALSE, stick=FALSE,
                          html=FALSE, prfreq=FALSE, ...) {
## Use Hmisc label()s for axis titles when the caller did not supply labels
xlab <- if(! missing(xlab)) xlab
else if(label(x) != '') label(x, plot=TRUE, html=html) else xlab
ylab <- if(! missing(ylab)) ylab
else if(label(y) != '') label(y, plot=TRUE, html=html) else ylab
nx <- is.numeric(x); ny <- is.numeric(y)
xbreaks <- if(nx) xbreaks; ybreaks <- if(ny) ybreaks
bins <- rep(bins, length.out=2)
bypres <- length(by) > 0
if(! bypres) by <- rep(0, times=length(x))
i <- ! (is.na(x) | is.na(y))
x <- xtrans(x[i]); y <- ytrans(y[i])
by <- by[i]
## Round numeric coordinates to the nearest bin step
if(nx) {
rx <- range(x)
sx <- diff(rx) / bins[1]
x <- rx[1] + sx * round((x - rx[1]) / sx)
}
if(ny) {
ry <- range(y)
sy <- diff(ry) / bins[2]
y <- ry[1] + sy * round((y - ry[1]) / sy)
}
## Frequency of each (by, x, y) combination, zero cells dropped
k <- as.data.frame(table(by, x, y))
k <- k[k$Freq > 0, ]
if(nx) k$x <- as.numeric(as.character(k$x))
if(ny) k$y <- as.numeric(as.character(k$y))
if(prfreq) print(table(k$Freq))
if(stick) {
if(! ny) stop('stick=TRUE only works with numeric y')
## Encode frequency as the length of a black segment centered at y, with
## light-gray extensions marking the maximum frequency scale
Y <- k$y
f <- k$Freq
m <- max(f)
z <- 1.15 * m / sy
k$y1 <- Y - f / z / 2
k$y2 <- Y + f / z / 2
k$y3 <- ifelse(f == m, NA, Y - m / z / 2)
k$y4 <- ifelse(f == m, NA, Y - f / z / 2)
k$y5 <- ifelse(f == m, NA, Y + f / z / 2)
k$y6 <- ifelse(f == m, NA, Y + m / z / 2)
w <- ggplot(k, aes(x=x, y=y, label=.data$Freq)) +
geom_segment(aes(x=x, y=y1, xend=x, yend=y2, color=I('black')), data=k) +
geom_segment(aes(x=x, y=y3, xend=x, yend=y4, color=I('lightgray')), data=k) +
geom_segment(aes(x=x, y=y5, xend=x, yend=y6, color=I('lightgray')), data=k) +
xlab(xlab) + ylab(ylab) +
labs(caption=paste0('Maximum frequency:', m))
if(bypres) w <- w + facet_wrap(~ by)
return(w)
}
## g=0: continuous frequency scale (size or color), no grouping
if(g == 0) {
w <- if(nsize)
ggplot(k, aes(x=x, y=y, size=.data$Freq ^ 0.25, label=.data$Freq)) +
geom_point(...) +
scale_size_continuous() +
xlab(xlab) + ylab(ylab) +
guides(size = guide_legend(title='Frequency'))
else
ggplot(k, aes(x=x, y=y, label=.data$Freq,
color=.data$Freq ^ 0.25)) +
geom_point(...) +
scale_color_gradientn(colors=fcolors) +
guides(alpha = FALSE,
color = guide_legend(title='Frequency')) +
xlab(xlab) + ylab(ylab)
if(bypres) w <- w + facet_wrap(~ by)
return(w)
}
## Group frequencies into g quantile groups (or at given cuts) for legend
k$fg <- if(length(cuts)) cut2(k$Freq, cuts=cuts) else cut2(k$Freq, g=g)
ufreq <- sort(unique(k$Freq))
few <- length(ufreq) <= 15
brn <- if(few) ufreq else unique(quantile(k$Freq, seq(0, g) / g))
w <- if(nsize)
ggplot(k, aes(x=x, y=y, size=.data$Freq ^ 0.25, label=.data$Freq)) +
geom_point(...) +
scale_size_continuous(breaks=brn ^ 0.25, labels=round(brn)) +
xlab(xlab) + ylab(ylab) +
guides(size = guide_legend(title='Frequency'))
else
ggplot(k, aes(x=x, y=y, label=.data$Freq,
color=if(few) .data$Freq else as.integer(fg))) + # k$Freq
geom_point(...) +
scale_color_gradientn(colors=fcolors,
breaks=if(few) ufreq else 1 : length(levels(k$fg)),
labels=if(few) ufreq else levels(k$fg)) +
guides(alpha = 'none',
color = guide_legend(title='Frequency')) +
xlab(xlab) + ylab(ylab)
## Breaks are specified in original units, placed at transformed positions
if(nx) w <- w + scale_x_continuous(breaks=xtrans(xbreaks),
labels=format(xbreaks),
minor_breaks=if(length(xminor))
xtrans(xminor))
if(ny) w <- w + scale_y_continuous(breaks=ytrans(ybreaks),
labels=format(ybreaks),
minor_breaks=if(length(yminor))
ytrans(yminor))
if(bypres) w <- w + facet_wrap(~ by)
w
}
## Declare NSE variable names used in aes() so R CMD check stays quiet
utils::globalVariables(c('fg','y1','y2','y3','y4','y5','y6'))
## NOTE(review): the text preceding `minor.tick <- function` on the next
## line is tar-archive residue from the package dump, not R code.
##
## Add minor tick marks to the current base-graphics plot: nx/ny minor
## intervals per major interval on the x/y axis, drawn at tick.ratio times
## the major tick length.  x.args/y.args are extra arguments passed to
## axis().  Side effects only; returns invisibly.
Hmisc/R/minor.tick.s 0000644 0001762 0000144 00000002402 12715617475 013774 0 ustar ligges users minor.tick <- function (nx = 2, ny = 2, tick.ratio = 0.5, x.args = list(), y.args = list()) {
## Draw minor ticks on one axis (w = "x" or "y"), n per major interval
ax <- function(w, n, tick.ratio, add.args) {
range <- par("usr")[if (w == "x") 1 : 2 else 3 : 4]
tick.pos <- if (w == "x") par("xaxp") else par("yaxp")
distance.between.minor <- (tick.pos[2] - tick.pos[1])/tick.pos[3]/n
## Walk outward from the lowest major tick to cover any plot region
## extending below it
possible.minors <- tick.pos[1] - (0 : 100) * distance.between.minor
low.candidates <- possible.minors >= range[1]
low.minor <- if (any(low.candidates))
min(possible.minors[low.candidates])
else
tick.pos[1]
## Likewise above the highest major tick
possible.minors <- tick.pos[2] + (0 : 100) * distance.between.minor
hi.candidates <- possible.minors <= range[2]
hi.minor <- if (any(hi.candidates))
max(possible.minors[hi.candidates])
else
tick.pos[2]
axis.args <- c(list(if (w == "x") 1 else 2,
seq(low.minor, hi.minor, by = distance.between.minor),
labels = FALSE, tcl = par("tcl") * tick.ratio),
add.args);
do.call(axis, axis.args);
}
if (nx > 1)
ax("x", nx, tick.ratio = tick.ratio, x.args)
if (ny > 1)
ax("y", ny, tick.ratio = tick.ratio, y.args)
invisible()
}
## NOTE(review): the text preceding `list.tree <- function` on the next
## line is tar-archive residue from the package dump, not R code.
##
## Recursively print the structure of an R object as an indented tree:
## mode, length, size, dimensions, class, a preview of atomic values, then
## each list component and (optionally) each attribute, to `depth` levels.
Hmisc/R/list.tree.s 0000644 0001762 0000144 00000007726 13067145734 013635 0 ustar ligges users list.tree <- function(struct,depth=-1, numbers=FALSE, maxlen=22,
                      maxcomp=12, attr.print=TRUE, front="",
                      fill=". ", name.of, size=TRUE)
{
if(depth==0)
return()
## Use 5 significant digits while printing; restored on exit
opts <- options('digits')
options(digits=5)
on.exit(options(opts))
if (missing(name.of))
name.of <- deparse(substitute(struct))
len <- length(struct)
cat(front,name.of,"=",storage.mode(struct),len)
if(size)
cat(" (",object.size(struct)," bytes)",sep="")
if(is.array(struct))
cat("=",
if(length(dimnames(struct)))
"named",
"array",paste(dim(struct),collapse=" X "))
if(is.ts(struct)) cat("= time series",tsp(struct))
if(is.factor(struct))
cat("= factor (",length(levels(struct))," levels)",sep="")
if(length(attr(struct,'class'))>0)
cat("(",attr(struct,'class'),")")
## Atomic non-character values: show as many as fit within maxlen chars
if(is.atomic(struct) && !is.character(struct)&& len>0 && maxlen>0) {
field <- "="
for(i in 1:length(struct)) {
field <- paste(field,format(as.vector(struct[i])))
if(nchar(field)>maxlen-6) {
field <- paste(field,"...");
break
}
}
cat(field,"\n",sep="")
} else if(is.character(struct) && len>0 && maxlen>0)
## NOTE(review): the next two lines are garbled by the extraction —
## `cumsum(nchar(struct)+1)0)` is not valid R and part of the original
## expression (and the following recursion guard) is missing.  Preserved
## verbatim; restore from the released Hmisc sources before use.
cat("=",substring(struct[1:(last <- max(1,(1:len)
[cumsum(nchar(struct)+1)0) {
structnames <- names(struct)
if(!length(structnames))
structnames <- rep("",len)
## Unnamed components are labeled [[i]]
noname <- structnames==""
structnames[noname] <-
paste("[[",(1:length(structnames))[noname],"]]",sep="")
for (i in 1:min(length(structnames),maxcomp))
if (mode(struct[[i]])=="argument" | mode(struct[[i]])=="unknown")
cat(front,fill," ",structnames[i]," = ",
as.character(struct[[i]])[1],"\n",sep="")
else
list.tree(struct[[i]],depth=depth-1,numbers,maxlen,maxcomp,
attr.print,
if(numbers)
paste(front,i,sep=".")
else paste(front,fill,sep=""),
fill,structnames[i],size=FALSE)
if(length(structnames)>maxcomp)
cat(front,fill," ... and ",length(structnames)-maxcomp,
" more\n",sep="")
}
## Recurse into attributes except the structural ones already reported
attribs <- attributes(struct)
attribnames <- names(attribs)
if(length(attribnames)>0 && attr.print)
for (i in (1:length(attribnames))
[attribnames!="dim" & attribnames!="dimnames" &
attribnames!="levels" & attribnames!="class" &
attribnames!="tsp" &
(attribnames!="names" | mode(struct)!="list")])
list.tree(attribs[[i]],depth-1,numbers,maxlen,maxcomp,attr.print,
if(numbers)
paste(front,i,sep="A")
else paste(front,"A ",sep=""),
fill,attribnames[i],size=FALSE)
invisible()
}
##############################################################################
expr.tree <- function(struct,front="",fill=". ",name.of,numbers=FALSE,depth=-1,
                      show.comment=FALSE)
{
  ## Recursively print the structure of an (unevaluated) R expression,
  ## one line per node: prefix, node label, mode, and length.
  ## depth = -1 descends without limit; depth = 0 stops recursing.
  if(missing(name.of))
    name.of <- deparse(substitute(struct))
  else if(is.atomic(struct) | is.name(struct))
    name.of <- paste(name.of, deparse(struct))

  cat(front, "", name.of, "=", mode(struct), length(struct), "\n")

  if(depth != 0 && is.recursive(struct)) {
    nel  <- length(struct)
    labs <- names(struct)
    if(length(labs) == 0)
      labs <- rep("", nel)
    if(nel > 0)
      for(k in seq_along(labs)) {
        ## Skip empty formal arguments and (unless requested) comments
        show <- (mode(struct[[k]]) != "missing" || is.function(struct)) &&
                (mode(struct[[k]]) != "comment" || show.comment)
        if(show)
          expr.tree(struct[[k]],
                    if(numbers) paste(front, k, sep=".")
                    else        paste(front, fill, sep=""),
                    fill, labs[k], numbers, depth=depth - 1)
      }
  }
  invisible(character(0))
}
Hmisc/R/show.pch.s 0000644 0001762 0000144 00000003541 12266127133 013441 0 ustar ligges users show.pch <- function(object=par('font'))
{
  ## Display plotting characters 0-253 in a grid of columns of 25,
  ## each symbol labeled with its numeric code.
  ## object : font number passed to points()
  plot(0,0,xlim=c(-1,11),ylim=c(0,26),type='n',axes=FALSE,xlab='',ylab='')
  col.pos <- -1
  row.pos <- 0
  for(code in 0:253) {
    if(code %% 25 == 0) {   # start a new column every 25 symbols
      col.pos <- col.pos + 1
      row.pos <- 26
    }
    row.pos <- row.pos - 1
    points(col.pos, row.pos, pch=code, font=object)
    text(col.pos + .45, row.pos, code)
  }
  invisible()
}
character.table <- function(font=1)
{
  ## Prints numeric (octal) equivalents of all latin characters.
  ## Usage: graphsheet(orientation = "portrait")
  ##        character.table()
  ## Print the resulting graphsheet.  The printed version doesn't always
  ## correspond to the screen display.  The character on line "xy" and
  ## column "z" of the table has code "xyz".
  ## These codes can be used as any other characters, e.g.
  ##   title("\347\340 et \340")
  ## As the command line window of S-Plus can't print special characters,
  ##   cat("\347\340 et \340")
  ## will not print the special characters, at least under 4.5 and under 2000.
  ##
  ## Author:
  ##  Pierre Joyet / Aktuariat pierre.joyet@bluewin.ch
  ##
  ## font : font number passed to text() for the character glyphs

  v <- 40:377
  v <- v[v %% 100 < 80 & v %% 10 < 8]  # keep only valid octal digit patterns
  opar <- par(mar = c(5, 5, 4, 2) + 0.1, xpd=NA)
  on.exit(par(opar))  # restore graphical parameters even if plotting fails
  plot(0:7, seq(4, 31, length = 8), type = "n", axes = FALSE, xlab = "",
       ylab = "")
  k <- 1
  for(i in 4:31)
    for(j in 0:7) {
      ## Build the literal "\xyz" octal escape for code v[k] and draw it;
      ## eval(parse()) is deliberate here -- it is the only way to turn a
      ## computed octal code into the corresponding character escape
      text(j, 35 - i, eval(parse(text = paste("\"\\", v[k], "\"",
                                              sep = ""))), font = font)
      k <- k + 1
    }
  ## Column header row: one label per column (0-7).  Fixed: the original
  ## used rep(33, 7) for 8 x-positions; harmless only because y is constant,
  ## but the lengths now match.
  text(0:7, rep(33, 8), as.character(0:7), font = 3)
  ## Row labels: the 28 octal prefixes 4-7, 10-17, 20-27, 30-37
  text(rep(-1, 28), 31:4, as.character(c(4:7, 10:17, 20:27, 30:37)),
       font = 3)
  invisible()
}
show.col <- function(object=NULL)
{
  ## Display colors 0-99 as filled squares (pch=15) labeled with their
  ## numeric codes, ten per column.
  ## object : accepted for call-compatibility; not used by the function
  plot(0,0,xlim=c(-1,10),ylim=c(0,10),type='n',axes=FALSE,xlab='',ylab='')
  column <- -1
  row <- 0
  for(num in 0:99) {
    if(num %% 10 == 0) {   # begin a new column every ten colors
      column <- column + 1
      row <- 10
    }
    row <- row - 1
    points(column, row, pch=15, col=num, cex=3)
    text(column + .45, row, num)
  }
  invisible()
}
Hmisc/R/hashCheck.r 0000644 0001762 0000144 00000014610 14373545216 013576 0 ustar ligges users ##' Check for Changes in List of Objects
##'
##' Given an RDS file name and a list of objects, does the following:
##' * makes a vector of hashes, one for each object. Function objects are run through `deparse` so that the environment of the function will not be considered.
##' * see if the file exists; if not, return a list with result=NULL, `hash` = new vector of hashes, `changed='All'`
##' * if the file exists, read the file and its hash attribute as `prevhash`
##' * if `prevhash` is not identical to hash:
##' if `.print.=TRUE` (default), print to console a summary of what's changed
##' return a list with result=NULL, `hash` = new hash vector, changed
##' * if `prevhash = hash`, return a list with result=file object, `hash`=new hash, changed=''
##'
##' Set `options(debughash=TRUE)` to trace results in `/tmp/debughash.txt`
##' @title hashCheck
##' @param ... a list of objects including data frames, vectors, functions, and all other types of R objects that represent dependencies of a certain calculation
##' @param file name of file in which results are stored
##' @param .print. set to `FALSE` to suppress printing information messages about what has changed
##' @param .names. vector of names of original arguments if not calling `hashCheck` directly
##' @return a `list` with elements `result` (the computations), `hash` (the new hash), and `changed` which details what changed to make computations need to be run
##' @author Frank Harrell
##' @md
hashCheck <- function(..., file, .print.=TRUE, .names.=NULL) {
  ## Compare digest hashes of the objects in ... against the hashes stored
  ## as the 'hash' attribute of the object saved in `file`; see the roxygen
  ## comments above for the full contract.  Dotted local names (.d., .nam.,
  ## ...) avoid clashing with user objects passed through `...`.
  .d. <- list(...)
  ## Names of the ... arguments as typed by the caller, unless supplied;
  ## truncated to length(.d.) to drop file=/.print.= from the call text
  .nam. <- if(length(.names.)) .names. else as.character(sys.call())[-1]
  .nam. <- .nam.[1 : length(.d.)]
  names(.d.) <- .nam.
  .debug. <- length(.Options$debughash) && .Options$debughash
  ## ct() traces progress to /tmp/debughash.txt when options(debughash=TRUE)
  ct <- if(.debug.)
    function(...) cat(..., '\n', file='/tmp/debughash.txt', append=TRUE)
  else
    function(...) {}
  if(! requireNamespace('digest', quietly=TRUE))
    stop('must install digest package to use hashCheck or runifChanged')
  ct(.nam.)
  ## Functions are deparsed so that differences in their environments do
  ## not change the hash
  .g. <- function(x) digest::digest(if(is.function(x)) deparse(x) else x)
  .hash. <- sapply(.d., .g.)
  if(.debug.) prn(.hash., file='/tmp/debughash.txt')
  .prevhash. <- NULL
  if(! file.exists(file)) {
    ct('no file', file)
    return(list(result=NULL, hash=.hash., changed='All'))
  }
  R <- readRDS(file)
  .prevhash. <- attr(R, 'hash')
  if(! length(.prevhash.)) {
    if(.print.) cat('\nRe-run because of no previous hash\n\n')
    ct('no previous hash')
    return(list(result=NULL, hash=.hash., changed='No previous hash'))
  }
  samelen <- length(.hash.) == length(.prevhash.)
  if(samelen && all(.hash. == .prevhash.)) {
    ct('no change')
    return(list(result=R, hash=.hash., changed=''))
  }
  ## Hashes differ: build a human-readable description of what changed
  .s. <- character(0)
  if(! samelen) {
    .a. <- names(.prevhash.)
    .b. <- names(.hash.)
    .w. <- setdiff(.a., .b.)
    if(length(.w.))
      .s. <- c(.s., paste('objects removed:',
                          paste(.w., collapse=' ')))
    .w. <- setdiff(.b., .a.)
    if(length(.w.))
      .s. <- c(.s., paste('objects added:',
                          paste(.w., collapse=' ')))
  } else
    .s. <- c(.s., paste('changes in the following objects:',
                        paste(.nam.[.hash. != .prevhash.], collapse=' ')))
  .s. <- paste(.s., collapse=';')
  ct(.s.)
  if(.print.) cat('\nRe-run because of', .s., '\n\n')
  list(result=NULL, hash=.hash., changed=.s.)
}
##' Re-run Code if an Input Changed
##'
##' Uses `hashCheck` to run a function and save the results if specified inputs have changed, otherwise to retrieve results from a file. This makes it easy to see if any objects changed that require re-running a long simulation, and reports on any changes. The file name is taken as the chunk name appended with `.rds` unless it is given as `file=`. `fun` has no arguments. Set `.inclfun.=FALSE` to not include `fun` in the hash check (for legacy uses). The typical workflow is as follows.
##' ```
##' f <- function( ) {
##' # . . . do the real work with multiple function calls ...
##' }
##' seed <- 3
##' set.seed(seed)
##' w <- runifChanged(f, seed, obj1, obj2, ....)
##' ```
##' `seed, obj1, obj2`, ... are all the objects that `f()` uses that if changed
##' would give a different result of `f()`. This can include functions such as
##' those in a package, and `f` will be re-run if any of the function's code
##' changes. `f` is also re-run if the code inside `f` changes.
##' The result of `f` is stored with `saveRDS` by default in file named `xxx.rds`
##' where `xxx` is the label for the current chunk. To control this use instead
##' `file=xxx.rds` add the file argument to `runifChanged(...)`. If nothing has
##' changed and the file already exists, the file is read to create the result
##' object (e.g., `w` above). If `f()` needs to be run, the hashed input objects
##' are stored as attributes for the result then the enhanced result is written to the file.
##'
##' See [here](https://hbiostat.org/rflow/caching.html) for examples.
##'
##' @title runifChanged
##' @param fun the (usually slow) function to run
##' @param ... input objects the result of running the function is dependent on
##' @param file file in which to store the result of `fun` augmented by attributes containing hash digests
##' @param .print. set to `TRUE` to list which objects changed that necessitated re-running `f`
##' @param .inclfun. set to `FALSE` to not include `fun` in the hash digest, i.e., to not require re-running `fun` if only `fun` itself has changed
##' @return the result of running `fun`
##' @author Frank Harrell
##' @md
runifChanged <- function(fun, ..., file=NULL, .print.=TRUE, .inclfun.=TRUE) {
  ## Run `fun` (a no-argument function) only if any of the inputs in ...
  ## changed since the last saved run; otherwise return the cached result
  ## stored in `file`.  See the roxygen comments above for details.
  if(! length(file)) {
    ## Default file name: current knitr chunk label + '.rds'
    file <- knitr::opts_current$get('label')
    if(! length(file))
      stop('attempt to run runifChanged without file= outside a knitr chunk')
    file <- paste0(file, '.rds')
  }
  w <- list(...)
  ## Names of fun and of the ... arguments as typed by the caller
  .names. <- (as.character(sys.call())[-1])[1 : (length(w) + 1)]
  hashobj <- if(! .inclfun.) hashCheck(..., file=file,
                                       .print.=.print., .names.=.names.[-1])
  else {
    ## Include fun itself in the dependency list so that editing the
    ## function body also triggers a re-run
    w <- c(list(fun), w)
    w$file <- file
    w$.print. <- .print.
    w$.names. <- .names.
    do.call(hashCheck, w)
  }
  hash   <- hashobj$hash
  result <- hashobj$result
  if(! length(result)) {
    ## Inputs changed (or no previous run): recompute and cache with the
    ## new hash vector attached as an attribute
    result <- fun()
    attr(result, 'hash') <- hash
    saveRDS(result, file, compress='xz')
  }
  result
}
Hmisc/R/curveRep.s 0000644 0001762 0000144 00000034113 14400362564 013503 0 ustar ligges users curveRep <- function(x, y, id, kn=5, kxdist=5, k=5, p=5, force1=TRUE,
                     metric=c('euclidean','manhattan'),
                     smooth=FALSE, extrap=FALSE, pr=FALSE) {
  ## Find representative curves from a collection of curves (x, y, id):
  ## cluster hierarchically, first by per-curve sample size (kn groups),
  ## then by the distribution of x (kxdist clusters via clara), then by
  ## y interpolated to p common x points (up to k clusters).
  ## force1 : force a separate group for curves with a single point
  ## smooth : lowess-smooth curves before interpolating y
  ## extrap : use linear extrapolation (approxExtrap) outside the data range
  ## pr     : print progress messages
  metric <- match.arg(metric)
  id <- as.character(id)
  omit <- is.na(x + y)
  missfreq <- NULL; nomit <- sum(omit)
  if(nomit) {
    ## Tabulate how many points were dropped per curve, for reporting
    m <- tapply(omit, id, sum)
    missfreq <- table(m)
    x <- x[!omit]; y <- y[!omit]; id <- id[!omit]
  }
  n <- length(x)
  ns <- table(id)
  nunique <- length(unique(ns))
  ## Sample-size cut points; few distinct sizes are used as-is
  if(nunique==1 || nunique <= kn) ncuts <- c(sort(unique(ns)),Inf) else {
    grouped.n <- cut2(ns, g=kn)
    ncuts <- cut2(ns, g=kn, onlycuts=TRUE)
    if(force1 && ncuts[2] > 1 && min(ns)==1)
      ncuts <- sort(unique(c(1:2, ncuts)))
  }
  nlev <- length(ncuts)-1
  res <- vector('list', nlev)
  names(res) <- as.character(ncuts[-length(ncuts)])
  ## Degenerate inputs (constant rows or too few rows) collapse to 1 cluster
  clust <- function(x, k)
    if(diff(range(x))==0 || NROW(x) < k+1) rep(1, NROW(x)) else
    clara(x, k, metric=metric)$clustering
  interp <- if(extrap)
    function(x, y=NULL, xout) approxExtrap(x, y, xout=xout)$y else
    function(x, y=NULL, xout) approx(x, y, xout=xout, rule=2)$y
  ## Cluster by sample size first
  if(pr) cat('Creating',nlev,'sample size groups\n\n')
  for(i in 1:nlev) {
    ## Get list of curve ids in this sample size group
    if(i==nlev) {
      below <- ns <= ncuts[i+1]
      brack <- ']'
    } else {
      below <- ns < ncuts[i+1]
      brack <- ')'
    }
    ids <- names(ns)[ns >= ncuts[i] & below]
    if(pr) cat('Processing sample size [',ncuts[i],',',ncuts[i+1],
               brack,' containing ', length(ids),' curves\n',sep='')
    if(length(ids) < kxdist) res[[i]] <- list(ids) else {
      ## Cluster by distribution of x within sample size group
      ## Summarize these ids by clustering on range of x,
      ## plus the largest gap if minimum sample size > 2
      ## Use only the x position if min sample size is 1
      s <- id %in% ids
      ssize <- min(tapply(x[s], id[s], function(w) length(unique(w))))
      z <- tapply((1:n)[s], id[s],
                  function(j) if(ssize==1) x[j][1] else
                  if(ssize==2) range(x[j]) else
                  c(range(x[j]),max(diff(sort(x[j])))))
      z <- matrix(unlist(z), nrow=length(z), byrow=TRUE)
      if(kxdist > nrow(z) - 1)
        stop('number of curves to cluster must be >= kxdist+1')
      distclusters <- clust(z, kxdist)
      if(pr) {
        cat(' Number of curves in each x-dist cluster:\n')
        print(table(distclusters))
      }
      resi <- list()
      ## Within x distribution and within sample size interval,
      ## cluster on linearly interpolated y at p equally spaced x points
      ## unless <2 unique x-points for some curve
      for(clus in 1:max(distclusters)) {
        idc <- ids[distclusters==clus]
        if(pr) cat(' Processing x-distribution group', clus,
                   'containing', length(idc),'curves\n')
        s <- id %in% idc
        ssize <- min(tapply(x[s], id[s], function(w) length(unique(w))))
        if(ssize > 1) {
          xrange <- range(x[s])
          xseq <- seq(xrange[1], xrange[2], length.out=p)
        }
        ## Feature vector per curve: (mean x, mean y) when only one unique
        ## x; otherwise (smoothed) y interpolated at the common grid xseq
        g <- if(ssize==1) function(j) c(mean(x[j]), mean(y[j])) else
          if(smooth && ssize > 2)
            function(j) interp(clowess(x[j],y[j]), xout=xseq) else
            function(j) interp(x[j], y[j], xout=xseq)
        z <- tapply((1:n)[s], id[s], g)
        z <- matrix(unlist(z), nrow=length(idc), byrow=TRUE)
        yclusters <- clust(z, min(k, max(length(idc)-2,1)))
        names(yclusters) <- idc
        resi[[clus]] <- yclusters
      }
      res[[i]] <- resi
    }
  }
  structure(list(res=res, ns=table(ns), nomit=nomit, missfreq=missfreq,
                 ncuts=ncuts, kn=kn, kxdist=kxdist, k=k, p=p,
                 smooth=smooth, x=x, y=y, id=id),
            class='curveRep')
}
print.curveRep <- function(x, ...) {
  ## Print a concise summary of a curveRep clustering result: the tuning
  ## parameters, per-curve sample-size frequencies, missing-value counts,
  ## sample-size cut points, and per-stratum cluster frequency tables.
  sm <- if(x$smooth) 'smooth' else 'not smoothed'
  ncuts <- x$ncuts
  cat('kn:',x$kn, ' kxdist:',x$kxdist, ' k:',x$k,
      ' p:',x$p, ' ', sm, '\n\n', sep='')
  cat('Frequencies of number of non-missing values per curve:\n')
  print(x$ns)
  if(length(x$missfreq)) {
    cat(x$nomit, 'missing values excluded.\n\n')
    cat('\nFrequency of number of missing values per curve:\n')
    print(x$missfreq)
  }
  cat('\nSample size cuts:', paste(ncuts, collapse=' '),'\n')
  cat('Number of x distribution groups per sample size group:',
      paste(sapply(x$res, length), collapse=' '),'\n\n')
  groups <- x$res
  ngroups <- length(groups)
  for(g in seq_len(ngroups)) {
    sizegrp <- groups[[g]]
    nclus <- max(unlist(sizegrp))
    ## One column per x-distribution cluster, one row per y-cluster
    counts <- matrix(NA, nrow=nclus, ncol=length(sizegrp),
                     dimnames=list(paste('Cluster', 1:nclus),
                                   paste('x-Dist', 1:length(sizegrp))))
    for(col in seq_along(sizegrp))
      counts[, col] <- tabulate(sizegrp[[col]], nbins=nclus)
    closing <- if(g == ngroups) ']' else ')'
    lab <- if(is.infinite(ncuts[g+1])) ncuts[g] else
      paste('[', ncuts[g], ',', ncuts[g+1], closing, sep='')
    cat('\nNumber of Curves for Sample Size ', lab, '\n', sep='')
    print(counts)
  }
  invisible()
}
plot.curveRep <- function(x, which=1:length(res),
                          method=c('all','lattice','data'),
                          m=NULL, probs=c(.5,.25,.75),
                          nx=NULL, fill=TRUE,
                          idcol=NULL, freq=NULL, plotfreq=FALSE,
                          xlim=range(x), ylim=range(y),
                          xlab='x', ylab='y', colorfreq=FALSE, ...) {
  ## Plot curves in a curveRep object by cluster.
  ## method='all'     : base graphics, one panel per x-dist/cluster combo
  ## method='lattice' : one lattice page for a single sample-size group
  ## method='data'    : return the data underlying the lattice plot
  ## m        : max number of curves sampled per cluster, or 'quantiles'
  ## idcol    : named colors per curve id; freq: category per curve id
  method <- match.arg(method)
  retdat <- FALSE
  if(method == 'data') {
    retdat <- TRUE
    method <- 'lattice'
    sRequire('lattice')
  }
  ncuts <- x$ncuts
  ## Unpack; note x is reassigned last so earlier extractions still see
  ## the original object
  res <- x$res; id <- x$id; y <- x$y; k <- x$k; x <- x$x
  nng <- length(res)
  ## Sample at most m curves from a cluster (all when m is NULL/character)
  samp <- function(ids)
    if(!length(m) || is.character(m) ||
       length(ids) <= m) ids else sample(ids, m)
  if(is.character(m) &&
     (m != 'quantiles' || method != 'lattice'))
    stop('improper value of m')
  if(method=='lattice') {
    if(length(which) != 1)
      stop('must specify one n range to plot for method="lattice" or "data"')
    nres <- names(res)
    ## Panel main title describing the sample-size interval
    nname <- if(length(nres)==1) NULL else
      if(nres[which]=='1' & nres[which+1]=='2') 'n=1' else {
        brack <- if(which==length(nres)) ']' else ')'
        z <- if(is.infinite(ncuts[which+1])) ncuts[which] else
          paste('[',ncuts[which],',',ncuts[which+1],brack,sep='')
        paste('n ',z, sep='')
      }
    res <- res[[which]]
    n <- length(x)
    ## Flattened plotting vectors, filled cluster by cluster below
    X <- Y <- xdist <- cluster <- sizecluster <- numeric(n)
    curve <- character(n)
    if(length(freq)) {
      unique.cats <- unique(freq)
      Freqtab <- matrix(0, nrow=n, length(unique.cats),
                        dimnames=list(NULL, unique.cats))
    }
    st <- 1
    for(jx in 1:length(res)) {
      xgroup <- res[[jx]]
      ids <- names(xgroup)
      for(jclus in 1:max(xgroup)) {
        all.ids.in.cluster <- ids[xgroup==jclus]
        if(length(freq)) {
          freqtab <- table(freq[all.ids.in.cluster])
          nfreqtab <- names(freqtab)
        }
        plotted.ids.in.cluster <- samp(all.ids.in.cluster)
        for(cur in plotted.ids.in.cluster) {
          s <- id %in% cur
          np <- sum(s)
          i <- order(x[s])
          en <- st+np-1
          if(en > n) stop('program logic error 1')
          X[st:en] <- x[s][i]
          Y[st:en] <- y[s][i]
          xdist[st:en] <- jx
          cluster[st:en] <- jclus
          curve[st:en] <- cur
          sizecluster[st:en] <- sum(xgroup==jclus)
          if(length(freq)) Freqtab[st:en, nfreqtab] <- rep(freqtab, each=np)
          st <- st+np
        }
      }
    }
    ## Trim the preallocated vectors to the rows actually filled
    Y <- Y[1:en]; X <- X[1:en]
    distribution <- xdist[1:en]; cluster <- cluster[1:en]
    curve <- curve[1:en]; sizecluster <- sizecluster[1:en]
    if(length(freq)) Freqtab <- Freqtab[1:en,,drop=FALSE]
    ## Writes cluster size or frequency annotation into the current panel
    textfun <- function(subscripts, groups=NULL) {
      if(!length(subscripts)) return()
      txt <- if(length(freq) && length(groups)) {
        tab <- Freqtab[subscripts[1],]
        if(plotfreq) {
          ## Draw a small horizontal bar chart of category frequencies
          mx <- max(Freqtab, na.rm=TRUE)
          f <- mx/(.1*plotfreq)
          y <- 1
          fnam <- names(tab)
          long <- fnam[nchar(fnam)==max(nchar(fnam))][1]
          lx <- convertX(unit(1, 'strwidth', long), 'npc', valueOnly=TRUE)
          for(i in 1:length(tab)) {
            y <- y - .075
            grid.text(fnam[i], x=lx-.005, y=y+.025, just=c(1,.5),
                      gp=gpar(fontsize=7, col=gray(.4)))
            if(tab[i] > 0)
              grid.polygon(x=c(lx, lx+tab[i]/f, lx+tab[i]/f, lx, lx),
                           y=c(y, y, y+.05, y+.05, y),
                           gp=gpar(fill=gray(.7), col=gray(.7)))
            if(tab[i]==mx)
              grid.text(mx, x=lx+mx/f + .01, y=y+.025,
                        just=c(0,.5), gp=gpar(fontsize=7, col=gray(.4)))
          }
          return()
        }
        txt <- paste(names(tab), tab, sep=':')
        txt2 <- txt
        paste(txt, collapse=';')
      } else {
        size <- sizecluster[subscripts[1]]
        paste('N=',size,sep='')
      }
      if (!colorfreq | is.null(idcol) | is.null(freq)) #Do same as original
      {
        grid.text(txt, x = 0.005, y = 0.99, just = c(0, 1),
                  gp = gpar(fontsize = 9, col = gray(0.25)))
      }
      else #color freq text using idcol
      {
        ## Color each 'category:count' token with the idcol color of a
        ## curve in that category
        mycolors<-data.frame(idcol,freq)
        mycolors<-unique(mycolors)
        curtext<-txt2[1]
        curtrt<-strsplit(curtext,':')[[1]][1]
        curcol<-as.character( mycolors[ curtrt==as.character( mycolors[,2]) ,1] )
        grid.text(curtext,
                  x = 0.005,
                  y = 0.99,
                  just = c(0, 1),
                  gp = gpar(fontsize = 9, col =curcol)
                  )
        emspace<-2*strwidth('M', units="figure")
        for (i in 2:length(txt2))
        {
          curtext<-txt2[i]
          curtrt<-strsplit(curtext,':')[[1]][1]
          curcol<-as.character( mycolors[ curtrt==as.character( mycolors[,2]) ,1] )
          grid.text(curtext,
                    x = emspace+strwidth(txt2[i-1], units="figure") ,
                    y = 0.99, just = c(0, 1),
                    gp = gpar(fontsize = 9, col = curcol)
                    )
        }
      }
    }
    ## Panel function: per-group colored lines/points when idcol is given,
    ## otherwise default superposed panels; both add textfun annotations
    pan <- if(length(idcol))
      function(x, y, subscripts, groups, type, ...) {
        groups <- as.factor(groups)[subscripts]
        textfun(subscripts, groups)
        for(g in levels(groups)) {
          idx <- groups == g
          xx <- x[idx]; yy <- y[idx]; ccols <- idcol[g]
          if (any(idx)) {
            switch(type,
                   p = lattice::lpoints(xx, yy, col = ccols),
                   l = lattice::llines(xx, yy, col = ccols),
                   b = { lattice::lpoints(xx, yy, col = ccols)
                     lattice::llines(xx, yy, col = ccols) })
          }
        }
      } else function(x, y, subscripts, groups, ...) {
        lattice::panel.superpose(x, y, subscripts, groups, ...)
        textfun(subscripts, groups)
      }
    if(retdat) return(data.frame(x=X, y=Y, distribution, cluster,
                                 curve=curve, ninterval=nname))
    if(is.character(m))
      ## m='quantiles': plot quantile bands via xYplot instead of raw curves
      print(xYplot(Y ~ X | distribution*cluster,
                   method='quantiles', probs=probs, nx=nx,
                   xlab=xlab, ylab=ylab,
                   xlim=xlim, ylim=ylim,
                   main=nname, as.table=TRUE,
                   panel=function(x, y, subscripts, ...) {
                     if(length(subscripts)) {
                       panel.xYplot(x, y, subscripts, ...)
                       textfun(subscripts)
                     }
                   })) else
      print(lattice::xyplot(Y ~ X | distribution*cluster, groups=curve,
                            xlab=xlab, ylab=ylab,
                            xlim=xlim, ylim=ylim,
                            type=if(nres[which]=='1')'b' else 'l',
                            main=nname, panel=pan, as.table=TRUE))
    return(invisible())
  }
  ## method='all': base graphics, one plot per x-dist group x cluster
  for(jn in which) {
    ngroup <- res[[jn]]
    for(jx in 1:length(ngroup)) {
      xgroup <- ngroup[[jx]]
      ids <- names(xgroup)
      for(jclus in 1:max(xgroup)) {
        rids <- ids[xgroup==jclus]
        nc <- length(rids)
        ids.in.cluster <- samp(rids)
        for(curve in 1:length(ids.in.cluster)) {
          s <- id %in% ids.in.cluster[curve]
          i <- order(x[s])
          type <- if(length(unique(x[s]))==1)'b' else 'l'
          if(curve==1) {
            ## First curve opens the plot and writes the panel title
            plot(x[s][i], y[s][i], xlab=xlab, ylab=ylab,
                 type='n', xlim=xlim, ylim=ylim)
            brack <- if(jn==nng) ']' else ')'
            z <- if(is.infinite(ncuts[jn+1])) ncuts[jn] else
              paste('[', ncuts[jn],',',ncuts[jn+1],brack,sep='')
            title(paste('n ', z, ' x=',jx,
                        ' c=',jclus,' ',nc,' curves', sep=''), cex=.5)
          }
          lines(x[s][i], y[s][i], type=type,
                col=if(length(idcol))
                  idcol[ids.in.cluster[curve]] else curve)
        }
      }
      ## Pad with empty plots so each x-dist group occupies k panels
      if(fill && max(xgroup) < k)
        for(i in 1:(k - max(xgroup)))
          plot(0, 0, type='n', axes=FALSE, xlab='', ylab='')
    }
  }
}
curveSmooth <- function(x, y, id, p=NULL, pr=TRUE) {
  ## Replace each curve's y values with lowess-smoothed values (clowess),
  ## optionally evaluated at p equally spaced x points per curve.
  ## Curves with fewer than 3 distinct x values are passed through as-is.
  ## pr=TRUE prints a progress count every 50 curves.
  bad <- is.na(x + y)
  if(any(bad)) {
    x <- x[!bad]; y <- y[!bad]; id <- id[!bad]
  }
  ids <- unique(id)
  ncurves <- length(ids)
  usep <- length(p)
  if(usep) {
    X <- Y <- numeric(p * ncurves)
    Id <- rep(id, length.out=p * ncurves)
  }
  lo <- 1
  hi <- 0
  done <- 0
  for(current in ids) {
    if(pr) {
      done <- done + 1
      if((done %% 50) == 0) cat(done, '')
    }
    sel <- id == current
    xs <- x[sel]
    ys <- y[sel]
    if(length(unique(xs)) < 3) {
      ## Too few distinct x values to smooth; copy through when gridding
      if(usep) {
        hi <- lo + length(xs) - 1
        X[lo:hi] <- xs
        Y[lo:hi] <- ys
        Id[lo:hi] <- current
      }
    } else if(usep) {
      ## Smooth, then evaluate on an equally spaced grid of (up to) p points
      ux <- sort(unique(xs))
      grid <- if(length(ux) < p) ux else seq(min(ux), max(ux), length.out=p)
      Y[lo:(lo + length(grid) - 1)] <- approx(clowess(xs, ys), xout=grid)$y
      hi <- lo + length(grid) - 1
      X[lo:hi] <- grid
      Id[lo:hi] <- current
    } else
      ## No grid requested: smooth in place at the original x values
      y[sel] <- approx(clowess(xs, ys), xout=xs)$y
    lo <- hi + 1
  }
  if(pr) cat('\n')
  if(usep) list(x=X[1:hi], y=Y[1:hi], id=Id[1:hi])
  else list(x=x, y=y, id=id)
}
Hmisc/R/mChoice.s 0000644 0001762 0000144 00000021230 14405605331 013250 0 ustar ligges users mChoice <- function(..., label='',
                    sort.levels=c('original','alphabetic'),
                    add.none=FALSE, drop=TRUE, ignoreNA=TRUE)
{
  ## Construct a multiple-choice variable from one or more vectors of
  ## choices.  Each observation is stored as a ';'-separated string of
  ## sorted level indices (NA when no choice was made).
  ## label       : variable label; defaults to label() or name of first arg
  ## sort.levels : keep levels in order of appearance, or sort alphabetically
  ## add.none    : add a 'none' level for observations with no choices
  ## drop        : derive levels from observed values rather than from
  ##               factor levels of the inputs
  ## ignoreNA    : exclude NA from the set of levels
  sort.levels <- match.arg(sort.levels)
  dotlist <- list(...)
  if(label == '') label <- label(dotlist[[1]])
  if(label == '') label <- as.character(sys.call())[2]
  if (drop)
    lev <- unique(as.character(unlist(dotlist)))
  else
    lev <- unique(unlist(lapply(dotlist, function(x) levels(as.factor(x)))))
  if(ignoreNA) lev <- setdiff(lev, NA)
  if(sort.levels=='alphabetic') lev <- sort(lev)
  lev <- lev[trimws(lev) != '']
  dotlist <- lapply(dotlist, FUN=match, table=lev) #, nomatch=0)
  ## Combine one observation's choices across the input vectors into a
  ## sorted, de-duplicated ';'-separated index string
  g <- function(...) {
    set <- c(...)
    set <- set[!is.na(set)]
    if(! length(set)) return(NA) # was return('')
    paste(sort(unique(set)), collapse=';')
  }
  Y <- do.call(mapply,
               c(list(FUN=g, SIMPLIFY=TRUE, USE.NAMES=FALSE, MoreArgs=NULL),
                 dotlist))
  ## Bug fix: g() now returns NA (not '') for empty choice sets, so
  ## any(Y=='') evaluated to NA whenever an observation had no choices,
  ## making if() fail with "missing value where TRUE/FALSE needed".
  ## na.rm=TRUE keeps the legacy ''-handling without tripping on NA,
  ## and the subscript is made NA-safe.
  if(add.none && any(Y == '', na.rm=TRUE) && 'none' %nin% lev) {
    lev <- c(lev, 'none')
    Y[! is.na(Y) & Y == ''] <- as.character(length(lev))
  }
  if(add.none && any(is.na(Y)) && 'none' %nin% lev) {
    lev <- c(lev, 'none')
    Y[is.na(Y)] <- as.character(length(lev))
  }
  structure(Y, label=label, levels=lev, class=c('mChoice','labelled'))
}
Math.mChoice <- function(x, ...)
{
  ## Group generic: math functions (abs, log, ...) are undefined for
  ## multiple-choice variables.
  stop(.Generic, " not meaningful for mChoice")
}
Summary.mChoice <- function(..., na.rm) {
  ## Group generic (max, min, range, sum, ...): not implemented for mChoice.
  .NotYetImplemented()
}
Ops.mChoice <- function(e1, e2)
{
  ## Group generic for operators.  Only == and != are meaningful; all
  ## other operators warn and return NA.  Comparisons involving NA yield NA.
  ok <- switch(.Generic, "=="=, "!="=TRUE, FALSE)
  if(!ok) {
    warning(.Generic, " not meaningful for mChoice")
    return(rep.int(NA, max(length(e1), if(!missing(e2))length(e2))))
  }
  nas <- is.na(e1) | is.na(e2)
  ## .Method is non-empty for whichever operand(s) dispatched as mChoice.
  ## NOTE(review): e1/e2 hold ';'-separated index strings, so l1[e1]
  ## indexes the level vector by name; this mirrors Ops.factor (where the
  ## data are integer codes) -- verify the decoding is as intended.
  if (nchar(.Method[1])) {
    l1 <- levels(e1)
    e1 <- l1[e1]
  }
  if (nchar(.Method[2])) {
    l2 <- levels(e2)
    e2 <- l2[e2]
  }
  ## When both operands are mChoice their level sets must agree
  if (all(nchar(.Method)) && (length(l1) != length(l2) ||
                              !all(sort.int(l2) == sort.int(l1))))
    stop("level sets of factors are different")
  value <- NextMethod(.Generic)
  value[nas] <- NA
  value
}
format.mChoice <- function(x, minlength=NULL, sep=";", ...)
{
  ## Convert the stored ';'-separated level indices into readable level
  ## names joined by `sep`; abbreviate names when minlength is given.
  labs <- attr(x, 'levels')
  if(length(minlength)) labs <- abbreviate(labs, minlength)
  one <- function(idx, labs, sep) paste(labs[as.numeric(idx)], collapse=sep)
  sapply(strsplit(x, ';'), one, labs=labs, sep=sep)
}
'[.mChoice' <- function(x, ..., drop=FALSE) {
  ## Subset an mChoice vector while preserving its attributes (label,
  ## levels, class) via Hmisc's consolidate() replacement function.
  if(drop) stop('drop=TRUE not implemented')
  atr <- attributes(x)
  atr$names <- NULL   # element names are handled by NextMethod subsetting
  x <- NextMethod('[')
  consolidate(attributes(x)) <- atr
  x
}
print.mChoice <- function(x, quote=FALSE, max.levels=NULL, width = getOption("width"),
                          ...) {
  ## Print the decoded values, then a "Levels:" line listing the level
  ## labels, truncated to the console width in the manner of print.factor.
  if (length(x) <= 0)
    cat("mChoice", "(0)\n", sep = "")
  else {
    ## Strip class/levels so that the underlying character data print as
    ## their decoded (as.character.mChoice) form
    xx <- x
    class(xx) <- NULL
    levels(xx) <- NULL
    xx[] <- as.character(x)
    print(xx, quote=quote, ...)
  }
  maxl <- if (is.null(max.levels)){
    TRUE
  }else max.levels
  if (maxl) {
    n <- length(lev <- encodeString(levels(x),
                                    quote = ifelse(quote, "\"", "")))
    colsep <- " "
    T0 <- "Levels: "
    if(is.logical(maxl))
      ## Fit as many level labels on the line as the width allows
      maxl <- {
        width <- width - (nchar(T0, "w") + 3 + 1 + 3)
        lenl <- cumsum(nchar(lev, "w") + nchar(colsep, "w"))
        if(n <= 1 || lenl[n] <= width)
          n
        else max(1, which(lenl > width)[1] - 1)
      }
    drop <- n > maxl
    ## When truncating, show leading levels, '...', then the last level
    cat(if(drop) paste(format(n), ""), T0,
        paste(if(drop) {c(lev[1:max(1, maxl - 1)], "...", if (maxl > 1) lev[n])
        }else lev, collapse = colsep), "\n", sep = "")
  }
  invisible(x)
}
as.character.mChoice <- function(x, ...) {
  ## Expand the stored ';'-separated level indices into ';'-joined level
  ## names, one string per observation.
  labs <- levels(x)
  decode <- function(idx) paste(labs[as.integer(idx)], collapse=';')
  sapply(strsplit(x=x, split=';'), decode)
}
as.double.mChoice <- function(x, drop=FALSE, ...) {
  ## Expand an mChoice vector into a 0/1 indicator matrix with one column
  ## per level (dimnames: observation names x level labels).
  ## drop=TRUE removes columns for levels that were never chosen.
  lev <- attr(x,'levels')
  X <- matrix(0, nrow=length(x), ncol=length(lev),
              dimnames=list(names(x), lev))
  unused <- numeric(0)
  ## seq_along instead of 1:length(lev) so a zero-level object yields an
  ## empty loop rather than iterating over c(1, 0)
  for(i in seq_along(lev)) {
    xi <- 1*inmChoice(x, i)   # sibling helper defined later in this file
    if(sum(xi)==0) unused <- c(unused, i)
    X[,i] <- xi
  }
  if(drop && length(unused)) X <- X[, -unused, drop=FALSE]
  X
}
nmChoice <- function(object) {
  ## Number of choices per observation: one more than the number of ';'
  ## separators in the encoded string, and zero for the empty string.
  seps <- gsub('[^;]', '', object)
  counts <- nchar(seps) + 1
  counts[object == ''] <- 0
  counts
}
summary.mChoice <- function(object, ncombos=5, minlength=NULL,
                            drop=TRUE, short=FALSE, ...) {
  ## Summarize an mChoice variable: number of unique combinations,
  ## distribution of choices per observation, pairwise level frequencies,
  ## and the ncombos most frequent combinations.
  ## short=TRUE replaces level labels by (1), (2), ... and carries the
  ## original labels separately for compact printing.
  levels <- NULL
  if(short) {
    levels <- attr(object, 'levels')
    attr(object, 'levels') <- paste0('(', 1 : length(levels), ')')
  }
  nunique <- length(unique(object))
  y <- gsub('[^;]', '', object)   # one ';' per additional choice
  nchoices <- nchar(y) + 1
  nchoices[object == ''] <- 0
  nchoices <- table(nchoices, dnn=NULL)
  ## 0/1 indicator matrix via as.double.mChoice dispatch
  X <- as.numeric(object, drop=drop)
  if(length(minlength))
    dimnames(X)[[2]] <- abbreviate(dimnames(X)[[2]],minlength)
  ## Pairwise frequencies; the diagonal holds marginal level frequencies
  crosstab <- crossprod(X)
  combos <- table(format(object, minlength))
  i <- order(-combos)
  combos <- combos[i[1:min(ncombos,length(combos))]]
  structure(list(nunique=nunique, nchoices=nchoices,
                 crosstab=crosstab, combos=combos,
                 label=label(object), levels=levels),
            class='summary.mChoice')
}
print.summary.mChoice <- function(x, prlabel=TRUE, render=TRUE, ...) {
  ## Print (or render as HTML when prType() == 'html') the object built by
  ## summary.mChoice.
  levels <- x$levels
  crosstab <-format(x$crosstab)
  crosstab[lower.tri(crosstab)] <- ''   # show upper triangle + diagonal only
  s <- if(length(x$combos)==x$nunique) 'Frequencies of All Combinations' else
    paste('Frequencies of Top', length(x$combos), 'Combinations')
  R <- character(0)
  if(prType() == 'html') {
    lev <- x$levels
    if(length(lev)) { # short was in effect: show a two-column level legend
      lev <- paste0('(', 1 : length(lev), ') ', lev)
      ml <- max(nchar(lev))
      sz <- if(ml > 45) round(0.825 * 85) else 85
      half <- ceiling(length(lev) / 2)
      left <- lev[1 : half]
      rt <- lev[(half + 1) : length(lev)]
      if(length(rt) < length(left)) rt <- c(rt, '')
      ## NOTE(review): the HTML tags inside the string literals below
      ## appear to have been stripped during text extraction (sz is
      ## computed above but no longer referenced); restore the markup from
      ## the upstream Hmisc source before relying on HTML output.
      tab <- paste0('
', left,
                    '
', rt,
                    '
')
      R <- paste0('
',
                  paste(tab, collapse=' '), '
')
    }
    y <- list('', x$nchoices, crosstab, x$combos)
    names(y) <- c(paste(x$nunique, 'unique combinatons'),
                  'Frequencies of Numbers of Choices Per Observation',
                  'Pairwise Frequencies (Diagonal Contains Marginal Frequencies)',
                  s)
    R <- c(R, do.call(htmltabv, y))
    return(if(render) rendHTML(R) else htmltools::HTML(R))
  }
  ## Plain-text rendering
  if(length(levels)) {
    lev <- paste(paste0('(', 1 : length(levels), ') ', levels), collapse='; ')
    cat('', strwrap(lev), '', sep='\n')
  }
  cat('\n', x$nunique, ' unique combinations\n\n', sep='')
  if(prlabel) cat(x$label, '\n\n', sep='')
  cat('Frequencies of Numbers of Choices Per Observation\n\n')
  print(x$nchoices)
  cat('\nPairwise Frequencies (Diagonal Contains Marginal Frequencies)\n\n')
  print(crosstab, quote=FALSE)
  cat('\n', s, '\n')
  print(x$combos)
  invisible()
}
match.mChoice <- function(x, table, nomatch = NA,
                          incomparables = FALSE) {
  ## Match values in x against the levels of an mChoice `table`, delegating
  ## per-observation matching to compiled C code (do_mchoice_match).
  if (!is.logical(incomparables) || incomparables) {
    .NotYetUsed("incomparables != FALSE")
  }
  lev <- attr(table, 'levels')
  if(is.factor(x) || is.character(x)) {
    ## Convert labels to level indices; unmatched labels become 0
    x <- match(as.character(x), lev, nomatch=0)
  }
  return(.Call("do_mchoice_match", as.integer(x), table, as.integer(nomatch)))
}
# inmChoice <- function(x, values) {
# match.mChoice(values, x, nomatch=0) > 0
# }
inmChoice <- function(x, values, condition=c('any', 'all')) {
  ## For each observation of mChoice x, test whether the given values were
  ## chosen.  `values` may be level names or level indices.
  ## condition='any': TRUE if at least one value was chosen;
  ## condition='all': TRUE only if every value was chosen.
  condition <- match.arg(condition)
  lev <- attr(x, 'levels')
  if(is.character(values)) {
    idx <- match(values, lev)
    if(any(is.na(idx))) stop(paste('values not in levels:',
                                   paste(values[is.na(idx)],collapse=';')))
    values <- idx
  }
  ## Delimit both sides with ';' so index 1 cannot match inside 10, etc.
  padded  <- paste(';', unclass(x), ';', sep='')
  targets <- paste(';', values, ';', sep='')
  res <- rep(condition != 'any', length(padded))
  for(tg in targets) {
    hits <- grep(tg, padded)
    if(length(hits)) {
      if(condition == 'any') res[hits] <- TRUE
      else res[-hits] <- FALSE
    } else if(condition == 'all') res[] <- FALSE
  }
  res
}
inmChoicelike <- function(x, values, condition=c('any', 'all'),
                          ignore.case=FALSE, fixed=FALSE) {
  ## Like inmChoice() but matches free-text patterns (via grep) against the
  ## character form of x instead of exact level indices.
  ## condition='any': TRUE if any pattern matches;
  ## condition='all': TRUE only if every pattern matches.
  condition <- match.arg(condition)
  if(! is.character(values)) stop('values must be a character vector')
  txt <- as.character(x)
  res <- rep(condition != 'any', length(txt))
  for(pat in values) {
    hits <- grep(pat, txt, ignore.case=ignore.case, fixed=fixed)
    if(length(hits)) {
      if(condition == 'any') res[hits] <- TRUE
      else res[-hits] <- FALSE
    } else if(condition == 'all') res[] <- FALSE
  }
  res
}
is.mChoice <- function(x)
{
  ## Test whether x is a multiple-choice (mChoice) variable.
  inherits(x, 'mChoice')
}
Hmisc/R/summaryS.s 0000644 0001762 0000144 00000075276 14400371203 013535 0 ustar ligges users summaryS <- function(formula, fun=NULL,
                     data=NULL, subset=NULL, na.action=na.retain,
                     continuous=10,
                     ...) {
  ## Reshape one or more response variables (lhs of formula) into long
  ## format, one row per (observation, y-variable), carrying the rhs
  ## stratification variables; optionally summarize y with `fun`.
  ## continuous : minimum number of distinct values for a y variable to be
  ##              treated as continuous when computing default y limits
  formula <- Formula::Formula(formula)
  Y <- if(length(subset))
    model.frame(formula, data=data, subset=subset, na.action=na.action)
  else
    model.frame(formula, data=data, na.action=na.action)
  X <- Formula::model.part(formula, data=Y, rhs=1)
  Y <- Formula::model.part(formula, data=Y, lhs=1)
  nY <- NCOL(Y)
  nX <- NCOL(X)
  namY <- names(Y)
  namX <- names(X)
  if(nX == 0) X <- data.frame(x=rep(1, NROW(Y)))  # constant x when no rhs
  ux <- unique(X)
  Z <- NULL
  n <- nrow(X)
  ## Default y-axis limits per response: factor level range, data range for
  ## few distinct values, otherwise the 1st-99th percentile interval
  g <- function(y) {
    y <- y[! is.na(y)]
    if(is.character(y)) c(NA, NA)
    else if(is.factor(y)) c(1, length(levels(y)))
    else if(length(unique(y)) < continuous) range(y)
    else if(! is.matrix(y)) quantile(y, c(0.01, 0.99))
  }
  ylim <- lapply(Y, g)
  ## Stack the response columns into a single long 'y' with 'yvar' labels
  w <- reshape(cbind(X, Y), direction='long', v.names='y',
               varying=namY, times=namY, timevar='yvar')
  if(inherits(Y[[1]], 'Surv')) {
    ## Preserve Surv attributes (except dim/dimnames) on the stacked y
    at <- attributes(Y[[1]])
    at <- at[setdiff(names(at), c('dim', 'dimnames'))]
    attributes(w$y) <- c(attributes(w$y), at)
  }
  w$yvar <- factor(w$yvar, namY)
  funlabel <- NULL
  if(length(fun)) {
    ## Summarize y by yvar and the x variables; keep fun's column names
    by <- c('yvar', if(length(namX)) namX else 'x')
    y <- w$y
    w <- summarize(y, w[by], fun, type='matrix', keepcolnames=TRUE)
    funlabel <- if(is.matrix(w$y)) colnames(w$y)[1]
  }
  gg <- function(x) if(is.character(x) || is.factor(x))
    'categorical' else 'numeric'
  ## For some reason sapply is doubling names e.g. sbp.sbp
  sapply2 <- function(data, ...) {
    s <- sapply(data, ...)
    names(s) <- names(data)
    s
  }
  xlabels <- sapply2(X, label)
  xlabels <- ifelse(xlabels == '', names(xlabels), xlabels)
  ylabels <- sapply2(Y, label)
  ylabels <- ifelse(ylabels == '', names(ylabels), ylabels)
  for(n in names(w)) # takes care of R change stringsAsFactors=FALSE
    if(is.character(w[[n]])) w[[n]] <- factor(w[[n]])
  structure(w, class=c('summaryS', 'data.frame'),
            formula=formula, fun=fun,
            xnames=names(X), xlabels=xlabels, xunits=sapply2(X, units),
            xtype=sapply(X, gg),
            ynames=namY, ylabels=ylabels, yunits=sapply2(Y, units),
            ylim=ylim, funlabel=funlabel)
}
## lattice-based plot method for objects created by summaryS.
## Draws one of three displays depending on summaryS attributes:
##   'xy'         : raw x-y panels (no summary function was applied)
##   'dot'        : dot charts of computed statistics
##   'xy.special' : user-supplied panel for summarized data
## Labels, units and per-y-variable limits are taken from attributes
## that summaryS attached to x.  Returns a lattice trellis object.
plot.summaryS <-
function(x, formula=NULL, groups=NULL, panel=NULL,
paneldoesgroups=FALSE, datadensity=NULL, ylab='',
funlabel=NULL, textonly='n', textplot=NULL, digits=3, custom=NULL,
xlim=NULL, ylim=NULL, cex.strip=1, cex.values=0.5, pch.stats=NULL,
## NOTE(review): the key default references groupslevels, which is only
## computed inside the function body; this works through R's lazy
## evaluation of default arguments
key=list(columns=length(groupslevels),
x=.75, y=-.04, cex=.9,
col=lattice::trellis.par.get('superpose.symbol')$col,
corner=c(0,1)),
outerlabels=TRUE, autoarrange=TRUE, scat1d.opts=NULL, ...)
{
sRequire('lattice')
sRequire('latticeExtra')
## Metadata stored on x by summaryS
xtype <- attr(x, 'xtype')
nn <- sum(xtype == 'numeric')
if(nn > 1) stop('does not handle more than one numeric continuous x')
X <- x
at <- attributes(x)
Form <- at$formula
nX <- at$nX
nY <- at$nY
ylabels <- at$ylabels
yunits <- at$yunits
ylims <- at$ylim
xnames <- at$xnames
xlabels <- at$xlabels
xunits <- at$xunits
fun <- at$fun
## funlabel attribute (from summaryS) wins over the argument
funlabel <- if(length(at$funlabel)) at$funlabel else funlabel
Panel <- panel
## Decide plot type: a summary function implies dot chart unless a
## custom panel was supplied
ptype <- if(length(fun)) { # used to always be 'dot'
if(length(Panel)) 'xy.special' else 'dot'
} else 'xy'
if(ptype %in% c('xy', 'xy.special') && ! any(xtype == 'numeric'))
stop('must have a numeric x variable to make x-y plot')
groupslevels <- if(length(groups)) levels(x[[groups]])
condvar <- xnames[xtype == 'categorical']
## Reorder condvar in descending order of number of levels
numu <- function(x)
if(is.factor(x)) length(levels(x))
else
length(unique(x[! is.na(x)]))
if(autoarrange && length(condvar) > 1) {
nlev <- sapply(X[condvar], numu)
condvar <- condvar[order(nlev)]
}
## Build the default lattice formula when none was supplied, using
## 'yvar' (the stacked y-variable indicator) as a conditioning variable
form <- if(length(formula)) formula
else {
## Non-groups conditioning variables
ngcond <- setdiff(condvar, groups)
## Collapsed non-group conditioning variables
ccv <- paste('|', paste(c(ngcond, 'yvar'), collapse=' * '))
## Collapsed non-group cond var after the first
ccv1 <- if(length(ngcond) > 1)
paste('|', paste(c(ngcond[-1], 'yvar'), collapse=' * '))
f <- if(ptype %in% c('xy', 'xy.special'))
paste('y ~', xnames[xtype == 'numeric'], ccv, sep='')
else paste(ngcond[1], '~ y', ccv1, sep='')
as.formula(f)
}
pst <- list(cex=cex.strip)
## Strip labels for the yvar panels: plain name if no label/units,
## otherwise a plotmath label
yvarlev <- NULL
for(v in levels(X$yvar)) {
un <- yunits[v]
l <- if(ylabels[v] == v && un == '') v else
labelPlotmath(ylabels[v], un)
yvarlev <- c(yvarlev, l)
}
## Custom strip function so yvar panels show the constructed labels
strip <- function(which.given, which.panel, var.name, factor.levels, ...) {
current.var <- var.name[which.given]
levs <- if(current.var == 'yvar') yvarlev else factor.levels
lattice::strip.default(which.given, which.panel, var.name,
factor.levels=levs, ...)
}
ylev <- levels(X$yvar)
## lims <- if(length(xlim)) xlim else ylims[ylev]
lims <- ylims[ylev]
## lims needs to be repeated according to layout
vars <- all.vars(form)
cond <- vars[- (1 : 2)]
if(! all(cond %in% 'yvar') && cond[1] != 'yvar') {
ngny <- setdiff(cond, 'yvar')
nr <- length(unique(do.call('paste', X[ngny])))
lims <- rep(lims, each=nr)
}
if(length(ylim)) lims <- ylim
d <- if(ptype == 'xy') {
## Optional data-density (rug) overlay, one scat1d strip per group,
## stacked downward from the top of the panel
pan <- if(! length(datadensity)) function(...) {}
else
function(x, y, subscripts, groups=NULL, ...) {
gp <- length(groups)
plot.line <-
lattice::trellis.par.get(if(gp) "superpose.line" else "plot.line")
col <- plot.line$col
gr <- if(gp) groups[subscripts] else factor(rep('', length(x)))
ypos <- unit(1, 'npc')
for(i in 1:length(levels(gr))) {
j <- which(gr == levels(gr)[i])
ypos <- ypos -
if(length(scat1d.opts) && 'nhistSpike' %in% names(scat1d.opts))
unit(2, 'mm') else unit(1, 'mm')
do.call('scat1d', c(list(x=x[j], y=ypos, col=col[i], grid=TRUE),
scat1d.opts))
}
}
scal <- list(y=list(relation='free', limits=lims, rot=0))
if(length(xlim)) scal$x <- list(limits=xlim)
xlab <- labelPlotmath(xlabels[xtype == 'numeric'],
xunits [xtype == 'numeric'])
if(! length(groups)) {
if(! length(Panel)) Panel <- lattice::panel.xyplot
lattice::xyplot(form, data=X,
panel=function(...) {Panel(...); pan(...)},
xlab=xlab, ylab=ylab, scales=scal, strip=strip,
par.strip.text=pst, ...)
} else {
panel.groups <- if(paneldoesgroups) NULL else
if(length(Panel)) Panel else lattice::panel.xyplot
if(! paneldoesgroups) Panel <- lattice::panel.superpose
## The groups variable name must appear literally in the call, so
## the call is built as text and evaluated
g <- if(length(panel.groups))
"lattice::xyplot(form, groups=%s, data=X, scales=scal, panel=function(...) {if(length(Panel)) Panel(..., scat1d.opts=scat1d.opts); pan(...)}, panel.groups=panel.groups, auto.key=key, xlab=xlab, ylab=ylab, strip=strip, par.strip.text=pst, ...)"
else
"lattice::xyplot(form, groups=%s, data=X, scales=scal, panel=function(...) {if(length(Panel)) Panel(..., scat1d.opts=scat1d.opts); pan(...)}, auto.key=key, xlab=xlab, ylab=ylab, strip=strip, par.strip.text=pst, ...)"
eval(parse(text=sprintf(g, groups)))
}
} else { # Dot chart or xy.special
## If > 1 calculated statistics, y is a matrix.
## Save first column as y and save remaining columns that are not
## text only as yother
y <- X$y
yother <- NULL
yText <- NULL
if(is.matrix(y) && length(c(textonly, textplot))) {
ptext <- colnames(y) %in% c(textonly, textplot)
if(any(ptext)) yText <- y[, ptext, drop=FALSE]
ponly <- colnames(y) %in% textonly
if(any(ponly)) y <- y[, ! ponly]
}
if(is.matrix(y) && ncol(y) > 1) {
yother <- y[, -1, drop=FALSE]
X$y <- y[, 1]
}
# ylev <- levels(X$yvar)
# lims <- if(length(xlim)) xlim else ylims[ylev]
# ## lims needs to be repeated according to layout
# vars <- all.vars(form)
# cond <- vars[- (1 : 2)]
# if(! all(cond %in% 'yvar') && cond[1] != 'yvar') {
# ngny <- setdiff(cond, 'yvar')
# nr <- length(unique(do.call('paste', X[ngny])))
# lims <- rep(lims, each=nr)
# }
# if(length(ylim)) lims <- ylim
scal <- list(x=list(relation='free', limits=xlim)) # limits=lims))
if(ptype == 'xy.special') names(scal) <- 'y'
if(ptype == 'dot') {
## Dot chart panel: draws range/CI segments from the extra
## statistic columns, optional per-statistic points (pch.stats),
## and optional text values right-justified in the panel
pan <- function(x, y, subscripts, groups=NULL, yother=NULL,...) {
gp <- length(groups)
dot.symbol <-
lattice::trellis.par.get(if(gp)'superpose.symbol' else 'dot.symbol')
plot.line <-
lattice::trellis.par.get(if(gp) "superpose.line" else "plot.line")
pch = dot.symbol$pch
col = dot.symbol$col
cex = dot.symbol$cex
font = dot.symbol$font
segmnts <- function (x0, y0, x1, y1, ...)
grid::grid.segments(x0, y0, x1, y1, default.units = "native",
gp = grid::gpar(...))
if(length(yother)) {
snames <- colnames(yother)
nc <- ncol(yother)
yoth <- yother[subscripts,, drop=FALSE]
gr <- if(gp) groups[subscripts] else factor(rep('', length(x)))
if(length(yText)) {
yText <- yText[subscripts,, drop=FALSE]
if(length(custom)) {
k <- custom(yText)
pasted <- k$result
llong <- unit(1, 'strwidth', k$longest)
} else {
## Default text formatting: name=value pairs separated by blanks
pasted <- rep('', length(y))
for(i in 1 : ncol(yText)) {
if(i > 1) pasted <- paste(pasted, ' ', sep='')
pasted <- paste(pasted, colnames(yText)[i], '=',
format(round(yText[, i], digits=digits)),
sep='')
}
llong <- unit(1, 'strwidth',
paste(' ', pasted[which.max(nchar(pasted))],
sep=''))
}
xpos <- unit(1, 'npc') - unit(1, 'mm') - length(levels(gr)) * llong
}
for(i in 1:length(levels(gr))) {
j <- which(gr == levels(gr)[i])
## Outer segment spans first to last extra statistic; with 4
## columns an inner (thicker-style) segment spans cols 2-3
if(nc > 1)
segmnts(yoth[j, 1], y[j], yoth[j, nc], y[j],
lwd=2 * plot.line$lwd[1],
lty=plot.line$lty[i], col=plot.line$col[i])
if(nc == 4)
segmnts(yoth[j ,2], y[j], yoth[j ,3], y[j],
lwd=2*plot.line$lwd[1],
lty=plot.line$lty[i], col=plot.line$col[i])
for(k in 1 : nc) {
if(length(pch.stats)) {
p <- pch.stats[snames[k]]
if(!is.na(p))
lattice::lpoints(yoth[j, k], y[j], pch=p, cex=cex, col=col[i],
font=font)
}
}
## Show selected statistics just under dot lines
if(length(yText)) {
xpos <- xpos + llong
grid::grid.text(pasted[j], xpos,
unit(y[j], 'native') - unit(1.75, 'mm'), just='right',
gp=grid::gpar(cex=cex.values, col=col[i]))
}
}
if(gp) lattice::panel.superpose(x, y, groups=as.numeric(groups),
subscripts=subscripts,
pch=pch, col=col, cex=cex, font=font, ...)
else
lattice::panel.dotplot(x, y, subscripts=subscripts,
pch=pch, col=col, cex=cex, font=font, ...)
}
else {
## No extra statistics: plain (possibly grouped) dot plot
if(gp)
lattice::panel.superpose(x, y, groups=as.numeric(groups),
subscripts=subscripts,
pch=pch, col=col, cex=cex,
font=font, ...)
else
lattice::panel.dotplot(x, y, subscripts=subscripts,
pch=pch, col=col, cex=cex, font=font, ...)
}
}
d <- if(!length(groups))
lattice::dotplot(form, data=X, panel=pan, strip=strip, par.strip.text=pst,
xlab=funlabel, scale=scal, yother=yother,...)
else eval(parse(text=
sprintf("lattice::dotplot(form, groups=%s, data=X, panel=pan, strip=strip, par.strip.text=pst, auto.key=key, xlab=funlabel, scale=scal, yother=yother, ...)", groups) ))
} # end ptype 'dot'
else { # ptype 'xy.special'
xl <- labelPlotmath(xlabels[1], xunits[1])
yl <- if(ylab == '') funlabel else ylab
d <- if(!length(groups))
lattice::xyplot(form, data=X, panel=Panel, strip=strip,
par.strip.text=pst, xlab=xl, ylab=yl, scale=scal, yother=yother,
...)
else {
panel.groups <- if(paneldoesgroups) NULL else Panel
if(! paneldoesgroups) panel <- lattice::panel.superpose
g <- if(length(panel.groups))
"lattice::xyplot(form, groups=%s, data=X, panel=Panel, panel.groups=panel.groups, strip=strip, par.strip.text=pst, auto.key=key, xlab=xl, ylab=yl, scale=scal, yother=yother, ...)"
else "lattice::xyplot(form, groups=%s, data=X, panel=Panel, strip=strip, par.strip.text=pst, auto.key=key, xlab=xl, ylab=yl, scale=scal, yother=yother, ...)"
eval(parse(text=sprintf(g, groups)))
}
}
}
## Move strip labels to the outside when the layout is a 2-D grid
if(outerlabels && length(dim(d)) == 2)
d <- latticeExtra::useOuterStrips(d, strip=strip, strip.left=strip)
d
}
## plotly counterpart of plot.summaryS: builds interactive graphics
## from a summaryS object via plotlyM.  Produces x-y plots, dot charts
## of statistics (with CI bands when Lower/Upper columns exist), or a
## special display drawn by a user-supplied sfun.  Hover text (.txt.)
## is assembled from x values, group values and statistics.
plotp.summaryS <-
function(data, formula=NULL,
groups=NULL, sfun=NULL, fitter=NULL,
showpts=! length(fitter),
funlabel=NULL, digits=5,
xlim=NULL, ylim=NULL,
shareX=TRUE, shareY=FALSE,
autoarrange=TRUE,
...)
{
## Metadata stored on data by summaryS
xtype <- attr(data, 'xtype')
nn <- sum(xtype == 'numeric')
if(nn > 1) stop('does not handle more than one numeric continuous x')
X <- data
at <- attributes(data)
Form <- at$formula
nX <- at$nX
nY <- at$nY
ylabels <- at$ylabels
yunits <- at$yunits
ylims <- at$ylim
xnames <- at$xnames
xlabels <- at$xlabels
xunits <- at$xunits
fun <- at$fun
## Argument funlabel wins here only when the attribute is absent
funlabel <- if(! length(funlabel) && length(at$funlabel))
at$funlabel else funlabel
funlabel <- htmlTranslate(funlabel, greek=TRUE)
ly <- length(ylabels)
ylab <- ylabels
for(i in 1 : length(ylab))
ylab[i] <- labelPlotmath(ylabels[i], yunits[i], html=TRUE)
## Small helpers: one-sided formula from a name; HTML number format
aform <- function(n) as.formula(paste('~', n))
fmt <- function(x) htmlSN(x, digits=digits)
ptype <- if(length(fun)) { # used to always be 'dot'
if(length(sfun)) 'xy.special' else 'dot'
} else 'xy'
if(length(sfun)) ptype <- 'xy.special'
if(ptype %in% c('xy', 'xy.special') && ! any(xtype == 'numeric'))
stop('must have a numeric x variable to make x-y plot')
groupslevels <- if(length(groups)) levels(data[[groups]])
condvar <- xnames[xtype == 'categorical']
## Reorder condvar in descending order of number of levels
numu <- function(x)
if(is.factor(x)) length(levels(x))
else
length(unique(x[! is.na(x)]))
if(autoarrange && length(condvar) > 1) {
nlev <- sapply(X[condvar], numu)
condvar <- condvar[order(nlev)]
}
## Build the default formula as in plot.summaryS
form <- if(length(formula)) formula
else {
## Non-groups conditioning variables
ngcond <- setdiff(condvar, groups)
## Collapsed non-group conditioning variables
ccv <- paste('|', paste(c(ngcond, 'yvar'), collapse=' * '))
## Collapsed non-group cond var after the first
ccv1 <- if(length(ngcond) > 1)
paste('|', paste(c(ngcond[-1], 'yvar'), collapse=' * '))
f <- if(ptype %in% c('xy', 'xy.special'))
paste('y ~', xnames[xtype == 'numeric'], ccv, sep='')
else paste(ngcond[1], '~ y', ccv1, sep='')
as.formula(f)
}
yvarlev <- NULL
for(v in levels(X$yvar)) {
un <- yunits[v]
l <- if(ylabels[v] == v && un == '') v else
labelPlotmath(ylabels[v], un, html=TRUE)
yvarlev <- c(yvarlev, l)
}
ylev <- levels(X$yvar)
lims <- ylims[ylev]
vars <- all.vars(form)
cond <- vars[- (1 : 2)]
## NOTE(review): nr is computed but never used below; this looks like
## a leftover from the lattice version (where lims is replicated) --
## confirm before removing
if(! all(cond %in% 'yvar') && cond[1] != 'yvar') {
ngny <- setdiff(cond, 'yvar')
nr <- length(unique(do.call('paste', X[ngny])))
}
if(length(ylim)) lims <- ylim
gp <- length(groups)
ylev <- levels(X$yvar)
nyvar <- length(ylev)
xn <- setdiff(xnames, groups)
if(length(xn) %nin% 1:2)
stop(paste('expecting at most two variables, found these:',
paste(xn, collapse=', ')))
## A second x variable becomes a stratification formula for plotlyM
if(length(xn) > 1) {
strata <- aform(xn[2])
xn <- xn[1]
}
else
strata <- NULL
## Assemble hover text from x, group, and statistic values
statnames <- if(is.matrix(X$y)) colnames(X$y)
.txt. <- paste0(xn, ': ', fmt(X[[xn]]))
if(gp) .txt. <- paste0(.txt., ' ', X[[groups]])
nstat <- length(statnames)
if(nstat == 0) .txt. <- paste0(.txt., ' ',
X$yvar, ': ', fmt(X$y))
else
for(i in 1 : nstat) { ## ?? was if(i > 1)' '
if(i == 2 && length(funlabel) && funlabel != '' && funlabel != ' ')
.txt. <- paste0(.txt., ' ', funlabel)
.txt. <- paste0(.txt., ' ', statnames[i], ': ',
fmt(X$y[, i]))
}
X$.txt. <- .txt.
xlab <- labelPlotmath(xlabels[xn], xunits[xn], html=TRUE)
gp <- length(groups)
gr <- if(gp) X[[groups]] else factor(rep('', nrow(X)))
if(ptype == 'xy') {
if(nstat > 0 && ! gp) X <- cbind(X, tracename=statnames[1])
p <- plotlyM(X, x = aform(xn), y = ~y, htext = ~.txt.,
color = if(gp) aform(groups),
multplot = ~yvar, strata = strata,
xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
fitter=fitter, showpts=showpts,
...)
} else { ## end ptype xy
## Dot chart or xy.special
## If > 1 calculated statistics, y is a matrix.
## Save first column as y and save remaining columns as yother
y <- X$y
yother <- NULL
if(is.matrix(y) && ncol(y) > 1) {
yother <- y[, -1, drop=FALSE]
X$y <- y[, 1]
}
if(ptype == 'dot') {
## Build a long data frame R: point estimates, then rows encoding
## interval endpoints (y, yhi) for confidence limits
R <- X
R$yhi <- NA
if(length(statnames) && ! gp)
R <- cbind(R, tracename=statnames[1])
if(length(yother)) {
snames <- colnames(yother)
nc <- ncol(yother)
## if(nc > 1) yother[,1] yother[,nc]
if((all(c('Lower', 'Upper') %in% snames)) && nc < 4) {
S <- R
S$y <- yother[, 'Lower']
S$yhi <- yother[, 'Upper']
if(! gp) S$tracename <- 'C.L.'
R <- rbind(R, S)
}
if(nc == 4) {
S <- R
S$y <- yother[, 2]
S$yhi <- yother[, 3]
if(! gp) S$tracename <- paste(snames[2:3], collapse=' ')
R <- rbind(R, S)
}
# for(k in 1 : nc) {
# if(length(pch.stats)) {
# p <- pch.stats[snames[k]]
# if(!is.na(p))
# R <- rbind(R, data.frame(x=X$y, y=yother[, k], yhi=NA, gr=gr,
# tracename=snames[k]))
# ## TODO: need to communicate pch=p
# }
# }
}
p <- plotlyM(R, x=aform(xn), multplot=~ yvar,
color = if(gp) aform(groups),
htext = ~ .txt.,
rotate = TRUE,
ylab = ylab,
xlim = xlim, ylim=ylim,
shareX = shareX, shareY=shareY, ...)
} # end ptype 'dot'
else { # ptype 'xy.special'
xl <- labelPlotmath(xlabels[1], xunits[1], html=TRUE)
yl <- if(! length(ylab) || ylab[1] == '') funlabel else ylab
## Delegate entirely to the user-supplied special function
p <- sfun(X[[xn]], X$y, groups=if(gp) gr, yother=yother, yvar=X$yvar,
maintracename=statnames[1], xlab=xl, ylab=yl,
xlim=xlim, ylim=ylim, zeroline=FALSE, ...)
}
}
p
}
## lattice panel function for plot.summaryS: draws multiple confidence
## level bands (via multLines) around a point estimate, and when there
## are exactly two groups with standard errors available, adds vertical
## half-width 0.95 CI segments for the group difference, centered at
## the midpoint of the two estimates.
mbarclPanel <- function(x, y, subscripts, groups=NULL, yother, ...) {
sRequire('lattice')
gp <- length(groups)
plot.line <-
lattice::trellis.par.get(if(gp) "superpose.line" else "plot.line")
yother <- yother[subscripts, , drop=FALSE]
## se column (if present) is used only for the 2-group difference CI
se <- if('se' %in% colnames(yother)) yother[, 'se']
yother <- yother[, colnames(yother) %nin% c('n', 'se'), drop=FALSE]
if(all(c('0.375', '0.625') %in% colnames(yother))) {
## If HD median estimate is not between 0.375 and 0.625 quantiles
## take it to be the closer of the two
y375 <- yother[, '0.375']
y625 <- yother[, '0.625']
y <- pmin(y, y625)
y <- pmax(y, y375)
}
yother <- cbind(y=y, yother)
gr <- if(gp) groups[subscripts] else factor(rep('', length(x)))
lev <- levels(gr)
## One multLines call per group; bands alternate left/right so two
## groups do not overplot each other
for(i in 1 : length(lev)) {
j <- which(gr == levels(gr)[i])
multLines(x[j], yother[j,, drop=FALSE],
col=plot.line$col[i],
lty=plot.line$lty[i],
lwd=plot.line$lwd[i],
grid=TRUE, pos=c('left', 'right')[i])
}
if(length(lev) == 2 && length(se)) {
## Align the two groups' estimates/SEs by the character form of x
xu <- sort(unique(x))
j1 <- gr == lev[1]
j2 <- gr == lev[2]
Y <- matrix(NA, nrow=length(xu), ncol=4,
dimnames=list(as.character(xu), c('y1', 'y2', 'se1', 'se2')))
x1 <- as.character(x)[j1]
x2 <- as.character(x)[j2]
Y[x1, 'y1' ] <- y [j1]
Y[x1, 'se1'] <- se[j1]
Y[x2, 'y2' ] <- y [j2]
Y[x2, 'se2'] <- se[j2]
ymid <- (Y[, 'y1'] + Y[, 'y2']) / 2.
## Normal-approximation 0.95 CI half-width for the difference
halfwidthci <- qnorm(0.975) * sqrt(Y[, 'se1']^2 + Y[, 'se2']^2)
col <- adjustcolor('black', alpha.f=0.7)
grid::grid.segments(xu, ymid - 0.5 * halfwidthci,
xu, ymid + 0.5 * halfwidthci,
default.units='native',
gp=grid::gpar(col=col, lwd=1.5))
}
}
## lattice panel function for plot.summaryS: at each x, plots the
## Harrell-Davis median with connecting lines, optional half-violin
## density polygons, optional HD quantile bands, and (for exactly two
## groups) vertical half-width 0.95 CI segments for the difference in
## medians, centered at the midpoint of the two medians.
medvPanel <-
function(x, y, subscripts, groups=NULL, violin=TRUE, quantiles=FALSE,
...) {
sRequire('lattice')
gp <- length(groups)
plot.line <-
lattice::trellis.par.get(if(gp) "superpose.line" else "plot.line")
sym <- lattice::trellis.par.get(if(gp) "superpose.symbol" else "plot.symbol")
## Per-x summary: HD median (+SE), optional HD quantiles, sample size.
## Fewer than 3 non-missing values: fall back to sample median, NA SE.
quant <- function(y) {
probs <- c(0.05, 0.125, 0.25, 0.375)
probs <- sort(c(probs, 1. - probs))
y <- y[! is.na(y)]
if(length(y) < 3) {
if(quantiles) {
w <- c(median(y), rep(NA, 9), length(y))
names(w) <- c('Median', format(probs), 'se', 'n')
}
else w <- c(Median=median(y), se=NA, n=length(y))
return(w)
}
w <- if(quantiles) hdquantile(y, probs)
m <- hdquantile(y, 0.5, se=TRUE)
se <- as.numeric(attr(m, 'se'))
c(Median=as.numeric(m), w, se=se, n=length(y))
}
## Draw a half-violin (density polygon) at x, to the left or right;
## fill is made more transparent for small n
denpoly <- function(x, y, col, n=50, pos, ...) {
y <- y[! is.na(y)]
n <- length(y)
if(n < 2) return()
den <- density(y, n=n, ...)
d <- den$y
y <- den$x
## Scale density of 0-3 mm
d <- 3 * d / max(d)
d <- c(d, d[length(d)])
mm <- grid::convertUnit(unit(d, 'mm'), 'mm', typeFrom='dimension')
kol <- if(n < 5 ) adjustcolor(col, alpha.f=0.2)
else if(n < 10) adjustcolor(col, alpha.f=0.4)
else col
grid::grid.polygon(y=unit(c(y, y[1]), 'native'),
x=if(pos == 'left') unit(x, 'native') - mm
else unit(x, 'native') + mm,
gp=grid::gpar(col=FALSE, fill=kol))
}
gr <- if(gp) groups[subscripts] else factor(rep('', length(x)))
lev <- levels(gr)
W <- NULL
for(i in 1 : length(lev)) {
j <- which(gr == levels(gr)[i])
xj <- x[j]
yj <- y[j]
w <- summarize(yj, xj, quant, type='matrix', keepcolnames=TRUE)
Y <- w$yj
xu <- w$xj
lattice::lpoints(xu, Y[,'Median'], cex=sym$cex[i], pch=sym$pch[i], col=sym$col[i],
alpha=sym$alpha[i])
lattice::llines(xu, Y[,'Median'], col=plot.line$col[i], lty=plot.line$lty[i],
lwd=plot.line$lwd[i], alpha=plot.line$alpha)
col <- plot.line$col[i]
if(violin) for(xx in sort(unique(xj)))
denpoly(xx, yj[xj == xx],
col=adjustcolor(plot.line$col[i], alpha.f=0.4),
pos=c('left', 'right')[i])
if(quantiles)
multLines(xu, Y[, colnames(Y) %nin% c('se', 'n'), drop=FALSE],
col=plot.line$col[i],
lty=plot.line$lty[i],
lwd=plot.line$lwd[i],
grid=TRUE, pos=c('left', 'right')[i])
## Accumulate per-group summaries for the 2-group difference CI
W <- rbind(W, cbind(gr=levels(gr)[i], w))
}
if(length(lev) == 2) {
## Align the two groups by the character form of x and draw
## half-width 0.95 CI segments for the difference in medians
x <- W$xj
xu <- sort(unique(W$xj))
j1 <- W$gr == lev[1]
j2 <- W$gr == lev[2]
Y <- matrix(NA, nrow=length(xu), ncol=4,
dimnames=list(as.character(xu), c('y1', 'y2', 'se1', 'se2')))
x1 <- as.character(x)[j1]
x2 <- as.character(x)[j2]
Y[x1, 'y1' ] <- W$yj[j1, 'Median']
Y[x1, 'se1'] <- W$yj[j1, 'se']
Y[x2, 'y2' ] <- W$yj[j2, 'Median']
Y[x2, 'se2'] <- W$yj[j2, 'se']
ymid <- (Y[, 'y1'] + Y[, 'y2']) / 2.
halfwidthci <- qnorm(0.975) * sqrt(Y[, 'se1']^2 + Y[, 'se2']^2)
col <- adjustcolor('black', alpha.f=0.7)
grid::grid.segments(xu, ymid - 0.5 * halfwidthci,
xu, ymid + 0.5 * halfwidthci,
default.units='native',
gp=grid::gpar(col=col, lwd=1.5))
}
}
## plotly analog of mbarclPanel, used as sfun by plotp.summaryS.
## Builds a long data frame of point estimates plus paired
## quantile-interval traces (outer to inner columns of yother), with
## HTML hover text, then renders via plotlyM.  With exactly two groups
## and SEs available, adds half-width 0.95 CI traces for the group
## difference.
mbarclpl <- function(x, y, groups=NULL, yother, yvar=NULL,
maintracename='y', xlim=NULL, ylim=NULL,
xname='x', alphaSegments=0.45, ...) {
gp <- length(groups)
gr <- if(gp) groups else rep('', length(x))
gr <- as.factor(gr)
color <- if(gp) ~ .g.
lev <- levels(gr)
if(! length(yvar)) yvar <- rep('', length(x))
yvar <- as.factor(yvar)
se <- if('se' %in% colnames(yother)) yother[, 'se']
## prn(se, 'mbarclpl', fi='/tmp/z')
cy <- colnames(yother)
n <- if('n' %in% cy) yother[, 'n']
yother <- yother[, cy %nin% c('n', 'se'), drop=FALSE]
if(all(c('0.375', '0.625') %in% cy)) {
## If HD median estimate is not between 0.375 and 0.625 quantiles
## take it to be the closer of the two
y375 <- yother[, '0.375']
y625 <- yother[, '0.625']
y <- pmin(y, y625)
y <- pmax(y, y375)
}
fmt <- function(x) htmlSN(x, digits=5)
## Small horizontal jitter so the two groups' intervals do not overlap
xdel <- 0.01 * diff(range(x, na.rm=TRUE))
xtxt <- paste0(xname, ': ', fmt(x), ' ')
if(length(n)) xtxt <- paste0(xtxt, 'n:', n, ' ')
R <- data.frame(x=x, y=y, yhi=NA, .g.=gr, .yvar.=yvar,
tracename = maintracename, connect=TRUE,
txt = paste0(xtxt, maintracename, ': ',
fmt(y)))
## Pair quantile columns from the outside in: col i with col p-i+1
p <- ncol(yother)
half <- p / 2
x0 <- x
for(i in 1 : half) {
i1 <- i
i2 <- p - i + 1
tn <- paste0(colnames(yother)[i1], ' - ', colnames(yother)[i2])
x0 <- ifelse(gr == lev[1],
x - i * xdel, x + i * xdel)
txt <- paste0(xtxt, tn, ': [', fmt(yother[, i1]),
', ', fmt(yother[, i2]), ']')
r <- data.frame(x=x0, y=yother[, i1], yhi=yother[, i2],
.g.=gr, .yvar.=yvar,
tracename=tn, connect=NA, txt=txt)
R <- rbind(R, r)
}
if(length(lev) == 2 && length(se)) {
## Per y-variable: half-width 0.95 CI for the between-group
## difference, drawn centered at the midpoint of the two estimates
rr <- NULL
for(yv in levels(yvar)) {
k <- yvar == yv
xk <- x[k]; yk <- y[k]; sek <- se[k]; grk <- gr[k]
xu <- sort(unique(xk))
j1 <- grk == lev[1]
j2 <- grk == lev[2]
Y <- matrix(NA, nrow=length(xu), ncol=4,
dimnames=list(as.character(xu),
c('y1', 'y2', 'se1', 'se2')))
x1 <- as.character(xk)[j1]
x2 <- as.character(xk)[j2]
Y[x1, 'y1' ] <- yk [j1]
Y[x1, 'se1'] <- sek[j1]
Y[x2, 'y2' ] <- yk [j2]
Y[x2, 'se2'] <- sek[j2]
ymid <- (Y[, 'y1'] + Y[, 'y2']) / 2.
halfwidthci <- qnorm(0.975) * sqrt(Y[, 'se1']^2 + Y[, 'se2']^2)
ydel <- Y[, 'y2'] - Y[, 'y1']
txt <- paste0(xname, ': ', fmt(xu),
' \u0394: ', fmt(ydel),
' 0.95 C.I. for \u0394: [',
fmt(ydel - halfwidthci), ', ',
fmt(ydel + halfwidthci), ']')
r <- data.frame(x = xu,
y = ymid - 0.5 * halfwidthci,
yhi = ymid + 0.5 * halfwidthci,
.g. = paste0(lev[1], ' vs. ', lev[2]),
.yvar. = yv, txt=txt,
tracename='\u00BD 0.95 C.I. for \u0394', connect=FALSE)
## prn(cbind(yother, 2 * rep(halfwidthci, each=2)), fi='/tmp/z')
rr <- rbind(rr, r)
}
R <- rbind(R, rr)
}
plotlyM(R, multplot=~.yvar., color=color, htext=~txt,
xlim=xlim, ylim=ylim, alphaSegments=alphaSegments, ...)
}
## plotly analog of medvPanel, used as sfun by plotp.summaryS.
## For each y variable, draws histogram/box displays per stratum via
## dhistboxp (with x and y roles swapped for vertical orientation),
## and with exactly two groups adds half-width 0.95 CI traces for the
## difference in HD medians at each x, then renders via plotlyM.
medvpl <-
function(x, y, groups=NULL, yvar=NULL, maintracename='y',
xlim=NULL, ylim=NULL, xlab=xname, ylab=NULL, xname='x',
zeroline=FALSE, yother=NULL, alphaSegments=0.45,
dhistboxp.opts=NULL, ...) {
gp <- length(groups)
gr <- if(gp) groups else factor(rep('', length(x)))
lev <- levels(gr)
yvarpres <- length(yvar)
if(! length(yvar)) yvar <- rep('', length(x))
if(! length(ylab)) ylab <- structure(unique(yvar), names=yvar)
if(! length(names(ylab))) stop('ylab must have names')
fmt <- function(x) htmlSN(x, digits=5)
R <- NULL
for(yv in unique(yvar)) {
k <- yvar == yv
xk <- x[k]
yk <- y[k]
gk <- gr[k]
r <- do.call('dhistboxp',
c(list(yk, group=gk, strata=xk, xlab=ylab[yv]),
dhistboxp.opts))
r$strata <- NULL
## Swap x/y (and xhi/yhi) so strata run along the x axis
ry <- r$y
r$y <- r$x
r$x <- ry
ryhi <- r$yhi
r$yhi <- r$xhi
r$xhi <- ryhi
r$yvar <- yv
R <- rbind(R, r)
xku <- unique(xk)
if(length(lev) == 2) {
for(xa in xku) {
## NOTE(review): this Y matrix is allocated but never filled or
## read -- med1/se1/med2/se2 below are used instead; appears to
## be dead code carried over from medvPanel
Y <- matrix(NA, nrow=length(xku), ncol=4,
dimnames=list(as.character(xku), c('y1','y2','se1','se2')))
for(ga in lev) {
j <- xk == xa & gk == ga
ykj <- yk[j]
ykj <- ykj[! is.na(ykj)]
## < 3 values: sample median with unknown (NA) SE, which makes
## the CI below NA as well
if(length(ykj) < 3) {
m <- median(ykj)
se <- NA
}
else {
m <- hdquantile(ykj, probs=0.5, se=TRUE)
se <- attr(m, 'se')
}
med <- as.vector(m)
if(ga == lev[1]) {
med1 <- med
se1 <- se
} else {
med2 <- med
se2 <- se
}
}
ydel <- med2 - med1
ymid <- (med1 + med2) / 2.
halfwidthci <- qnorm(0.975) * sqrt(se1 ^ 2 + se2 ^ 2)
txt <- paste0(xname, ': ', fmt(xa),
' \u0394: ', fmt(ydel),
' 0.95 C.I. for \u0394: [',
fmt(ydel - halfwidthci), ', ',
fmt(ydel + halfwidthci), ']')
R <- rbind(R,
data.frame(x =xa, y =ymid - 0.5 * halfwidthci,
xhi=NA, yhi=ymid + 0.5 * halfwidthci,
group=paste0(lev[1], ' vs. ', lev[2]),
yvar = yv,
txt = txt,
type = '', connect=NA))
}
}
}
## Combine dhistboxp's type indicator into the group label for color
R$group <- paste(R$group, R$type)
R$type <- NULL
plotlyM(R, htext=~txt, multplot=~yvar, color=~group,
alphaSegments=alphaSegments,
xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, zeroline=zeroline,
...)
}
Hmisc/R/sys.s 0000644 0001762 0000144 00000000330 12250353053 012512 0 ustar ligges users ## Improvements by Sebastian Weber 26Aug03
sys <- function(command, text=NULL, output=TRUE) {
  ## Run an operating-system command, optionally followed by `text`
  ## (appended with a separating blank).  With output=TRUE (the
  ## default) the command's standard output is captured and returned
  ## as a character vector; otherwise the exit status is returned.
  full <- command
  if(length(text)) full <- paste(command, text)
  system(full, intern=output)
}
t.test.cluster <- function(y, cluster, group, conf.int=.95)
{
  ## Two-sample t-test adjusted for within-cluster correlation.
  ## See:
  ## Donner A, Birkett N, Buck C, Am J Epi 114:906-914, 1981.
  ## Donner A, Klar N, J Clin Epi 49:435-439, 1996.
  ## Hsieh FY, Stat in Med 8:1195-1201, 1988.
  ##
  ## y       : response vector
  ## cluster : cluster identifier (coerced to factor)
  ## group   : two-level treatment identifier (coerced to factor)
  ## conf.int: coverage for the confidence limits of the difference
  ##
  ## Returns a 20 x 2 matrix of class 't.test.cluster' holding sample
  ## sizes, variance components, the intracluster correlation, the
  ## design effect, and the cluster-adjusted difference, CI, Z and P.
  group   <- as.factor(group)
  cluster <- as.factor(cluster)
  s <- !(is.na(y)|is.na(cluster)|is.na(group))
  y <- y[s];
  cluster <- cluster[s];
  group <- group[s]
  n <- length(y)
  if(n<2)
    stop("n<2")
  gr <- levels(group)
  if(length(gr)!=2)
    stop("must have exactly two treatment groups")
  n  <- table(group)
  nc <- tapply(cluster, group, function(x)length(unique(x)))
  bar <- tapply(y, group, mean)
  u <- unclass(group)
  y1 <- y[u==1];
  y2 <- y[u==2]
  c1 <- factor(cluster[u==1]);
  c2 <- factor(cluster[u==2]) #factor rids unused lev
  b1 <- tapply(y1, c1, mean);
  b2 <- tapply(y2, c2, mean)
  m1 <- table(c1);
  m2 <- table(c2)
  if(any(names(m1)!=names(b1)))
    stop("logic error 1")
  if(any(names(m2)!=names(b2)))
    stop("logic error 2")
  ## BUG FIX: the original validated only group 2 (m2); a singleton
  ## cluster in group 1 made var() return NA and silently propagated
  ## NA through ssw1/mse into every result.  Check both groups.
  if(any(m1 < 2) || any(m2 < 2))
    stop(paste('The following clusters contain only one observation:',
               paste(c(names(m1[m1 < 2]), names(m2[m2 < 2])),
                     collapse=' ')))
  M1 <- mean(y1);
  M2 <- mean(y2)
  ## Between-cluster (within-group) sums of squares
  ssc1 <- sum(m1*((b1-M1)^2));
  ssc2 <- sum(m2*((b2-M2)^2))
  if(nc[1]!=length(m1))
    stop("logic error 3")
  if(nc[2]!=length(m2))
    stop("logic error 4")
  df.msc <- sum(nc)-2
  msc <- (ssc1+ssc2)/df.msc
  ## Within-cluster sums of squares
  v1 <- tapply(y1,c1,var);
  v2 <- tapply(y2,c2,var)
  ssw1 <- sum((m1-1)*v1);
  ssw2 <- sum((m2-1)*v2)
  df.mse <- sum(n)-sum(nc)
  mse <- (ssw1+ssw2)/df.mse
  ## ANOVA estimator of the intracluster correlation (Donner 1981)
  na  <- (sum(n)-(sum(m1^2)/n[1]+sum(m2^2)/n[2]))/(sum(nc)-1)
  rho <- (msc-mse)/(msc+(na-1)*mse)
  r   <- max(rho, 0)   # truncate negative ICC estimates at 0
  ## Variance inflation (design effect) factors per group
  C1 <- sum(m1*(1+(m1-1)*r))/n[1]
  C2 <- sum(m2*(1+(m2-1)*r))/n[2]
  v       <- mse*(C1/n[1]+C2/n[2])
  v.unadj <- mse*(1/n[1]+1/n[2])
  de <- v/v.unadj
  dif <- diff(bar)
  se  <- sqrt(v)
  zcrit <- qnorm((1+conf.int)/2)
  cl <- c(dif-zcrit*se, dif+zcrit*se)
  z <- dif/se
  P <- 2*pnorm(-abs(z))
  stats <-
    matrix(NA, nrow=20, ncol=2,
           dimnames=list(c("N","Clusters","Mean",
                           "SS among clusters within groups",
                           "SS within clusters within groups",
                           "MS among clusters within groups","d.f.",
                           "MS within clusters within groups","d.f.",
                           "Na","Intracluster correlation",
                           "Variance Correction Factor","Variance of effect",
                           "Variance without cluster adjustment","Design Effect",
                           "Effect (Difference in Means)",
                           "S.E. of Effect",paste(format(conf.int),"Confidence limits"),
                           "Z Statistic","2-sided P Value"), gr))
  stats[1,]   <- n
  stats[2,]   <- nc
  stats[3,]   <- bar
  stats[4,]   <- c(ssc1, ssc2)
  stats[5,]   <- c(ssw1, ssw2)
  stats[6,1]  <- msc
  stats[7,1]  <- df.msc
  stats[8,1]  <- mse
  stats[9,1]  <- df.mse
  stats[10,1] <- na
  stats[11,1] <- rho
  stats[12,]  <- c(C1, C2)
  stats[13,1] <- v
  stats[14,1] <- v.unadj
  stats[15,1] <- de
  stats[16,1] <- dif
  stats[17,1] <- se
  stats[18,]  <- cl
  stats[19,1] <- z
  stats[20,1] <- P
  attr(stats,'class') <- "t.test.cluster"
  stats
}
print.t.test.cluster <- function(x, digits, ...)
{
  ## Print method for t.test.cluster results: formats each row of the
  ## statistics matrix, blanks the cells that are NA (statistics that
  ## apply to only one column), and prints without quotes.  The
  ## `digits` option is changed only for the duration of the call.
  if(!missing(digits)) {
    prev <- options(digits=digits)   # options() returns prior values
    on.exit(options(prev))
  }
  formatted <- t(apply(x, 1, format))
  attr(formatted, 'class') <- NULL
  formatted[is.na(x)] <- ""
  invisible(print(formatted, quote=FALSE))
}
areg <- function(x, y, xtype=NULL, ytype=NULL, nk=4,
                 B=0, na.rm=TRUE,
                 tolerance=NULL, crossval=NULL) {
  ## Additive regression with optimal transformations of x and y.
  ## Each side is expanded with restricted cubic splines ('s'),
  ## dummies for categorical variables ('c'), or left linear ('l'),
  ## and the transformations are estimated by canonical correlation
  ## (linear y uses ordinary least squares).  Optionally computes
  ## bootstrap optimism estimates (B > 0) and k-fold cross-validation
  ## (crossval) of R^2 and mean/median absolute error.
  ##
  ## x        : predictor vector or matrix
  ## y        : response
  ## xtype    : per-column transformation types; default 's' (or 'l' if nk==0)
  ## ytype    : response transformation type; inferred if NULL
  ## nk       : number of spline knots (0 forces linearity)
  ## B        : number of bootstrap repetitions
  ## na.rm    : delete observations with any NA first
  ## tolerance: singularity tolerance passed to lm.fit.qr.bare
  ## crossval : number of cross-validation folds (NULL to skip)
  ## Returns an object of class 'areg'.
  yname <- deparse(substitute(y))
  xname <- deparse(substitute(x))
  ism <- is.matrix(x)
  if(!ism) {
    x <- as.matrix(x)
    if(!length(colnames(x))) colnames(x) <- xname
  }
  if(na.rm) {
    ## x %*% rep(1, ncol(x)) is NA for any row containing an NA
    omit <- is.na(x %*% rep(1,ncol(x))) | is.na(y)
    nmiss <- sum(omit)
    if(nmiss) {
      x <- x[!omit,,drop=FALSE]
      y <- y[!omit]
    }
  } else nmiss <- 0
  d <- dim(x)
  n <- d[1]; p <- d[2]
  xnam <- colnames(x)
  if(!length(xnam)) xnam <- paste('x', 1:p, sep='')
  nuy <- length(unique(y))
  ## Infer y transformation type when not given
  if(!length(ytype)) ytype <-
    if(is.factor(y) || is.character(y)) 'c' else
    if(nk==0 || (nuy < 3)) 'l' else 's'
  if(nk==0 && ytype=='s') ytype <- 'l'
  if(ytype=='s' && nk > 0 && nuy < 3) {
    warning('requested smooth transformation of y but less than 3 unique values; forced linear')
    ytype <- 'l'
  }
  if(!length(xtype)) xtype <- rep(if(nk==0)'l' else 's', p)
  xtype[nk==0 & xtype=='s'] <- 'l'
  names(xtype) <- xnam
  nux <- apply(x, 2, function(z) length(unique(z)))
  tooTied <- xtype=='s' & nux < nk
  if(any(tooTied)) {
    warning(paste('the following x variables have too few unique values for the value of nk;\nlinearity forced:', paste(xnam[tooTied], collapse=',')))
    ## BUG FIX: the original did xtype[nux] <- 'l', indexing xtype by
    ## the unique-value COUNTS instead of by the offending variables
    xtype[tooTied] <- 'l'
  }
  ## First-canonical-variate fit of X on Y, with sign normalized so the
  ## y transformation increases in y, and zeros inserted for singular
  ## columns that cancor dropped
  fcancor <- function(X, Y) {
    ## colnames must exist for correct insertion of zeros for singular
    ## elements
    colnames(X) <- paste('x', 1:ncol(X), sep='')
    colnames(Y) <- paste('y', 1:ncol(Y), sep='')
    ## If canonical variate transformation of Y is descending in Y,
    ## negate all parameters
    f <- cancor(X, Y)
    f$r2 <- f$cor[1]^2
    n <- nrow(Y); if(!length(n)) n <- length(y)
    varconst <- sqrt(n-1)
    ## cancor returns rows only for non-singular variables
    ## For first canonical variate insert zeros for singular variables
    xcoef <- 0. * X[1, ]
    b <- f$xcoef[, 1]
    xdf <- length(b)
    xcoef[names(b)] <- b
    xcoef <- c(intercept = -sum(xcoef * f$xcenter), xcoef) * varconst
    ycoef <- 0. * Y[1, ]
    b <- f$ycoef[, 1]
    ydf <- length(b)
    ycoef[names(b)] <- b
    ycoef <- c(intercept = -sum(ycoef * f$ycenter), ycoef) * varconst
    ty <- matxv(Y, ycoef)
    g <- lm.fit.qr.bare(Y, ty, singzero=TRUE)
    if(g$coefficients[2] < 0) {
      xcoef <- -xcoef
      ycoef <- -ycoef
      ty <- -ty
    }
    f$xcoef <- xcoef
    f$ycoef <- ycoef
    f$ty <- ty
    f$xdf <- xdf
    f$ydf <- ydf
    f
  }
  need2getinv <- FALSE
  xpxi <- NULL
  ## Expand y; for 's' the inverse transform must be built later from
  ## the estimated coefficients
  Y <- aregTran(y, ytype, nk, functions=TRUE)
  at <- attributes(Y)
  ytrans <- at$fun
  yinv <- at$inversefun ## NULL if type='s'; need coef
  yparms <- at$parms
  xdf <- ifelse(xtype=='l', 1, nk - 1)
  j <- xtype == 'c'
  if(any(j)) xdf[j] <- nux[j] - 1
  names(xdf) <- xnam
  ## Build the expanded design matrix column block by column block
  X <- matrix(NA, nrow=n, ncol=sum(xdf))
  xparms <- list()
  j <- 0
  xn <- character(0)
  for(i in 1 : p) {
    w <- aregTran(x[,i], xtype[i], nk)
    xparms[[xnam[i]]] <- attr(w, 'parms')
    m <- ncol(w)
    xdf[i] <- m
    X[, (j + 1) : (j + m)] <- w
    j <- j + m
  }
  ## See if rcpsline.eval could not get desired no. of knots due to ties
  if(ncol(X) > sum(xdf)) X <- X[, 1 : sum(xdf), drop=FALSE]
  covx <- covy <- r2opt <- r2boot <-
    madopt <- madboot <- medopt <- medboot <- NULL
  if(B > 0L) {
    ## Accumulators for bootstrap means and covariances of coefficients
    r <- 1L + sum(xdf)
    barx <- rep(0., r)
    vname <- c('Intercept', xn)
    covx <- matrix(0, nrow=r, ncol=r, dimnames=list(vname,vname))
    if(ytype != 'l') {
      r <- ncol(Y) + 1L
      bary <- rep(0., r)
      vname <- c('Intercept', paste(yname, 1 : (r - 1), sep=''))
      covy <- matrix(0, nrow=r, ncol=r, dimnames=list(vname, vname))
    }
  }
  if(ytype == 'l') {
    ## Linear y: ordinary least squares on the expanded X
    f <- lm.fit.qr.bare(X, Y, tolerance=tolerance, xpxi=TRUE, singzero=TRUE)
    xcof <- f$coefficients
    r2 <- f$rsquared
    xpxi <- f$xpxi
    cof <- 1
    ty <- y
    ydf <- 1
    lp <- f$fitted.values
    res <- f$residuals
    mad <- mean (abs(y - lp))
    med <- median(abs(y - lp))
    if(B > 0) {
      ## Bootstrap optimism for R^2 and absolute-error summaries
      r2opt <- madopt <- medopt <- 0
      for(j in 1:B) {
        s <- sample(1:n, replace=TRUE)
        if(ytype=='c' && length(unique(y[s])) < nuy)
          stop('a bootstrap resample had too few unique values of y')
        xch <- which(xtype == 'c')
        if(length(xch)) {
          xtf <- apply(x[s, xch, drop=FALSE], 2,
                       function(z)length(unique(z))) < nux[xch]
          if(any(xtf))
            stop(paste('a bootstrap resample had too few unique values of the following x variables:',
                       paste(xnam[xch[xtf]], collapse=','), sep='\n'))
        }
        g <- lm.fit.qr.bare(X[s,, drop=FALSE], Y[s], singzero=TRUE)
        b <- g$coefficients
        r2boot <- g$rsquared
        yhat <- matxv(X, b)
        r2orig <- cor(yhat, y)^2
        r2opt <- r2opt + r2boot - r2orig
        er <- abs(Y[s] - g$fitted.values)
        madboot <- mean(er)
        medboot <- median(er)
        er <- abs(y - yhat)
        madorig <- mean(er)
        medorig <- median(er)
        madopt <- madopt + madboot - madorig
        ## BUG FIX: the original never accumulated medopt in this
        ## branch (compare the canonical-correlation branch below),
        ## leaving medboot/medopt wrong for linear y
        medopt <- medopt + medboot - medorig
        barx <- barx + b
        b <- as.matrix(b)
        covx <- covx + b %*% t(b)
      }
      r2opt <- r2opt / B
      r2boot <- r2 - r2opt
      madopt <- madopt / B
      madboot <- mad - madopt
      medopt <- medopt / B
      medboot <- med - medopt
      barx <- as.matrix(barx / B)
      covx <- (covx - B * barx %*% t(barx)) / (B - 1)
    }
  } else {
    ## Transformed y: canonical correlation of expanded X and Y
    f <- fcancor(X, Y)
    r2 <- f$r2
    xcof <- f$xcoef
    cof <- f$ycoef
    ty <- f$ty
    ydf <- f$ydf
    lp <- as.vector(matxv(X, xcof))
    res <- as.vector(ty - lp)
    if(!length(yinv)) {
      ## spline transformation, need coef to get inverse y transform
      yy <- seq(min(y), max(y), length=1000)
      tyy <- ytrans(yy, coef=cof)
      yinv <- inverseFunction(yy, tyy)
      need2getinv <- TRUE
    }
    puy <- yinv(lp, what='sample')
    if(length(y) != length(puy)) stop('program logic error')
    mad <- mean (abs(y - puy))
    med <- median(abs(y - puy))
    if(B > 0) {
      r2opt <- madopt <- medopt <- 0
      for(j in 1L : B) {
        s <- sample(1L : n, replace=TRUE)
        if(ytype=='c' && length(unique(y[s])) < nuy)
          stop('a bootstrap resample had too few unique values of y')
        xch <- which(xtype == 'c')
        if(length(xch)) {
          xtf <- apply(x[s, xch, drop=FALSE], 2,
                       function(z)length(unique(z))) < nux[xch]
          if(any(xtf))
            stop(paste('a bootstrap resample had too few unique values of the following x variables:',
                       paste(xnam[xch[xtf]], collapse=','), sep='\n'))
        }
        f <- fcancor(X[s,, drop=FALSE], Y[s,, drop=FALSE])
        bx <- f$xcoef
        by <- f$ycoef
        r2boot <- f$r2
        xbeta <- matxv(X, bx)
        ybeta <- matxv(Y, by)
        r2orig <- cor(xbeta, ybeta)^2
        r2opt <- r2opt + r2boot - r2orig
        puyall <- if(need2getinv) {
          tyyb <- ytrans(yy, coef=by) ## keeping constant knots
          yinvb <- inverseFunction(yy, tyyb)
          yinvb(xbeta, coef=by, what='sample')
        }
        else
          yinv(xbeta, coef=by)
        er <- abs(y[s] - puyall[s])
        madboot <- mean(er)
        medboot <- median(er)
        er <- abs(y - puyall)
        madorig <- mean(er)
        medorig <- median(er)
        madopt <- madopt + madboot - madorig
        medopt <- medopt + medboot - medorig
        barx <- barx + bx
        bx <- as.matrix(bx)
        covx <- covx + bx %*% t(bx)
        bary <- bary + by
        by <- as.matrix(by)
        covy <- covy + by %*% t(by)
      }
      r2opt <- r2opt / B
      r2boot <- r2 - r2opt
      madopt <- madopt / B
      madboot <- mad - madopt
      medopt <- medopt / B
      medboot <- med - medopt
      barx <- as.matrix(barx / B)
      bary <- as.matrix(bary / B)
      covx <- (covx - B * barx %*% t(barx)) / (B - 1)
      covy <- (covy - B * bary %*% t(bary)) / (B - 1)
    }
  }
  ## Centered per-predictor transformed values (tx)
  j <- 0
  beta <- xcof[-1]
  tx <- x
  xmeans <- list()
  for(i in 1:p) {
    m <- xdf[i]
    z <- matxv(X[, (j + 1) : (j + m), drop=FALSE], beta[(j + 1) : (j + m)])
    mz <- mean(z)
    xmeans[[xnam[i]]] <- mz
    tx[,i] <- z - mz
    j <- j + m
  }
  ## Optional k-fold cross-validation of R^2 and absolute errors
  r2cv <- madcv <- medcv <- NULL
  if(length(crossval)) {
    s <- sample(1:crossval, n, replace=TRUE)
    r2cv <- madcv <- medcv <- 0
    for(j in 1:crossval) {
      g <- fcancor(X[s!=j,, drop=FALSE], Y[s!=j,, drop=FALSE])
      bx <- g$xcoef
      by <- g$ycoef
      xbo <- matxv(X[s==j,, drop=FALSE], bx)
      ybo <- matxv(Y[s==j,, drop=FALSE], by)
      r2cv <- r2cv + cor(xbo, ybo)^2
      puy <- if(need2getinv) {
        tyyb <- ytrans(yy, coef=by) ## keeping constant knots
        yinvb <- inverseFunction(yy, tyyb)
        yinvb(xbo, coef=by, what='sample')
      }
      else yinv(xbo, coef=by)
      er <- abs(y[s==j] - puy)
      madcv<- madcv + mean(er)
      medcv<- medcv + median(er)
    }
    r2cv <- r2cv / crossval
    madcv <- madcv / crossval
    medcv <- medcv / crossval
  }
  structure(list(y=y, x=x, ty=ty, tx=tx,
                 rsquared=r2, rsquaredcv=r2cv, nk=nk, xdf=xdf, ydf=ydf,
                 xcoefficients=xcof, ycoefficients=cof,
                 xparms=xparms, yparms=yparms, xmeans=xmeans,
                 ytrans=ytrans, yinv=yinv,
                 linear.predictors=lp, residuals=res,
                 xtype=xtype, ytype=ytype, yname=yname,
                 r2boot=r2boot, r2opt=r2opt,
                 mad=mad, madboot=madboot, madopt=madopt,
                 med=med, medboot=medboot, medopt=medopt,
                 madcv=madcv, medcv=medcv,
                 xcov=covx, ycov=covy, xpxi=xpxi,
                 n=n, m=nmiss, B=B, crossval=crossval),
            class='areg')
}
aregTran <- function(z, type, nk = length(parms), parms = NULL,
                     functions = FALSE) {
  ## Expand a single variable for areg according to its type:
  ##   'l' (or 's' with nk==0): identity, returned as a 1-column matrix
  ##   'c': dummy variables for all levels past the first (reference)
  ##   's': restricted cubic spline basis (inclx=TRUE)
  ## When functions=TRUE, 'fun' / 'inversefun' attributes carry the
  ## transformation and (where available) its inverse; for splines the
  ## inverse is constructed later, once coefficients exist.
  if(type == 'l' || (type == 's' && nk == 0)) {
    m <- as.matrix(z)
    if(! functions) return(m)
    return(structure(m,
                     fun        = function(x, ...) x,
                     inversefun = function(x, ...) x))
  }
  if(type == 'c') {
    n  <- length(z)
    lp <- length(parms)
    ## Assume z is integer code if parms is given
    w <- if(lp) z else factor(z)
    codes <- as.integer(w)
    if(! lp) parms <- 1 : max(codes, na.rm=TRUE)
    ## Reference-level dummy coding: level 1 contributes all zeros
    ## (a 0 column index in matrix assignment skips that row)
    dummy <- matrix(0, nrow=n, ncol=length(parms) - 1)
    dummy[cbind(1 : n, codes - 1)] <- 1
    attr(dummy, 'parms') <- if(lp) parms else levels(w)
    if(functions) {
      ## Forward map: add the intercept into the non-reference
      ## coefficients and index by integer code
      fwd <- function(x, parms, coef) {
        if(length(parms) > length(coef)) coef <- c(0, coef)
        coef[-1] <- coef[-1] + coef[1]
        names(coef) <- parms
        coef[x]
      }
      formals(fwd) <- list(x=integer(0), parms=parms, coef=numeric(0))
      ## Inverse map: closest fitted level value; `what` is ignored,
      ## for compatibility with inverseFunction in Misc.s
      inv <- function(y, parms, coef, what=character(0)) {
        if(length(parms) > length(coef)) coef <- c(0, coef)
        isna <- is.na(y)
        y[isna] <- 0
        x <- parms[whichClosest(c(coef[1], coef[1] + coef[-1]), y)]
        x[isna] <- NA
        x
      }
      formals(inv) <- list(y=numeric(0), parms=parms,
                           coef=numeric(0), what=character(0))
      attr(dummy, 'fun')        <- fwd
      attr(dummy, 'inversefun') <- inv
    }
    return(dummy)
  }
  ## Smooth: restricted cubic spline basis including the linear term
  basis <- rcspline.eval(z, knots=parms, nk=nk, inclx=TRUE)
  knots <- attr(basis, 'knots')
  attr(basis, 'parms') <- knots
  if(functions) attr(basis, 'fun') <- rcsplineFunction(knots)
  ## inverse function created later when coefficients available
  basis
}
predict.areg <- function(object, x, type=c('lp','fitted','x'),
                         what=c('all','sample'), ...) {
  ## Predict from an areg fit for new predictor values x.
  ##   type='x'     : the expanded design matrix with intercept column
  ##   type='lp'    : the linear predictor (transformed-y scale)
  ##   type='fitted': predictions inverted back to the original y scale
  ## `what` is passed to the inverse y transformation.
  type <- match.arg(type)
  what <- match.arg(what)
  xcoef    <- object$xcoefficients
  xparms   <- object$xparms
  xtypes   <- object$xtype
  dfs      <- object$xdf
  ycoef    <- object$ycoefficients
  yinverse <- object$yinv
  x <- as.matrix(x)
  vnames <- names(xtypes)
  ## Re-expand each predictor with the parameters saved at fit time
  design <- matrix(NA, nrow=nrow(x), ncol=sum(dfs))
  filled <- 0
  for(i in seq_along(dfs)) {
    basis <- aregTran(x[, i], xtypes[i], parms=xparms[[vnames[i]]])
    nb <- ncol(basis)
    design[, filled + seq_len(nb)] <- basis
    filled <- filled + nb
  }
  if(type == 'x') return(cbind(1, design))
  lp <- matxv(design, xcoef)
  if(type == 'fitted') yinverse(lp, what=what, coef=ycoef) else lp
}
## Print method for 'areg' fits: sample sizes, R^2, mean/median absolute
## error, optional bootstrap optimism-corrected and cross-validated
## summaries, and a per-variable table of transformation types and d.f.
print.areg <- function(x, digits=4, ...) {
  info <- x[c('n', 'm', 'nk', 'rsquared', 'xtype', 'xdf', 'ytype', 'ydf')]
  vtab <- data.frame(type=info$xtype, d.f.=info$xdf,
                     row.names=names(info$xtype))

  cat('\nN:', info$n, '\t', info$m,
      ' observations with NAs deleted.\n')
  cat('R^2: ', round(info$rsquared, 3), '\tnk: ', info$nk,
      '\tMean and Median |error|: ', format(x$mad, digits=digits), ', ',
      format(x$med, digits=digits), '\n\n', sep='')

  ## Bootstrap optimism-corrected estimates, when computed
  if(length(x$r2boot)) {
    opt  <- format(c(x$r2opt,  x$madopt,  x$medopt),  digits=digits)
    corr <- format(c(x$r2boot, x$madboot, x$medboot), digits=digits)
    lab  <- c('R^2', 'Mean |error|', 'Median |error|')
    tab  <- cbind('Bootstrap Estimates'=lab, Optimism=opt,
                  'Optimism-corrected'=corr)
    row.names(tab) <- rep('', 3)
    print(tab, quote=FALSE, right=TRUE)
  }

  ## Cross-validated estimates, when computed
  if(length(x$crossval)) {
    est <- format(c(x$rsquaredcv, x$madcv, x$medcv), digits=digits)
    lab <- c('R^2', 'Mean |error|', 'Median |error|')
    tab <- cbind(lab, est)
    dimnames(tab) <- list(rep('', 3),
                          c(paste(x$crossval, '-fold Cross-validation',
                                  sep=''),
                            'Estimate'))
    cat('\n')
    print(tab, quote=FALSE, right=TRUE)
  }

  cat('\n')
  print(vtab)
  cat('\ny type:', info$ytype, '\td.f.:', info$ydf, '\n\n')
  invisible()
}
## Plot method for 'areg' fits: first the estimated y-transformation
## (annotated with R^2), then the estimated transformation of each
## predictor selected by whichx.  Extra arguments go to the x plots.
plot.areg <- function(x, whichx=1 : ncol(x$x), ...) {
  ## y transformation, with model R^2 as a subtitle
  plot(x$y, x$ty, xlab=x$yname,
       ylab=paste('Transformed', x$yname))
  rsq <- round(x$rsquared, 3)
  title(sub=bquote(R^2 == .(rsq)), adj=0)

  ## one panel per selected predictor
  preds <- x$x
  nms   <- colnames(preds)
  for(j in whichx)
    plot(preds[, j], x$tx[, j],
         xlab=nms[j], ylab=paste('Transformed', nms[j]), ...)
  invisible()
}
## Draw a multi-level confidence bar at position `at` for an estimate `est`
## with standard error `se`.  Nested confidence regions for the levels in
## `q` are shaded with `col` (given from innermost to outermost).
##   type   : 'v' vertical bar, 'h' horizontal
##   side   : 'l'/'b' puts labels left/bottom, anything else right/top
##   fun    : transforms est +/- se*qfun(q) to the plotted scale
##   qfun   : maps a confidence level to a quantile multiplier
##   clip   : ordinates outside this range are not drawn
## Returns, invisibly, the transformed limits named by confidence level.
confbar <- function(at, est, se, width,
                    q=c(.7,.8,.9,.95,.99),
                    col=gray(c(0,.25,.5,.75,1)),
                    type=c("v","h"), labels=TRUE, ticks=FALSE,
                    cex=.5, side="l", lwd=5, clip=c(-1e30, 1e30),
                    fun=function(x)x,
                    qfun=function(x)
                      ifelse(x==.5, qnorm(x),
                             ifelse(x<.5,qnorm(x/2),qnorm((1+x)/2))))
{
  type <- match.arg(type)
  ## which par('usr') entries span the axis the bar lies along
  iusr <- if(type=="v")
            1:2
          else
            3:4
  if(missing(width))
    width <- diff(par("usr")[iusr])*.02

  if(side=="b")
    side <- "l"   #treat bottom as left

  if(length(q)!=length(col))
    stop("q and col must have same length")

  ## expand q symmetrically about .5 so shading nests around the estimate
  q <- c(1-rev(q), .5, q)
  ##qe <- seq(.01, .99, length=n)
  ##col <- seq(.8,.01, length=n/2)
  col <- c(rev(col), col)
  w <- width/2

  ## polyg/Lines/Text: orientation-aware drawing wrappers; ordinates
  ## beyond `clip` are set to NA, which suppresses drawing them
  if(type=="v") {
    polyg <- function(a, b, col, clip)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      polygon(a, b, col=col)
    }
    Lines <- function(a, b, lwd=1, clip)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      lines(a, b, lwd=lwd)
    }
    Text <- function(a, b, clip, ...)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      text(a, b, ...)
    }
    srt <- 0
  } else {
    polyg <- function(a, b, col, clip)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      polygon(b, a, col=col)
    }
    Lines <- function(a, b, lwd=1, clip)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      lines(b, a, lwd=lwd)
    }
    Text <- function(a, b, clip, ...)
    {
      b[b < clip[1] | b > clip[2]] <- NA
      text(b, a, ...)
    }
    srt <- 45
  }

  ## one shaded polygon per adjacent pair of expanded confidence levels
  for(i in 1:(length(q)-1))
    polyg(c(at-w,at+w,at+w,at-w),fun(est+se*qfun(c(q[i],q[i],q[i+1],q[i+1]))),
          col=col[i], clip=clip)

  a <- fun(est)
  z <- w*.24
  ## heavy line marks the point estimate itself
  Lines(c(at-w-3.5*z, at+w+3.5*z), c(a,a), lwd=lwd, clip=clip)

  a <- fun(est+se*qfun(q))
  do <- TRUE
  if(labels || ticks)
    for(i in 1:length(q)) {
      b <- c(a[i], a[i])
      if(ticks) {
        Lines(c(at-w-z,at-w),b, clip=clip)
        Lines(c(at+w+z,at+w),b, clip=clip)
      }
      ## `do` toggles so only every other level is labeled, avoiding
      ## collisions between mirrored upper/lower labels
      if(labels && do && q[i]!=.5) {
        if(side=="l")
          Text(at-w-2*z, a[i], format(max(1-q[i],q[i])),
               cex=cex, adj=1, srt=srt, clip=clip)
        else
          Text(at+w+2*z, a[i], format(max(1-q[i],q[i])),
               cex=cex, adj=0, srt=srt, clip=clip)
      }
      if(q[i]!=.5)
        do <- !do
    }

  names(a) <- format(q)
  invisible(a)
}
Hmisc/R/substi.s 0000644 0001762 0000144 00000004471 12250442200 013210 0 ustar ligges users ##Substitute y when element of x is missing
##also return an attribute "substi.source"=vector of var names and NAs
## Substitute y when element of x is missing;
## also return an attribute "substi.source"=vector of var names and NAs
## telling, for each observation, which variable supplied the value
## (NA when both were missing).  If either x or y is a factor, both must
## be; their level sets are merged.  pr=TRUE prints usage counts.
substi <- function(x,y,pr=TRUE)
{
  if(length(x)!=length(y))
    stop("lengths of x and y are different")

  nf <- is.factor(x) + is.factor(y)
  if(nf==1)
    stop("both x and y must be factor variables if either is")

  isna <- is.na(x)
  ## Deparse the two argument expressions to character; as.character()
  ## is required because cat() cannot print language objects, so the
  ## raw sys.call() fragment would error under the default pr=TRUE
  vnames <- as.character(sys.call()[c(2,3)])
  if(pr) {
    cat("Variables:",vnames,"\n")
    cat("Used first variable:",sum(!is.na(x)),"\n")
    cat("Used second variable:",sum(is.na(x) & !is.na(y)),"\n")
  }

  if(nf) {
    ## factors: merge the two level sets before substituting
    levs <- unique(c(levels(x),levels(y)))
    x <- as.character(x)
    y <- as.character(y)
    x[isna] <- y[isna]
    x <- factor(x,levs)
    y <- factor(y,levs)
  } else
    x[isna] <- y[isna]

  ## source code: 1 = from x, 2 = substituted from y, NA = both missing
  ss <- ifelse(isna & is.na(y),NA,ifelse(isna,2,1))
  attr(ss,"names") <- NULL
  ## levels=1:2 keeps this valid even when only one source occurs
  ss <- factor(ss, levels=1:2, labels=vnames)
  if(pr)
    cat("Obs:",sum(!is.na(x))," Obs missing:",sum(is.na(x)),"\n")

  attr(x,"substi.source") <- ss
  attr(x,'class') <- c("substi",attr(x,'class'))
  x
}
## Extract the "substi.source" attribute attached by substi(); NULL if absent.
substi.source <- function(x)
{
  attr(x, "substi.source")
}
## Subset method for "substi" objects: subsets the data and the parallel
## "substi.source" attribute together, preserving the remaining attributes.
"[.substi" <- function(x, ...)
{
  src  <- attr(x, "substi.source")
  keep <- attributes(x)
  ## drop attributes that must not be restored wholesale after subsetting
  keep$dimnames <- NULL
  keep$dim <- NULL
  keep$names <- NULL
  keep$substi.source <- NULL
  ## unclass so default subscripting is used
  attr(x, 'class') <- NULL
  x <- x[...]
  attributes(x) <- keep
  attr(x, "substi.source") <- src[...]
  x
}
## Print method for "substi" objects: values that were substituted from
## the second variable (source code 2) are flagged with a trailing "*".
print.substi <- function(x, ...)
{
  src <- unclass(attr(x, "substi.source"))
  if(!length(src)) {
    print.default(x)
    return(invisible())
  }

  out <- if(is.factor(x)) as.character(x) else format(x)
  names(out) <- names(x)

  ## code 2 means the value came from the substitute variable
  flagged <- src == 2
  out[flagged] <- paste(out[flagged], "*", sep = "")

  attr(out, "label") <- NULL
  attr(out, "substi.source") <- NULL
  attr(out, "class") <- NULL
  print.default(out, quote = FALSE)
  invisible()
}
## Coerce a "substi" object to a one-column data frame.  Element names
## become row names when they are complete and unique; otherwise row
## names are "" (optional=TRUE) or 1:n.  The column is named after the
## expression passed as x unless optional=TRUE.
as.data.frame.substi <- function(x, row.names = NULL, optional = FALSE, ...)
{
  n <- length(x)
  if(!length(row.names)) {
    nm <- names(x)
    if(length(nm) == n && !any(duplicated(nm)))
      row.names <- nm
    else
      row.names <- if(optional) character(n) else as.character(1:n)
  }

  res <- list(x)
  if(!optional)
    names(res) <- deparse(substitute(x))[[1]]
  structure(res, row.names=row.names, class='data.frame')
}
## Callback registry for key-drawing functions.  Key, Key2 and sKey are
## placeholders whose real behavior is installed at plot time by the
## corresponding .set* function.
##
## Default stub, hit when a Key-type function is called before any
## plotting routine has registered a callback.
.tmpfunction <- function(...)
  stop("This function callback has not been defined yet")

## Key() calls whatever .tmpfunction its own environment provides.
## Until .setKey() installs one there, name lookup falls through the
## fresh environment to its parent and reaches the stub above.
Key <- function(...) {
  .tmpfunction(...)
}
## give Key a private environment that .setKey can write into
environment(Key) <- new.env()

## Install x as the callback Key() will invoke from now on.
.setKey <- function(x) {
  environment(Key)$.tmpfunction <- x
}

## Second, independent key callback (same mechanism as Key).
Key2 <- function(...)
  .tmpfunction(...)
environment(Key2) <- new.env()

.setKey2 <- function(x)
  environment(Key2)$.tmpfunction <- x

## Third key callback (same mechanism); used by other plotting code.
sKey <- function(...)
  .tmpfunction(...)
environment(sKey) <- new.env()

.setsKey <- function(x)
  environment(sKey)$.tmpfunction <- x
## Plot an estimated restricted-cubic-spline transformation of x against
## y from a logistic or Cox model, with optional pointwise 0.95
## confidence limits, knot location arrows, adjustment covariables, and
## model statistics written on the plot.
##   x, y     : predictor and response; event is required for Cox
##   model    : "logistic" or "cox" ("ols" is accepted by match.arg but
##              has no fitting branch here)
##   xrange   : evaluation range (default: inner quantiles of x)
##   nk/knots : number or locations of knots for rcspline.eval
##   show     : "xbeta" (transformed scale) or "prob" (logistic only)
##   adj      : optional matrix of adjustment covariables
##   statloc  : "ll", xy coordinates, or "none"; if missing and the
##              session is interactive, chosen with locator()
##   m/smooth : add grouped proportions / supsmu nonparametric estimate
##   plim     : retained for backward compatibility; currently unused
## Returns, invisibly, knots, the evaluation grid, the fitted
## transformation and its confidence limits.
rcspline.plot <- function(x, y, model=c("logistic","cox","ols"), xrange,
                          event, nk=5, knots=NULL, show=c("xbeta", "prob"),
                          adj=NULL, xlab, ylab, ylim, plim=c(0,1),
                          plotcl=TRUE, showknots=TRUE, add=FALSE, subset,
                          lty=1, noprint=FALSE, m, smooth=FALSE, bass=1,
                          main="auto", statloc)
{
  model <- match.arg(model)
  show  <- match.arg(show)

  ## argument consistency checks
  if(! missing(event))
    model<-"cox"

  if(model == "cox" & missing(event))
    stop('event must be given for model="cox"')

  if(show == "prob" & ! missing(adj))
    stop('show="prob" cannot be used with adj')

  if(show == "prob" & model != "logistic")
    stop('show="prob" can only be used with model="logistic"')

  if(length(x) != length(y))
    stop('x and y must have the same length')

  if(! missing(event) && length(event) != length(y))
    stop('y and event must have the same length')

  if(! missing(adj)) {
    if(! is.matrix(adj)) adj <- as.matrix(adj)
    if(dim(adj)[1] != length(x))
      stop('x and adj must have the same length')
  }

  if(missing(xlab))
    xlab <- label(x)

  if(missing(ylab))
    ylab <- label(y)

  ## listwise deletion over x, y, event, adj, and the subset filter
  isna <- is.na(x) | is.na(y)
  if(! missing(event))
    isna <- isna | is.na(event)

  nadj <- 0
  if(! missing(adj)) {
    nadj <- ncol(adj)
    isna <- isna | apply(is.na(adj), 1, sum) > 0
  }

  if(! missing(subset))
    isna <- isna | (! subset)

  x <- x[! isna]
  y <- y[! isna]
  if(! missing(event))
    event <- event[! isna]

  if(! missing(adj))
    adj <- adj[! isna, ]

  n <- length(x)
  if(n<6)
    stop('fewer than 6 non-missing observations')

  if(missing(xrange)) {
    frac<-10./max(n, 200)
    xrange<-quantile(x, c(frac, 1.-frac))
  }

  ## spline basis for the fit
  if(missing(knots))
    xx <- rcspline.eval(x, nk=nk)
  else xx <- rcspline.eval(x, knots)

  knots <- attr(xx, "knots")
  nk <- length(knots)

  if(model == "logistic") {
    if (!requireNamespace("rms", quietly = TRUE))
      stop("The 'logistic' model requires the 'rms' package.")
    b <- rms::lrm.fit(cbind(x, xx, adj), y)
    beta <- b$coef
    cov  <- vcov(b)
    model.lr <- b$stats["Model L.R."]
    offset <- 1   #to skip over intercept parameter
    ylabl <-
      if(show == "prob")
        "Probability"
      else "log Odds"

    sampled <- paste("Logistic Regression Model, n=", n," d=", sum(y), sep="")
  }

  if(model == "cox") {
    sRequire('survival')
    ##11mar04
    ## added coxph.control around iter.max, eps  11mar04
    ## NOTE(review): lllin (log-likelihood of the linear-only fit) is
    ## computed but no longer used downstream; retained for compatibility
    lllin <- survival::coxph.fit(cbind(x, adj), cbind(y, event), strata=NULL,
                                 offset=NULL, init=NULL,
                                 control=survival::coxph.control(iter.max=10,
                                                                 eps=.0001),
                                 method="efron", rownames=NULL)$loglik[2]
    b <- survival::coxph.fit(cbind(x, xx, adj), cbind(y, event), strata=NULL,
                             offset=NULL, init=NULL,
                             control=survival::coxph.control(iter.max=10,
                                                             eps=.0001),
                             method="efron", rownames=NULL)
    beta <- b$coef
    if(! noprint) {
      print(beta);
      print(b$loglik)
    }
    cov <- vcov(b)
    model.lr <- 2*(b$loglik[2]-b$loglik[1])
    offset <- 0
    ylabl <- "log Relative Hazard"
    sampled <- paste("Cox Regression Model, n=",n," events=",sum(event),
                     sep="")
  }

  ## Wald statistics: overall association, linearity, and adjustment terms
  if(model == "logistic" | model == "cox") {
    model.df <- nk - 1 + nadj
    model.aic <- model.lr-2.*model.df
    v <- solve(cov[(1 + offset) : (nk + offset - 1),
                   (1 + offset) : (nk + offset - 1)])
    assoc.chi <- beta[(1 + offset) : (nk + offset - 1)] %*% v %*%
      beta[(1 + offset) : (nk + offset - 1)]
    assoc.df <- nk - 1   #attr(v,"rank")
    assoc.p <- 1.-pchisq(assoc.chi, nk - 1)
    v <- solve(cov[(2 + offset) : (nk + offset - 1),
                   (2 + offset) : (nk + offset - 1)])
    linear.chi <- beta[(2 + offset) : (nk + offset - 1)] %*% v %*%
      beta[(2 + offset) : (nk + offset - 1)]
    linear.df <- nk - 2   #attr(v,"rank")
    linear.p <- 1. - pchisq(linear.chi, linear.df)
    if(nadj > 0) {
      ntot <- offset + nk - 1 + nadj
      v <- solve(cov[(nk + offset) : ntot, (nk + offset) : ntot])
      adj.chi <- beta[(nk + offset) : ntot] %*% v %*%
        beta[(nk + offset) : ntot]
      adj.df <- ncol(v)   #attr(v,"rank")
      adj.p <- 1. - pchisq(adj.chi, adj.df)
    } else {
      adj.chi <- 0
      adj.p <- 0
    }
  }

  ## Evaluate xbeta for expanded x at desired range
  xe <- seq(xrange[1], xrange[2], length=600)
  if(model == "cox")
    xx <- rcspline.eval(xe, knots, inclx=TRUE)
  else
    xx<- cbind(rep(1, length(xe)), rcspline.eval(xe, knots, inclx=TRUE))

  xbeta <- xx %*% beta[1 : (nk - 1 + offset)]
  var <- drop(((xx %*% cov[1 : (nk - 1 + offset), 1 : (nk - 1 + offset)])*xx) %*%
              rep(1, ncol(xx)))
  lower <- xbeta - 1.96*sqrt(var)
  upper <- xbeta + 1.96*sqrt(var)
  if(show == "prob") {
    ## anti-logit to the probability scale
    xbeta <- 1./(1. + exp(-xbeta))
    lower <- 1./(1. + exp(-lower))
    upper <- 1./(1. + exp(-upper))
  }

  xlim <- range(pretty(xe))
  if(missing(ylim))
    ylim <- range(pretty(c(xbeta, if(plotcl) lower, if(plotcl) upper)))

  if(main == "auto") {
    if(show == "xbeta")
      main <- "Estimated Spline Transformation"
    else main <- "Spline Estimate of Prob{Y=1}"
  }

  if(! interactive() & missing(statloc))
    statloc<-"ll"

  if(! add) {
    oldmar<-par("mar")
    if(! missing(statloc) && statloc[1] == "ll")
      oldmar[1]<- 11   # leave room below the plot for the statistics

    oldpar <- par(err= - 1, mar=oldmar)
    plot(xe, xbeta, type="n", main=main, xlab=xlab, ylab=ylabl,
         xlim=xlim, ylim=ylim)
    lines(xe, xbeta, lty=lty)
    ## write a line of text `line` lines below anchor z
    ltext<-function(z, line, label, cex=.8, adj=0)
    {
      zz<-z
      zz$y<-z$y-(line - 1)*1.2*cex*par("csi")*(par("usr")[4]-par("usr")[3])/
        (par("fin")[2])   #was 1.85
      text(zz, label, cex=cex, adj=adj)
    }

    sl<-0
    if(missing(statloc)) {
      cat("Click left mouse button at upper left corner for statistics\n")
      z<-locator(1)
      statloc<-"l"
    } else if(statloc[1] != "none") {
      if(statloc[1] == "ll") {
        z<-list(x=par("usr")[1], y=par("usr")[3])
        sl<-3
      } else z<-list(x=statloc[1], y=statloc[2])
    }

    if(statloc[1] != "none" & (model == "logistic" | model == "cox")) {
      ltext(z, 1 + sl, sampled)
      ltext(z, 2 + sl, "    Statistic        X2  df")
      chistats<-format(as.single(round(c(model.lr, model.aic,
                                         assoc.chi, linear.chi, adj.chi), 2)))
      pvals<-format(as.single(round(c(assoc.p, linear.p, adj.p), 4)))
      ltext(z, 3 + sl, paste("Model    L.R. ", chistats[1], model.df,
                             " AIC=", chistats[2]))
      ltext(z, 4 + sl, paste("Association  Wald ", chistats[3], assoc.df,
                             " p= ", pvals[1]))
      ltext(z, 5 + sl, paste("Linearity    Wald ", chistats[4], linear.df,
                             " p= ", pvals[2]))
      if(nadj > 0)ltext(z, 6 + sl, paste("Adjustment   Wald " , chistats[5],
                                         adj.df, " p= ", pvals[3]))}
  } else lines(xe, xbeta, lty=lty)

  ## removed leftover debug call prn(cbind(xe, lower, upper)), which
  ## dumped the 600-row confidence-limit matrix to the console
  if(plotcl) {
    lines(xe, lower, lty=2)
    lines(xe, upper, lty=2)
  }

  if(showknots) {
    ## mark knot locations with downward arrows just above the x-axis
    bot.arrow <- par("usr")[3]
    top.arrow <- bot.arrow + .05 * (par("usr")[4]-par("usr")[3])
    for(i in 1 : nk)
      arrows(knots[i], top.arrow, knots[i], bot.arrow, length=.1)
  }

  if(model == "logistic" & nadj == 0) {
    if(smooth) {
      ## nonparametric check of the spline fit
      z<-supsmu(x, y, bass=bass)
      if(show == "xbeta") z$y <- logb(z$y/(1.-z$y))
      points(z, cex=.4)
    }
    if(! missing(m)) {
      ## grouped raw proportions, m observations per group
      z<-groupn(x, y, m=m)
      if(show == "xbeta") z$y <- logb(z$y/(1.-z$y))
      points(z, pch=2, mkh=.05)}
  }

  if(! add)
    par(oldpar)

  invisible(list(knots=knots, x=xe, xbeta=xbeta, lower=lower, upper=upper))
}
Hmisc/R/zoom.s 0000644 0001762 0000144 00000000732 13067150346 012675 0 ustar ligges users ## Function to use the mouse to zoom in on plots.
## Author: Bill Dunlap
## Interactively zoom into a plot produced by `fun`: after the initial
## plot, each pair of mouse clicks defines new x/y limits and the plot is
## redrawn; ending locator() with fewer than 2 points exits the loop.
## Author: Bill Dunlap
zoom <- function(fun, ...) {
  on.exit(par(prior))
  prior <- par('err')
  par(err = -1)
  fun(...)
  repeat {
    cat("Click mouse over corners of zoom area: ")
    pts <- locator(n = 2)
    if(length(pts$x) != 2) break
    xlim <- range(pts$x)
    ylim <- range(pts$y)
    cat("xlim=", xlim, "ylim=", ylim, "\n")
    fun(..., xlim = xlim, ylim = ylim)
  }
  cat("Bye! \n")
}
Hmisc/R/ftu.s 0000644 0001762 0000144 00000003320 12243661443 012503 0 ustar ligges users ##Dan Heitjan dheitjan@biostats.hmc.psu.edu
## Power of a two-sided level-alpha test of pi1 = pi2 when the true
## proportions are p1 and p2 and bign subjects are split bign/(1+r) in
## group 1 and r*bign/(1+r) in group 2.  Two-tailed version of formula
## (6) of Fleiss, Tytun and Ury (1980, Biometrics 36, 343-346); suitable
## for del >= 0.1 and 0.33 <= r <= 3.
## Original author: Daniel F. Heitjan, 30 April 1991
ftupwr <- function(p1,p2,bign,r,alpha)
{
  m1       <- bign / (r + 1)             # group-1 sample size
  delta    <- abs(p2 - p1)
  rplus1   <- r + 1
  z.alpha  <- qnorm(1 - alpha / 2)
  p.pooled <- (p1 + r * p2) / (1 + r)
  q.pooled <- 1 - p.pooled

  numer <- sqrt(r * delta^2 * m1 - rplus1 * delta) -
    z.alpha * sqrt(rplus1 * p.pooled * q.pooled)
  denom <- sqrt(r * p1 * (1 - p1) + p2 * (1 - p2))
  pnorm(numer / denom)
}
## Approximate sample sizes giving power 1-beta in a two-tailed
## level-alpha test of pi1 = pi2 with true proportions p1, p2 and
## allocation ratio r (m subjects in group 1, r*m in group 2).  Based on
## equations (3) and (4) of Fleiss, Tytun and Ury (1980, Biometrics 36,
## 343-346); accurate to about 1% for del >= 0.1 and 0.5 <= r <= 2.
## Returns list(n1, n2).
## Original author: Daniel F. Heitjan, 30 April 1991
ftuss <- function(p1,p2,r,alpha,beta)
{
  z.alpha  <- qnorm(1 - alpha / 2)
  z.beta   <- qnorm(1 - beta)
  rplus1   <- r + 1
  p.pooled <- (p1 + r * p2) / rplus1
  q.pooled <- 1 - p.pooled
  q1 <- 1 - p1
  q2 <- 1 - p2
  delta <- abs(p2 - p1)

  ## uncorrected size (eq. 3) ...
  m.prime <- (z.alpha * sqrt(rplus1 * p.pooled * q.pooled) +
              z.beta * sqrt(r * p1 * q1 + p2 * q2))^2 / (r * delta^2)
  ## ... then the continuity-corrected size (eq. 4)
  m <- 0.25 * m.prime *
    (1 + sqrt(1 + 2 * rplus1 / (r * m.prime * delta)))^2
  list(n1 = floor(m + 1), n2 = floor(m * r + 1))
}
Hmisc/R/data.frame.labelled.s 0000644 0001762 0000144 00000000444 12243661443 015456 0 ustar ligges users ## For every object in a data frame that has a 'label' attribute, make it
## class 'labelled'
## For every column of a data frame carrying a 'label' attribute, prepend
## class 'labelled' so labelled-aware methods dispatch on it.
data.frame.labelled <- function(object)
{
  for(nm in names(object)) {
    if(length(attr(object[[nm]], 'label')))
      attr(object[[nm]], 'class') <-
        c('labelled', attr(object[[nm]], 'class'))
  }
  object
}
## Label a set of curves directly on the plot instead of (or as well as)
## using a legend.  `curves` is a list of lists with x and y components
## (and optional per-curve type/lty/lwd/col overrides).  Labels are by
## default placed where the curves are maximally separated vertically.
##   method : 'offset' (beside curve), 'on top', 'arrow', interactive
##            'mouse'/'locator', or 'none'
##   keys   : character/numeric keys drawn instead of labels, or 'lines';
##            a legend is drawn unless keyloc='none'
##   pl/add : draw the curves first / add to an existing plot
## Returns, invisibly, label coordinates and graphics settings in a form
## usable by putKey.
labcurve <- function(curves, labels=names(curves),
                     method=NULL, keys=NULL, keyloc=c('auto','none'),
                     type='l', step.type=c('left','right'),
                     xmethod=if(any(type=='s')) 'unique' else 'grid',
                     offset=NULL,
                     xlim=NULL, tilt=FALSE, window=NULL,
                     npts=100, cex=NULL,
                     adj='auto', angle.adj.auto=30,
                     lty=pr$lty, lwd=pr$lwd, col.=pr$col,
                     transparent=TRUE, arrow.factor=1,
                     point.inc=NULL, opts=NULL, key.opts=NULL,
                     empty.method=c('area','maxdim'),
                     numbins=25,
                     pl=!missing(add), add=FALSE,
                     ylim=NULL, xlab="", ylab="",
                     whichLabel=1:length(curves),
                     grid=FALSE, xrestrict=NULL, ...)
{
  if(pl && !add) {
    plot.new(); par(new=TRUE)   # enables strwidth etc.
  }

  oxpd <- par('xpd')
  par(xpd=NA)
  on.exit(par(xpd=oxpd))

  gfun <- ordGridFun(grid)   ## see Misc.s
  gun  <- gfun$unit

  diffu <- function(v) diff(unclass(v))   # mainly for POSIXt
  ## also look at difftime

  mcurves <- missing(curves)

  pr <- par(c('cex','col','lwd','lty'))

  if(!mcurves) {
    ## recycle per-curve graphics settings and honor per-curve overrides
    nc <- length(curves)
    type <- rep(type, length.out=nc)
    lty  <- rep(lty,  length.out=nc)
    lwd  <- rep(lwd,  length.out=nc)
    col. <- rep(col., length.out=nc)
    for(i in 1:nc) {
      z <- curves[[i]]
      if(pl && !add) {
        ## accumulate default axis limits over all curves
        if(i==1) {
          xlm <- range(z[[1]],na.rm=TRUE)
          ylm <- range(z[[2]],na.rm=TRUE)
        }
        else {
          xlm <- range(xlm,z[[1]],na.rm=TRUE)
          ylm <- range(ylm,z[[2]],na.rm=TRUE)
        }
      }
      if(length(a <- z$type)) type[i] <- a
      if(length(a <- z$lty))  lty[i]  <- a
      if(length(a <- z$lwd))  lwd[i]  <- a
      if(length(a <- z$col))  col.[i] <- a
    }
  }

  ## Optionally bring arguments from opts as if they were listed outside opts
  ## This is used when opts is passed through to a function calling labcurve
  if(length(opts) && is.list(opts)) {
    names.opts <- names(opts)
    full.names <- c('labels','method','keys','keyloc','type','step.type',
                    'xmethod','offset','xlim','tilt','window','npts','cex',
                    'adj','angle.adj.auto','lty','lwd','col.','n.auto.keyloc',
                    'transparent','arrow.factor','point.inc','key.opts',
                    'empty.method','numbins','ylim','xlab','ylab')
    i <- charmatch(names.opts, full.names, -1)
    if(any(i < 1))
      stop(paste('Illegal elements in opts:',
                 paste(names.opts[i < 1], collapse=' ')))
    for(j in 1:length(opts)) assign(full.names[i[j]],opts[[j]],immediate=TRUE)
  }

  if(mcurves) nc <- length(labels)
  else if(!is.logical(labels) && nc != length(labels))
    stop('length of labels is not equal to # curves')

  type <- rep(type, length.out=nc)
  lty  <- rep(lty,  length.out=nc)
  lwd  <- rep(lwd,  length.out=nc)
  col. <- rep(col., length.out=nc)

  if(pl) {
    if(mcurves) stop('curves must be given if pl=T')
    if(!add) {
      if(!length(xlim)) xlim <- xlm
      if(!length(ylim)) ylim <- ylm
      ## default axis labels from the names of the first curve's components
      namcur <- names(curves[[1]])
      if(!is.expression(xlab) && xlab=='' && length(namcur))
        xlab <- namcur[1]
      if(!is.expression(ylab) && ylab=='' && length(namcur))
        ylab <- namcur[2]
      if(grid) stop("grid=TRUE when pl=TRUE is not yet implemented")
      else
        plot(0, 0, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab,
             type='n', xaxt='n')
      if(inherits(xlim,'POSIXt') || inherits(xlim,'POSIXct'))
        axis.POSIXct(1)
      else if(inherits(xlim,'Date')) axis.Date(1)
      else axis(1)
      pr <- par(c('cex','col','lwd','lty'))
    }
    for(i in 1:nc) {
      z <- curves[[i]]
      gfun$lines(z[[1]], z[[2]], type=type[i], lty=lty[i],
                 lwd=lwd[i], col=col.[i], xpd=FALSE)
    }
  }

  if(length(method) && method=='none') return(invisible())

  pr <- parGrid(grid)
  usr <- pr$usr; uin <- pr$uin

  is.keys    <- length(keys) > 0
  lines.keys <- length(keys)==1 && is.character(keys) && keys=='lines'

  if(!length(method)) {
    if(is.keys)
      method <- if(is.numeric(keys) || lines.keys) 'on top'
                else 'offset'
    else method <- 'offset'
  }

  ## Expand abbreviations for method - couldn't use match.arg
  possible.methods <- c('offset','on top','arrow','mouse','locator')
  i <- charmatch(method, possible.methods, -1)
  if(i < 1)
    stop(paste('method must be one of ',
               paste(possible.methods,collapse=' ')))
  method <- possible.methods[i]

  if(!length(cex)) cex <- pr$cex

  if(mcurves && method %nin% c('mouse','locator'))
    stop('must specify curves unless method="mouse" or "locator"')

  if(!lines.keys && is.keys && length(keys) != nc)
    stop('number of keys must = number of curves')

  ## interactive placement: one click per labeled curve
  if(method %in% c('mouse','locator')) {
    if(adj=='auto') adj <- .5
    xt <- yt <- numeric(nc)
    for(i in 1:nc) {
      if(i %in% whichLabel) {
        cat('\nPosition pointer to desired center of curve label and click for',
            labels[i],'\n')
        lab.pos <- locator(1)
        xt[i] <- lab.pos$x
        yt[i] <- lab.pos$y
        gfun$text(lab.pos, labels[i], cex=cex, adj=adj, col=col.[i],
                  ...)
      }
    }
    return(invisible(list(x=xt, y=yt, offset=0,
                          adj=adj, cex=cex, angle=0, col=col., lwd=lwd,
                          key.opts=key.opts, ...)))
  }

  if(is.character(keyloc)) keyloc <- match.arg(keyloc)

  empty.method <- match.arg(empty.method)

  if(!length(offset))
    offset <- if(grid) unit(.75,"strheight","m") else
              strheight('m','user', cex)*.75

  if(!length(xlim)) xlim <- usr[1:2]
  if(!length(ylim)) ylim <- usr[3:4]

  ## single curve: label at its middle point, no separation search needed
  if(nc==1) {
    ci <- curves[[1]]
    xx <- ci[[1]]; yy <- ci[[2]]
    s <- is.finite(xx+yy)
    xx <- xx[s]; yy <- yy[s]
    imid <- trunc((length(xx)+1)/2)
    adj <- if(is.character(adj))0.5 else adj
    if(any(whichLabel==1))
      gfun$text(xt <- gun(xx[imid]),
                yt <- gun(yy[imid])+offset,
                labels,
                cex=cex, adj=adj, col=col., ...)
    return(invisible(list(x=xt, y=yt, offset=offset,
                          adj=adj, cex=cex, col=col., lwd=lwd, angle=0,
                          key.opts=key.opts, ...)))
  }

  if(xmethod %nin% c('grid','unique'))
    stop('xmethod must be "grid" or "unique"')

  step.type <- match.arg(step.type)

  if(is.character(adj)) {
    adj.does.vary     <- TRUE
    adj.needs.to.vary <- TRUE
    adj <- rep(.5, nc)
  }
  else {
    adj.does.vary     <- length(adj) > 1
    adj.needs.to.vary <- FALSE
    adj <- rep(adj, length.out=nc)
  }

  ## candidate x-positions: an even grid, or the union of observed x's
  if(xmethod=='grid') xs <- seq(xlim[1],xlim[2],length.out=npts) else {
    xs <- unlist(sapply(curves, function(z)z[[1]]))
    xs <- sort(unique(xs[!is.na(xs)]))
    xs <- xs[xs>=xlim[1] & xs<=xlim[2]]
  }

  ## interpolate every curve onto the common candidate grid
  ys  <- matrix(NA, nrow=length(xs), ncol=nc)
  rng <- matrix(NA, nrow=2, ncol=nc)

  for(i in 1:nc) {
    ci <- curves[[i]]
    xx <- ci[[1]]; yy <- ci[[2]]
    s <- is.finite(xx+yy)
    xx <- xx[s]
    y <- approx(xx, yy[s], xout=xs,
                f=if(step.type=='left') 0 else 1,
                method=if(type[i]=='l') "linear" else "constant")$y
    y <- pmax(pmin(y,usr[4]),usr[3])
    ## Where one curve is not defined, consider this gap to have an ordinate
    ## that is far from the other curves so labels where be placed where
    ## the other curves haven't started or after they've ended
    y[is.na(y)] <- 1e10
    ys[,i] <- y
    rxx <- range(xx)
    if(length(xrestrict)) {
      rxx[1] <- max(rxx[1],xrestrict[1])
      rxx[2] <- min(rxx[2],xrestrict[2])
    }
    rng[,i] <- rxx
    ## Save real range of each x-vector so candidates for labeling
    ## will be where the curve really exists
  }

  if(method=='on top' && is.keys && is.numeric(keys)) {
    ## Draw periodic symbols
    sym <- function(curve, pch, inc, offset, type, step.type, col.,
                    grid, gfun) {
      x <- curve[[1]]; y <- curve[[2]]
      s <- is.finite(x+y)
      x <- x[s]; y <- y[s]
      if(length(x) < 2)
        stop("when specifying numeric keys (pch) you must have >=2 data points")
      lim <- range(x)
      xx <-
        if(grid)
          convertX(gun(seq(lim[1],lim[2],by=inc) + offset),
                   'native', valueOnly=TRUE)
        else
          seq(lim[1], lim[2], by=inc) + offset
      if(length(xx)>1) xx <- xx[-1]
      xx <- xx[xx<=lim[2]]
      if(length(xx)==0)
        warning('curve was too short to mark with a symbol.\nMay want to change point.inc or xmethod for labcurve')
      else {
        yy <- approx(x, y, xout=xx,
                     method=if(type=='l') 'linear' else 'constant',
                     f=if(step.type=='left') 0 else 1)$y
        gfun$points(xx, yy, pch=pch, col=col.)
      }
    }
    if(!length(point.inc)) point.inc <- diffu(xlim)/5
    for(i in 1:nc)
      sym(curves[[i]], keys[i], point.inc, (i-1)*point.inc/nc,
          type[i], step.type, col.=col.[i], grid, gfun)
    xt <- yt <- NULL
  }
  else {
    xt <- yt <- direction <- numeric(nc)
    angle <- rep(0,nc)

    g <- function(x) {
      ## finds min(abs(x)) but keeps original sign
      ax <- abs(x)
      if(all(is.na(ax)))
        return(NA)
      w <- min(ax, na.rm=TRUE)
      (x[ax==w])[1]   #use first occurrence
    }

    for(i in 1:nc) {
      yi <- ys[,i]
      ## FIX: restrict label candidates to the x-range where curve i
      ## actually exists (statement had been garbled to
      ## `yi[xsrng[2,i]] <- NA`)
      yi[xs < rng[1,i] | xs > rng[2,i]] <- NA
      diffmat <- ys[,-i,drop=FALSE] - yi
      ## signed distance to the nearest competing curve at each x
      mindiff <- apply(diffmat, 1, g)
      ## label where that nearest-curve distance is greatest
      z <- abs(mindiff)==max(abs(mindiff),na.rm=TRUE)
      maxid <- min(c(1:length(mindiff))[z], na.rm=TRUE)
      xt[i] <- xs[maxid]
      yt[i] <- ys[maxid,i]
      if(!is.na(mindiff[maxid]))
        direction[i] <- 1-2*(mindiff[maxid]>0)

      yto <- yt[i] + direction[i] *
        (if(grid) convertY(offset,'native',valueOnly=TRUE)
         else offset)

      ## flip the offset direction if the label would leave the plot region
      if(!is.na(yto))
        if(yto >= usr[4] || yto <= usr[3])
          direction[i] <- -direction[i]

      ## Find slope of curve i at xt[i]
      if(tilt || adj.needs.to.vary) {
        angle[i] <- if(type[i]=='s') 0
                    else {
                      ci <- curves[[i]]
                      xx <- ci[[1]]; yy <- ci[[2]]
                      s <- is.finite(xx+yy)
                      w <-
                        if(length(window))
                          window
                        else {
                          nch <-
                            if(lines.keys) nchar(labels[i])
                            else if(is.keys)
                              1*is.numeric(keys) +
                                nchar(keys[i])*is.character(keys)
                            else
                              nchar(labels[i])
                          w <-
                            if(grid)
                              nch*convertX(unit(.75,"strwidth","m"),
                                           'native',valueOnly=TRUE)
                            else
                              nch*strwidth('m','user',cex)
                        }
                      yy <- approx(xx[s], yy[s], xout=c(xt[i]-w/2,xt[i]+w/2),
                                   rule=2)$y
                      slope <- diff(yy)/w
                      180*atan(slope*uin[2]/uin[1])/pi
                    }
      }

      if(adj.needs.to.vary) {
        adj[i] <-
          if(type[i]=='s')
            1*(direction[i]<0)
          else {
            if(is.na(angle[i]) || abs(angle[i])<=angle.adj.auto)
              .5
            else if((direction[i]<0 && slope>0) ||
                    (direction[i]>0 && slope<0)) 0 else 1
          }
      }
    }

    if(!tilt) angle[] <- 0

    if(!lines.keys && method=='offset' && (!is.logical(labels) || labels)) {
      if(is.keys) {
        if(is.numeric(keys))
          for(i in 1:nc)
            gfun$points(xt[i], (gun(yt) + direction*offset)[i],
                        pch=keys[i], col=col.[i])
        else if(i %in% whichLabel)
          ## NOTE(review): i here is the final value of the loop index, so
          ## character keys are drawn only when nc %in% whichLabel --
          ## preserved as-is; verify intent upstream
          gfun$text(xt, gun(yt) + direction*offset,
                    keys, cex=cex,
                    adj=adj[1], col=col., ...)
      } else {
        if(tilt || adj.does.vary)
          for(i in whichLabel)
            gfun$text(xt[i], gun(yt[i])+direction[i]*offset,
                      labels[i], cex=cex, srt=angle[i],
                      adj=adj[i], col=col.[i],...)
        else
          gfun$text(xt, gun(yt)+direction*offset, labels,
                    cex=cex, adj=adj[1], col=col., ...)
      }
    }

    retlist <- list(x=xt, y=yt, offset=direction*offset,
                    adj=adj, cex=cex, col=col., lwd=lwd,
                    angle=if(tilt) angle,
                    key.opts=key.opts, ...)
  }

  if(method %in% c('on top','arrow') && (!is.logical(labels) || labels)) {
    retlist <- list(x=xt, y=yt, offset=0,
                    adj=.5, cex=cex, col=col., lwd=lwd, angle=0,
                    key.opts=key.opts, ...)

    if(method == 'on top' && !lines.keys) {
      if(is.keys) {
        if(is.character(keys))
          gfun$text(xt, yt, keys, cex=cex, col=col., adj=.5, ...)
        ## numeric keys (periodic plotting symbols) already handled above
      }
      else
        gfun$text(xt, yt, labels, cex=cex, col=col., adj=.5, ...)
    }
    else if(method=='arrow') {
      ydelta <- if(grid) unit(1/17,'npc') else diffu(ylim)/17
      xdelta <- if(grid) unit(1/26,'npc') else diffu(xlim)/26
      lab.pos <- list(x=gun(xt) + xdelta*arrow.factor,
                      y=gun(yt) + ydelta*arrow.factor)
      gfun$arrows(gun(xt)+xdelta*.6*arrow.factor,
                  gun(yt)+ydelta*.6*arrow.factor,
                  xt,yt,open=TRUE,size=.06,col=col.)
      gfun$text(lab.pos, labels, cex=cex, col=col., ...)
    }
  }

  if(is.keys && (!is.character(keyloc) || keyloc!='none')) {
    ## Make legend
    s <- whichLabel
    if(is.character(keyloc) && keyloc=='auto') {
      ## Find emptiest spot for drawing legend by finding
      ## center of largest empty rectangle large enough to hold
      ## this rectangle
      Xs <- rep(xs, nc)
      Ys <- as.vector(ys)

      putKeyEmpty(Xs, Ys,
                  labels=if(lines.keys || is.numeric(keys))
                           labels[s]
                         else
                           paste(keys,' ',labels, sep='')[s],
                  pch=if(is.numeric(keys))
                        keys[s],
                  lty=lty[s], lwd=lwd[s], cex=cex, col=col.[s],
                  transparent=transparent, plot=TRUE,
                  key.opts=key.opts, xlim=xlim, ylim=ylim, grid=grid)
    } else putKey(keyloc,
                  labels=if(lines.keys || is.numeric(keys))
                           labels[s]
                         else
                           paste(keys,' ',labels, sep='')[s],
                  pch=if(is.numeric(keys))
                        keys[s],
                  lty=lty[s], lwd=lwd[s], cex=cex, col=col.[s],
                  transparent=transparent, plot=TRUE,
                  key.opts=key.opts, grid=grid)
  }

  invisible(retlist)
}
## Version of legend for R that implements plot=FALSE, adds grid=TRUE
## Also defaults lty, lwd, pch to NULL and checks for length>0 rather
## than missing(), so it's easier to deal with non-applicable parameters
##
## rlegendg is better to use when grid is in effect. In R 2.0, you
## can't use strwidth etc. after a lattice drawing has been rendered
## Draw a legend under grid/lattice graphics via lattice::draw.key,
## anchored at normalized-parent-coordinate (x, y); x may also be a
## list(x, y).  lty/lwd produce line entries, pch/cex point entries, and
## `other` appends extra draw.key components.  Preferred over rlegend
## when a lattice drawing has been rendered (strwidth etc. unavailable).
rlegendg <- function(x, y, legend, col=pr$col[1], lty=NULL,
                     lwd=NULL, pch=NULL, cex=pr$cex[1], other=NULL)
{
  sRequire('lattice')
  pr <- par()
  if(is.list(x)) {
    y <- x[[2]]
    x <- x[[1]]
  }

  draw.lines  <- (length(lty) > 0 && any(lty > 0)) || length(lwd) > 0
  draw.points <- length(pch) > 0

  ## assemble the draw.key specification piece by piece
  key <- NULL
  if(draw.lines)
    key$lines <- list(col=col, lty=lty, lwd=lwd)
  if(draw.points)
    key$points <- list(col=col, pch=pch, cex=cex)
  key$text <- list(lab=legend)
  if(length(other))
    key <- c(key, other)

  lattice::draw.key(key, draw=TRUE,
                    vp=viewport(x=unit(x,'npc'), y=unit(y,'npc')))
  invisible()
}
## Version of legend for R that implements plot=FALSE, adds grid=TRUE
## Also defaults lty, lwd, pch to NULL and checks for length>0 rather
## than missing(), so it's easier to deal with non-applicable parameters
##
## Layout follows base legend(): computes the key's bounding box from
## character metrics, then optionally draws box, fills, line segments,
## points and text.  Returns, invisibly, the box geometry and the text
## positions so callers can place keys without drawing (plot=FALSE).
rlegend <- function (x, y, legend, fill, col = "black", lty=NULL, lwd=NULL,
                     pch=NULL, angle = NULL,
                     density = NULL, bty = "o", bg = par("bg"),
                     pt.bg = NA, cex = 1,
                     xjust = 0, yjust = 1, x.intersp = 1, y.intersp= 1,
                     adj = 0, text.width = NULL,
                     merge = do.lines && has.pch, trace = FALSE,
                     ncol = 1, horiz = FALSE, plot=TRUE, grid=FALSE,
                     ...)
{
  gfun <- ordGridFun(grid)   ## see Misc.s

  ## x may be list(x, y); then a second positional arg is the legend text
  if (is.list(x)) {
    if (!missing(y)) {
      if (!missing(legend))
        stop("`y' and `legend' when `x' is list (need no `y')")
      legend <- y
    }
    y <- x$y
    x <- x$x
  }
  else if (missing(y)) stop("missing y")

  if (!is.numeric(x) || !is.numeric(y)) stop("non-numeric coordinates")
  if ((nx <- length(x)) <= 0 || nx != length(y) || nx > 2)
    stop("invalid coordinate lengths")

  xlog <- par("xlog")
  ylog <- par("ylog")

  ## drawing helpers that exponentiate coordinates under log axes
  rect2 <- function(left, top, dx, dy, ...) {
    r <- left + dx
    if (xlog) {
      left <- 10^left
      r <- 10^r
    }
    b <- top - dy
    if (ylog) {
      top <- 10^top
      b <- 10^b
    }
    gfun$rect(left, top, r, b, angle = angle, density = density,
              ...)
  }

  segments2 <- function(x1, y1, dx, dy, ...) {
    x2 <- x1 + dx
    if (xlog) {
      x1 <- 10^x1
      x2 <- 10^x2
    }
    y2 <- y1 + dy
    if (ylog) {
      y1 <- 10^y1
      y2 <- 10^y2
    }
    gfun$segments(x1, y1, x2, y2, ...)
  }

  points2 <- function(x, y, ...) {
    if (xlog) x <- 10^x
    if (ylog) y <- 10^y
    gfun$points(x, y, ...)
  }

  text2 <- function(x, y, ...) {
    if (xlog) x <- 10^x
    if (ylog) y <- 10^y
    gfun$text(x, y, ...)
  }

  if (trace)
    catn <- function(...) do.call("cat", c(lapply(list(...),
                                                  formatC), list("\n")))

  ## character-cell metrics in user units, scaled by an effective cex
  pr <- parGrid(grid)
  cin <- pr$cin
  Cex <- (if(length(unique(cex)) > 1)
            mean(cex,na.rm=TRUE)
          else
            cex) * pr$cex

  if (!length(text.width))
    text.width <- max(strwidth(legend, units = "user", cex = cex))
  else if (!is.numeric(text.width) || text.width < 0)
    stop("text.width must be numeric, >= 0")

  xc <- Cex * xInch(cin[1], warn.log = FALSE, grid=grid)
  yc <- Cex * yInch(cin[2], warn.log = FALSE, grid=grid)
  xchar <- xc
  yextra <- yc * (y.intersp - 1)
  ymax <- max(yc, strheight(legend, units = "user", cex = cex))
  ychar <- yextra + ymax
  if (trace)
    catn("  xchar=", xchar, "; (yextra,ychar)=", c(yextra, ychar))

  ## fill boxes reserve extra horizontal space
  if (!missing(fill))
    {
      xbox <- xc * 0.8
      ybox <- yc * 0.5
      dx.fill <- xbox
    }

  do.lines <- (length(lty) && any(lty > 0)) || length(lwd)

  n.leg <- length(legend)
  n.legpercol <-
    if (horiz) {
      if (ncol != 1)
        warning(paste("horizontal specification overrides: Number of columns :=",
                      n.leg))
      ncol <- n.leg
      1
    }
    else ceiling(n.leg/ncol)

  if (has.pch <- length(pch)) {
    ## a multi-character pch[1] is split into one symbol per entry
    if (is.character(pch) && nchar(pch[1]) > 1) {
      if (length(pch) > 1)
        warning("Not using pch[2..] since pch[1] has multiple chars")
      np <- nchar(pch[1])
      pch <- substr(rep(pch[1], np), 1:np, 1:np)
    }
    if (!merge)
      dx.pch <- x.intersp/2 * xchar
  }

  x.off <- if (merge) -0.7 else 0

  ## if (xlog) x <- log10(x)    25Oct13
  ## if (ylog) y <- log10(y)
  ## two corner points define the box directly; one point anchors it
  ## via xjust/yjust and computed width/height
  if (nx == 2) {
    x <- sort(x)
    y <- sort(y)
    left <- x[1]
    top <- y[2]
    w <- diff(x)
    h <- diff(y)
    w0 <- w/ncol
    x <- mean(x)
    y <- mean(y)
    if (missing(xjust)) xjust <- 0.5
    if (missing(yjust)) yjust <- 0.5
  }
  else {
    h <- n.legpercol * ychar + yc
    w0 <- text.width + (x.intersp + 1) * xchar
    if (!missing(fill))
      w0 <- w0 + dx.fill
    if (has.pch && !merge)
      w0 <- w0 + dx.pch
    if (do.lines)
      w0 <- w0 + (2 + x.off) * xchar
    w <- ncol * w0 + 0.5 * xchar
    left <- x - xjust * w
    top <- y + (1 - yjust) * h
  }

  ## background box
  if (bty != "n") {
    if (trace)
      catn("  rect2(", left, ",", top, ", w=", w, ", h=",
           h, "...)", sep = "")
    if(plot)
      rect2(left, top, dx = w, dy = h, col = bg)   ## FEH
  }

  ## per-entry anchor positions, filled column-major
  xt <- left + xchar +
    (w0 * rep(0:(ncol - 1), rep(n.legpercol, ncol)))[1:n.leg]
  yt <- top - (rep(1:n.legpercol, ncol)[1:n.leg] - 1) * ychar -
    0.5 * yextra - ymax

  ## fill swatches, then advance xt past them
  if (!missing(fill)) {
    fill <- rep(fill, length.out = n.leg)
    if(plot)
      rect2(left = xt, top = yt + ybox/2, dx = xbox, dy = ybox,
            col = fill)
    xt <- xt + dx.fill
  }

  if (has.pch || do.lines)
    col <- rep(col, length.out = n.leg)

  ## line samples
  if (do.lines) {
    seg.len <- 2
    ok.l <-
      if (!length(lty)) {
        lty <- 1
        TRUE
      } else
        lty > 0

    if (!length(lwd)) lwd <- pr$lwd
    lty <- rep(lty, length.out = n.leg)
    lwd <- rep(lwd, length.out = n.leg)
    if (trace)
      catn("  segments2(", xt[ok.l] + x.off * xchar, ",",
           yt[ok.l], ", dx=", seg.len * xchar, ", dy=0, ...)",
           sep = "")
    if(plot)
      segments2(xt[ok.l] + x.off * xchar, yt[ok.l], dx = seg.len *
                xchar, dy = 0, lty = lty[ok.l], lwd = lwd[ok.l],
                col = col[ok.l])
    xt <- xt + (seg.len + x.off) * xchar
  }

  ## point samples (centered on the line sample when merge=TRUE)
  if (has.pch) {
    pch <- rep(pch, length.out = n.leg)
    pt.bg <- rep(pt.bg, length.out = n.leg)
    ok <- is.character(pch) | pch >= 0
    x1 <- (if (merge)
             xt - (seg.len/2) * xchar
           else
             xt)[ok]
    y1 <- yt[ok]
    if (trace)
      catn("  points2(", x1, ",", y1, ", pch=", pch[ok],
           "...)")
    if(plot) points2(x1, y1, pch = pch[ok], col = col[ok], cex = cex,
                     bg = pt.bg[ok])
    if (!merge) xt <- xt + dx.pch
  }

  ## legend text
  xt <- xt + x.intersp * xchar
  if(plot)
    text2(xt, yt, labels = legend,
          adj = adj,
          cex = min(cex, na.rm=TRUE))

  invisible(list(rect = list(w = w, h = h, left = left, top = top),
                 text = list(x = xt, y = yt)))
}
## Draw or measure a plot key (legend) centered at z = list(x, y).
## For grid/lattice graphics (grid=TRUE) the key is built with
## lattice::draw.key inside a viewport at z; for base graphics it is
## drawn with rlegend().  With plot=FALSE nothing is drawn and the key's
## size is computed instead.
##
## Arguments:
##   z            list(x, y): key center in native/user coordinates
##   labels       character vector of key labels (length nc)
##   type         per-entry 'p' (points), 'l' (lines), 'b' (both), 'n';
##                derived from which of pch/lty/lwd are non-NA when omitted
##   pch,lty,lwd  plotting parameters; NA means "not used for this entry"
##   cex, col     character size and colors for the entries
##   transparent, key.opts  passed through to the underlying key function
##   plot         FALSE to only compute the key's size
##   grid         TRUE for grid/lattice graphics, FALSE for base graphics
## Returns (invisibly) c(width, height) in data units, or c(NA, NA) when
## plotted under grid.
putKey <- function(z, labels, type=NULL,
                   pch=NULL, lty=NULL, lwd=NULL,
                   cex=par('cex'), col=rep(par('col'),nc),
                   transparent=TRUE, plot=TRUE, key.opts=NULL,
                   grid=FALSE)
{
  sRequire('lattice')
  nc <- length(labels)
  ## Normalize pch/lty/lwd to length-nc vectors; NA = unspecified
  if(!length(pch)) pch <- rep(NA, nc)
  if(!length(lty)) lty <- rep(NA, nc)
  if(!length(lwd)) lwd <- rep(NA, nc)
  pp <- !is.na(pch)                  # entries that show a point
  lp <- !is.na(lty) | !is.na(lwd)    # entries that show a line
  lwd <- ifelse(is.na(lwd), par('lwd'), lwd)
  ## Derive type per entry when not given: 'b'=both, 'n'=neither
  if(!length(type)) type <- ifelse(!(pp | lp), 'n',
                                   ifelse(pp & lp, 'b',
                                          ifelse(pp, 'p', 'l')))
  ## Blank out parameters that are inconsistent with each entry's type
  pch <- ifelse(is.na(pch) & type!='p' & type!='b', NA, pch)
  lty <- ifelse(is.na(lty) & type=='p', NA, lty)
  lwd <- ifelse(is.na(lwd) & type=='p', 1, lwd)
  cex <- ifelse(is.na(cex) & type!='p' & type!='b', 1, cex)
  if(any(is.na(lwd)))
    stop("lwd can not be NA for type='l' or 'b'")
  if(any(is.na(cex)))
    stop("cex can not be NA for type='p' or 'b'")
  ## Build an unevaluated call in m, then eval(as.call(m)) below
  m <- list()
  ldk <- lattice::draw.key
  m[[1]] <- as.name(if(grid) 'ldk' else 'rlegend')
  if(!grid) {
    m$x <- z[[1]]; m$y <- z[[2]]
  }
  if(grid) {
    ## Assemble the lattice key description: text plus optional lines
    ## and points components
    w <- list(text=list(labels, col=col))
    if(!(all(is.na(lty)) & all(is.na(lwd)))) {
      lns <- list()
      if(!all(is.na(lty))) lns$lty <- lty
      if(!all(is.na(lwd))) lns$lwd <- lwd
      lns$col <- col
      w$lines <- lns
    }
    if(!all(is.na(pch))) w$points <- list(pch=pch, col=col)
    m$key  <- c(w, key.opts)
    m$draw <- plot
    if(plot)
      m$vp <- viewport(x=unit(z[[1]], 'native'),
                       y=unit(z[[2]], 'native'))
    z <- eval(as.call(m))   # draw.key grob (drawn when draw=TRUE)
    ## When not plotting, convert the grob's size to native units
    size <-
      if(plot) c(NA,NA)
      else
        c(convertUnit(grobWidth(z), 'native', 'x', 'location', 'x',
                      'dimension', valueOnly=TRUE)[1],
          convertUnit(grobHeight(z), 'native', 'y', 'location', 'y',
                      'dimension', valueOnly=TRUE)[1])
    return(invisible(size))
  }
  else {
    ## Base graphics: build and evaluate an rlegend() call
    m$legend <- labels
    m$xjust <- m$yjust <- .5   # center the legend on (x, y)
    m$plot <- plot
    m$col <- col
    m$cex <- cex
    if(!all(is.na(lty))) m$lty <- lty
    if(!all(is.na(lwd))) m$lwd <- lwd
    if(!all(is.na(pch))) m$pch <- pch
    if(length(key.opts)) m[names(key.opts)] <- key.opts
    w <- eval(as.call(m))$rect   # rlegend returns the key's bounding rect
    return(invisible(c(w$w[1], w$h[1])))
  }
  ## NOTE(review): the code below is unreachable -- both branches above
  ## return.  It appears to be legacy support for the S-Plus key()
  ## function; confirm before removing.
  m$transparent <- transparent
  m$corner <- c(.5,.5)
  m$plot <- plot
  m$type <- type
  if(!plot) labels <- substring(labels, 1, 10)
  ## key gets length wrong for long labels
  m$text <- list(labels, col=col)
  if(all(type=='p'))
    m$points <- list(pch=pch, cex=cex, col=col)
  else
    m$lines <-
      if(any(type!='l'))
        list(lty=lty, col=col, lwd=lwd, pch=pch, cex=cex)
      else
        list(lty=lty, col=col, lwd=lwd)
  if(length(key.opts))
    m[names(key.opts)] <- key.opts
  invisible(eval(as.call(m))) ## execute key(....)
}
## Place a key in the largest empty region of an existing plot.
## First measures the key with putKey(plot=FALSE), then asks
## largest.empty() for a sufficiently large data-free rectangle and, when
## one exists and plot=TRUE, draws the key at its center.
## Returns (invisibly) the chosen key location, or the failure value from
## largest.empty() when no adequate empty area was found.
putKeyEmpty <- function(x, y, labels, type=NULL,
                        pch=NULL, lty=NULL, lwd=NULL,
                        cex=par('cex'), col=rep(par('col'),nc),
                        transparent=TRUE, plot=TRUE, key.opts=NULL,
                        empty.method=c('area','maxdim'),
                        numbins=25,
                        xlim=pr$usr[1:2], ylim=pr$usr[3:4],
                        grid=FALSE)
{
  nc           <- length(labels)
  empty.method <- match.arg(empty.method)
  pr  <- parGrid(grid)   # needed before the lazy xlim/ylim defaults evaluate
  uin <- pr$uin
  uin <- 1               # key size is already in x,y data units
  ## Width and height the key would occupy, without drawing it
  keysize <- putKey(list(0, 0), labels, type, pch, lty, lwd, cex, col,
                    transparent=transparent, plot=FALSE,
                    key.opts=key.opts, grid=grid) / uin
  ## Restrict candidate data points to finite values inside the plot region
  keep <- is.finite(x + y)
  if(length(xlim))
    keep <- keep & (x >= xlim[1] & x <= xlim[2])
  if(length(ylim))
    keep <- keep & (y >= ylim[1] & y <= ylim[2])
  xk <- x[keep]
  yk <- y[keep]
  keyloc <- largest.empty(xk, yk, xlim=xlim, ylim=ylim,
                          width=keysize[1], height=keysize[2],
                          method=empty.method, numbins=numbins, grid=grid)
  if(is.na(keyloc$x)) {
    cat('No empty area large enough for automatic key positioning. Specify keyloc or cex.\n')
    cat('Width and height of key as computed by key(), in data units:',
        format(keysize),'\n')
    return(keyloc)
  }
  if(plot)
    putKey(keyloc, labels, type,
           pch, lty, lwd, cex, col, transparent, plot=TRUE,
           key.opts=key.opts, grid=grid)
  invisible(keyloc)
}
## Find a large rectangle, at least width x height in size, within
## [xlim, ylim] that contains none of the data points (x, y).  Used for
## automatic key/label placement.  Four search methods: 'area'/'maxdim'
## call the Fortran routine largrec (binned search optimizing area or
## maximum dimension), 'exhaustive' calls the Fortran routine maxempr,
## and 'rexhaustive' is a pure-R exhaustive search by Hans Borchers.
## Returns list(x, y, rect, area): (x, y) is the rectangle's center and
## rect its corner coordinates; x=NA signals that no sufficiently large
## empty rectangle exists.
largest.empty <- function(x, y,
                          width=0, height=0,
                          numbins=25,
                          method=c('exhaustive','rexhaustive','area','maxdim'),
                          xlim=pr$usr[1:2], ylim=pr$usr[3:4],
                          pl=FALSE, grid=FALSE)
{
  method <- match.arg(method)
  ## Current plot limits needed only when xlim/ylim defaults are used
  if(missing(xlim) || missing(ylim)) pr <- parGrid(grid)
  isna <- is.na(x + y)
  if(any(isna)) {
    x <- x[!isna]
    y <- y[!isna]
  }
  n <- length(x)
  ## Sort points by y (assumed by the search algorithms below)
  o <- order(y)
  x <- x[o]
  y <- y[o]
  ## Integer method code passed to Fortran; also reused as a plot color
  itype <- 3 * (method=='exhaustive') + 4 * (method=='rexhaustive') +
    1 * (method=='area') + 2 * (method=='maxdim')
  storage.mode(x) <- storage.mode(y) <- storage.mode(xlim) <-
    storage.mode(ylim) <- 'double'
  storage.mode(numbins) <- storage.mode(itype) <- storage.mode(n) <- 'integer'
  if(method %in% c('area','maxdim')) {
    storage.mode(width) <- storage.mode(height) <- 'double' }
  if(method %in% c('area','maxdim')) {
    ## Binned search in compiled code
    a <-
      .Fortran(F_largrec, x, y, n,
               xlim, ylim,
               width, height, numbins, itype,
               rx=double(2), ry=double(2))
    x <- a$rx
    if(any(x > 1e29)) {   # sentinel value: no rectangle found
      warning('no empty rectangle was large enough')
      return(list(x=NA, y=NA, rect=list(x=rep(NA,4),y=rep(NA,4)), area=NA))
    }
    y <- a$ry
  } else if(method=='rexhaustive') {
    ## Author: Hans Borchers
    ## ax, ay: bounding box; returns the maximal empty rectangle meeting
    ## the width/height constraints as list(x=c(x1,x2), y=c(y1,y2))
    maxEmptyRect <- function(ax, ay, x, y, width=0, height=0) {
      n <- length(x)
      d <- sort(c(ax, x))
      D <- diff(d)
      m <- which.max(D)
      ## check vertical slices: start from the widest x-gap
      mgap <- D[m]
      maxr <- mgap * (ay[2] - ay[1])
      maxR <- c(d[m], ay[1], d[m+1], ay[2])
      ## o <- order(y)
      ## X <- x[o]; Y <- y[o]
      for (i in 1:n) {
        tl <- ax[1]; tr <- ax[2]
        if (i < n) {
          for (j in (i+1):n) {
            if (x[j] > tl && x[j] < tr) {
              ## check horizontal slices (j == i+1)
              ## and (all) rectangles above (x[i], y[i])
              area <- (tr-tl)*(y[j]-y[i])
              if (area > maxr && ((tr - tl) > width) &&
                  ((y[j] - y[i]) > height)) {
                maxr <- area
                maxR <- c(tl, y[i], tr, y[j])
              }
              ## shrink the open interval around x[i]
              if (x[j] > x[i]) tr <- x[j]
              else tl <- x[j]
            }
          }
        }
        ## check open rectangles above (x[i], y[i])
        area <- (tr-tl)*(ay[2]-y[i])
        if (area > maxr && ((tr - tl) > width) &&
            ((ay[2] - y[i]) > height )){
          maxr <- area
          maxR <- c(tl, y[i], tr, ay[2])
        }
        ## check open rectangles below (x[i], y[i])
        ri <- min(ax[2], x[y < y[i] & x > x[i]])
        li <- max(ax[1], x[y < y[i] & x < x[i]])
        area <- (ri - li)*(ay[2] - y[i])
        if (area > maxr && ((ri - li) > width) &&
            ((y[i] - ay[1]) > height)) {
          maxr <- area
          maxR <- c(li, ay[1], ri, y[i])
        }
      }
      return(list(x=maxR[c(1,3)], y=maxR[c(2,4)]))
    }
    a <- maxEmptyRect(xlim, ylim, x, y, width, height)
    x <- a$x
    y <- a$y
    if(diff(x) < width || diff(y) < height) {
      warning('no empty rectangle was large enough')
      return(list(x=NA, y=NA, rect=list(x=rep(NA,4),y=rep(NA,4)), area=NA))
    }
  } else {
    ## method='exhaustive': Fortran maxempr, seeded with the widest
    ## vertical x-gap (D[m] between d[m] and d[m+1])
    d <- sort(c(xlim, x))
    D <- diff(d)
    m <- which.max(D)
    a <- .Fortran(F_maxempr, xlim, ylim, x, y, n,
                  as.double(width), as.double(height),
                  as.double(c(D[m], d[m], d[m+1])),
                  area=double(1), rect=double(4))
    x <- a$rect[c(1,3)]
    y <- a$rect[c(2,4)]
    if(diff(x) < width || diff(y) < height) {
      warning('no empty rectangle was large enough')
      return(list(x=NA, y=NA, rect=list(x=rep(NA,4),y=rep(NA,4)), area=NA))
    }
  }
  ## Corner coordinates in polygon drawing order
  rectx <- x[c(1,2,2,1)]
  recty <- y[c(1,1,2,2)]
  if(pl)
    ordGridFun(grid)$polygon(rectx, recty, col=1+itype)
  structure(list(x=mean(x), y=mean(y), rect=list(x=rectx, y=recty),
                 area=diff(x)*diff(y)))
}
Hmisc/R/sedit.s 0000644 0001762 0000144 00000016034 14112731327 013017 0 ustar ligges users sedit <- function(text, from, to, test=NULL, wild.literal=FALSE)
{
  ## Vectorized string substitution: in each element of `text`, replace
  ## occurrences of each pattern in `from` by the corresponding element
  ## of `to` (recycled to the length of `from`).  Unless
  ## wild.literal=TRUE, '^'/'$' anchor a pattern to the start/end of the
  ## string and '*' acts as a wildcard (handled by
  ## replace.substring.wild, whose match may be filtered by the
  ## predicate `test`).
  to <- rep(to, length.out=length(from))
  for(i in seq_along(text)) {
    s <- text[i]
    if(length(s))
      for(j in 1:length(from)) {
        old <- from[j]
        front <- back <- FALSE
        if(!wild.literal) {
          ## Strip anchors from the pattern and remember them
          if(substring(old,1,1) == '^') {
            front <- TRUE;
            old <- substring(old,2)
          }
          if(substring(old,nchar(old)) == '$') {
            back <- TRUE; old <- substring(old, 1, nchar(old)-1)
          }
        }
        new <- to[j]
        lold <- nchar(old)
        if(lold > nchar(s))
          next   # pattern longer than the string: cannot match
        ## Individual characters of the pattern, to detect a wildcard
        ex.old <- substring(old, 1:lold, 1:lold)
        if(!wild.literal && any(ex.old == '*'))
          s <- replace.substring.wild(s, old, new, test=test, front=front, back=back)
        else {
          ## Literal match: compare every length-lold substring of s
          l.s <- nchar(s)
          is <- 1:(l.s-lold+1)
          if(front)
            is <- 1    # anchored: only consider a match at position 1
          ie <- is + lold - 1
          if(back)
            ie <- l.s  # anchored: the match must end the string
          ss <- substring(s, is, ie)
          k <- ss == old
          if(!any(k))
            next
          k <- is[k]
          ## Replace all literal occurrences at once
          substring2(s, k, k+lold-1) <- new
        }
      }
    text[i] <- s
  }
  text
}
## Locate every occurrence (possibly overlapping) of `string` inside the
## scalar character value `text`.  Returns list(first, last) of start and
## end positions; both are 0 when there is no match.  The optional
## restrict=c(lo, hi) keeps only matches whose start lies in [lo, hi].
substring.location <- function(text, string, restrict)
{
  if(length(text) > 1)
    stop('only works with a single character string')
  nt <- nchar(text)
  ns <- nchar(string)
  ## Trivial cases: pattern longer than text, or exactly as long
  if(ns > nt)
    return(list(first=0, last=0))
  if(ns == nt)
    return(if(text == string) list(first=1, last=nt)
           else list(first=0, last=0))
  ## Compare every length-ns window of text against the pattern
  starts <- 1:(nt - ns + 1)
  hits   <- starts[substring(text, starts, starts + ns - 1) == string]
  if(length(hits) == 0)
    return(list(first=0, last=0))
  if(!missing(restrict))
    hits <- hits[hits >= restrict[1] & hits <= restrict[2]]
  if(length(hits) == 0)
    return(list(first=0, last=0))
  list(first=hits, last=hits + ns - 1)
}
## if(version$major < 5) 14Sep00
## Vectorized substring extraction; `last` defaults to a value large
## enough to mean "through the end of the string".
substring2 <- function(text, first, last=100000L)
{
  base::substring(text, first, last)
}
## Replacement form of substring2.  When `first` is character it is
## treated as a vector of "from" patterns with `value` the replacements
## (delegated to sedit); otherwise characters first:last of `text` are
## replaced by `value`.  A single string with multiple first/last pairs
## is edited left to right, shifting later positions as the replacement
## length differs from the replaced span.
'substring2<-' <- function(text, first, last=100000, value)
{
  if(is.character(first)) {
    if(!missing(last))
      stop('wrong # arguments')
    return(sedit(text, first, value))  ## value was setto 25May01
  }
  nrep <- length(first)
  if(length(text) == 1 && nrep > 1) {
    if(missing(last))
      last <- nchar(text)
    last <- rep(last, length.out=nrep)
    for(k in seq_len(nrep)) {
      ## Rebuild the string around the k-th span
      pre  <- if(first[k] > 1) substring(text, 1, first[k] - 1)
      text <- paste(pre, value, substring(text, last[k] + 1), sep='')
      if(k < nrep) {
        ## Shift the remaining spans by the change in length
        shift <- nchar(value) - (last[k] - first[k] + 1)
        idx <- (k + 1):nrep
        first[idx] <- first[idx] + shift
        last[idx]  <- last[idx]  + shift
      }
    }
    return(text)
  }
  ## Vectorized case: one replacement per element of text
  paste(ifelse(first > 1, substring(text, 1, first - 1), ''), value,
        substring(text, last + 1), sep='')
}
## Replace, in the scalar string `text`, the substring matching the
## wildcard pattern `old` (containing exactly one '*') with `new`
## (containing at most one '*', which stands for the text matched by the
## '*' in `old`).  '^'/'$' prefixes/suffixes of `old` (or front=/back=)
## anchor a pure-wildcard pattern to the start/end of `text`.  `test` is
## an optional predicate the '*'-matched text must satisfy for the
## replacement to happen.  Returns `text` unchanged when no qualifying
## match is found.
##
## Fixes in this revision:
##  * the final paste() condition read `if(loc.after$last0)`, which is a
##    syntax error (garbled comparison); restored as
##    `if(loc.after$last > 0)` -- append the tail of text only when a
##    literal suffix of `old` was matched
##  * in the anchored branch, `star.new$last == length(new)` compared a
##    position to length(new) which is always 1 for a scalar string;
##    corrected to nchar(new)
replace.substring.wild <- function(text, old, new, test=NULL,
                                   front=FALSE, back=FALSE)
{
  if(length(text)>1)
    stop('only works with a single character string')
  ## Strip anchors from `old` unless front/back were given explicitly
  if(missing(front) && missing(back)) {
    if(substring(old,1,1) == '^') {
      front <- TRUE;
      old <- substring(old,2)
    }
    if(substring(old, nchar(old)) == '$') {
      back <- TRUE
      old <- substring(old, 1, nchar(old)-1)
    }
  }
  if((front || back) && old!='*')
    stop('front and back (^ and $) only work when the rest of old is *')
  ## Position of the single '*' in old and (optionally) in new
  star.old <- substring.location(old,'*')
  if(length(star.old$first)>1)
    stop('does not handle > 1 * in old')
  if(sum(star.old$first) == 0)
    stop('no * in old')
  star.new <- substring.location(new,'*')
  if(length(star.new$first)>1)
    stop('cannot have > 1 * in new')
  if(old == '*' && (front | back)) {
    ## Anchored pure-wildcard: find the longest prefix (front) or suffix
    ## (back) of text satisfying test()
    if(front && back)
      stop('may not specify both front and back (or ^ and $) with old=*')
    if(length(test) == 0)
      stop('must specify test= with old=^* or *$')
    et <- nchar(text)
    if(front) {
      st <- rep(1, et)
      en <- et:1        # candidate prefixes, longest first
    } else {
      st <- 1:et        # candidate suffixes, longest first
      en <- rep(et, et)
    }
    qual <- test(substring(text, st, en))
    if(!any(qual))
      return(text)
    st <- (st[qual])[1]
    en <- (en[qual])[1]
    text.before <- if(st == 1) ''
                   else substring(text, 1, st-1)
    text.after  <- if(en == et) ''
                   else substring(text, en+1, et)
    text.star   <- substring(text, st, en)
    new.before.star <-
      if(star.new$first > 1)
        substring(new, 1, star.new$first-1)
      else ''
    ## BUG FIX: compare against nchar(new), not length(new)
    new.after.star <- if(star.new$last == nchar(new)) ''
                      else substring(new, star.new$last+1)
    return(paste(text.before, new.before.star, text.star, new.after.star,
                 text.after, sep=''))
  }
  ## General case: old = before*after; locate the literal pieces in text
  old.before.star <- if(star.old$first == 1) ''
                     else substring(old, 1, star.old$first-1)
  old.after.star  <- if(star.old$last == nchar(old)) ''
                     else substring(old, star.old$last+1)
  if(old.before.star == '')
    loc.before <- list(first=0, last=0)
  else {
    ## Use the first occurrence of the literal prefix
    loc.before <- substring.location(text, old.before.star)
    loc.before <- list(first=loc.before$first[1], last=loc.before$last[1])
  }
  if(sum(loc.before$first + loc.before$last) == 0)
    return(text)   # literal prefix of old not present
  loc.after <- if(old.after.star == '') list(first=0, last=0)
  else {
    ## Use the last occurrence of the literal suffix after the prefix
    la <- substring.location(text, old.after.star,
                             restrict=c(loc.before$last+1, 1e10))
    lastpos <- length(la$first)
    la <- list(first=la$first[lastpos], last=la$last[lastpos])
    if(la$first + la$last == 0)
      return(text)  # literal suffix not found after the prefix
    la
  }
  ## The text matched by '*' lies between the two literal pieces
  loc.star <- list(first=loc.before$last+1,
                   last=if(loc.after$first == 0) nchar(text)
                        else loc.after$first-1)
  star.text <- substring(text, loc.star$first, loc.star$last)
  if(length(test) && !test(star.text))
    return(text)
  if(star.new$first == 0)
    ## NOTE(review): as in the original, when new contains no '*' the
    ## text following the match is discarded -- confirm this is intended
    return(paste(if(loc.before$first > 1) substring(text, 1, loc.before$first-1),
                 new, sep=''))
  new.before.star <- if(star.new$first == 1) ''
                     else substring(new, 1, star.new$first-1)
  new.after.star  <- if(star.new$last == nchar(new)) ''
                     else substring(new, star.new$last+1)
  paste(if(loc.before$first > 1) substring(text, 1, loc.before$first-1),
        new.before.star,
        substring(text, loc.star$first, loc.star$last),
        new.after.star,
        ## BUG FIX: original had the invalid `if(loc.after$last0)`;
        ## append the remainder only when a literal suffix was matched
        if(loc.after$last > 0)
          substring(text, loc.after$last+1),
        sep='')
}
## Some functions useful as test= arguments to replace.substring.wild, sedit
## TRUE for each element of `string` that parses as a number
## (NA and non-numeric text yield FALSE; parse warnings are suppressed)
numeric.string <- function(string)
{
  parsed <- suppressWarnings(as.numeric(string))
  !is.na(parsed)
}
## TRUE for each element of `string` consisting entirely of the ASCII
## digits 0-9 (and containing at least one character).  "" and NA yield
## FALSE, as in the original character-by-character implementation.
##
## Rewritten as a single vectorized grepl() call.  This also fixes a bug
## in the original: its `for(i in 1:k)` loop with k = 0 iterated over
## c(1, 0), so character(0) input returned a length-1 result instead of
## logical(0).
all.digits <- function(string)
{
  grepl('^[0-9]+$', string)
}
Hmisc/R/histSpikeg.s 0000644 0001762 0000144 00000012665 14764563231 014041 0 ustar ligges users histSpikeg <- function(formula=NULL, predictions=NULL, data, plotly=NULL,
                       lowess=FALSE, xlim=NULL, ylim=NULL,
                       side=1, nint=100,
                       frac=function(f) 0.01 + 0.02*sqrt(f-1) /
                            sqrt(max(f,2)-1),
                       span=3/4, histcol='black', showlegend=TRUE) {
  ## Build spike-histogram layers (ggplot2 geoms or plotly traces)
  ## showing the data density of the x variable in `formula`, with spike
  ## heights scaled by frac(frequency).  Spikes hang from predicted
  ## curves (`predictions`) or lowess smooths when available, otherwise
  ## from the axis given by `side` (1=bottom, 3=top).
  ## Raw data in data, predicted curves are in predictions
  ## If predictions is not given, side (1 or 3) is used
  v <- all.vars(formula)
  yv <- v[ 1]   # response variable name
  xv <- v[-1]   # x variable, then optional grouping variables
  X <- xv[1]
  if(lowess) {
    if(yv %nin% names(data))
      stop(paste(yv, 'must be in data if lowess=TRUE'))
    yval <- data[[yv]]
    ## No robustness iterations when the response is binary
    iter <- if(length(unique(yval[! is.na(yval)])) > 2) 3 else 0
    ## lowess on complete pairs only
    lows <- function(x, y, iter, span) {
      i <- ! is.na(x + y)
      lowess(x[i], y[i], iter=iter, f=span)
    }
  }
  if(length(xv) > 1 && ! (lowess || length(predictions)))
    stop('predictions must be given or lowess=TRUE if formula has > 1 variable on the right')
  x1 <- data[[X]]
  if(length(xlim)) {
    data <- data[x1 >= xlim[1] & x1 <= xlim[2], ]
    x1 <- data[[X]]
  }
  ## Round x into at most ~nint distinct bin values
  if(length(unique(x1)) > nint) {
    eps <- diff(range(x1, na.rm=TRUE)) / nint
    x1 <- round(x1 / eps) * eps
    data[[X]] <- x1
  }
  p <- predictions
  # xr <- NULL
  # if(length(p)) xr <- range(p[[X]], na.rm=TRUE)
  # else if(lowess) xr <- range(data[[X]], na.rm=TRUE)
  xr <- xlim
  if(length(xv) == 1) {
    ## No grouping variables: frequency table of the binned x values
    tab <- as.data.frame(do.call(table, data[X]))
    tab[[X]] <- as.numeric(as.character(tab[[X]]))
    if(length(xr)) tab <- tab[tab[[X]] >= xr[1] & tab[[X]] <= xr[2], ]
    if(lowess) {
      p <- as.data.frame(lows(data[[X]], yval, iter, span))
      names(p) <- c(X, yv)
    }
    ## .yy. = curve height at each spike location
    if(length(p)) tab$.yy. <- approxExtrap(p[[X]], p[[yv]], xout=tab[[X]])$y
  } else { ## grouping variable(s) present
    tab <- as.data.frame(do.call(table, data[xv]))
    tab <- tab[tab$Freq > 0, ]
    tab[[X]] <- as.numeric(as.character(tab[[X]]))
    if(length(xr)) tab <- tab[tab[[X]] >= xr[1] & tab[[X]] <= xr[2], ]
    tab$.yy. <- rep(NA, nrow(tab))
    gv <- xv[-1]          # grouping variables
    U <- unique(tab[gv])  # unique combinations
    if(! lowess) {
      ## Interpolate each group's prediction curve at that group's spikes
      for(k in 1 : nrow(U)) {
        i <- rep(TRUE, nrow(tab))
        j <- rep(TRUE, nrow(p))
        for(l in 1 : ncol(U)) {
          currgroup <- as.character(U[k, gv[l], drop=TRUE])
          i <- i & (tab[[gv[l]]] == currgroup)
          j <- j & (p[[gv[l]]] == currgroup)
        } ## now all grouping variables intersected
        tab$.yy.[i] <- approxExtrap(p[[X]][j], p[[yv]][j], xout=tab[[X]][i])$y
      }
    } ## end if(! lowess)
    else { # lowess; need to compute p
      p <- NULL
      for(k in 1 : nrow(U)) {
        i <- rep(TRUE, nrow(tab))
        j <- rep(TRUE, nrow(data))
        for(l in 1 : ncol(U)) {
          currgroup <- as.character(U[k, gv[l], drop=TRUE])
          i <- i & (tab[[gv[l]]] == currgroup)
          j <- j & (data[[gv[l]]] == currgroup)
        } ## now all grouping variables intersected
        sm <- lows(data[[X]][j], data[[yv]][j], iter, span)
        Sm <- sm; names(Sm) <- c(X, yv)
        Uk <- U[k, , drop=FALSE]; row.names(Uk) <- NULL
        ## Accumulate per-group smooths into one prediction data frame
        p <- rbind(p, data.frame(Uk, Sm))
        ## NOTE(review): approxExtrap is called here with the smooth list
        ## `sm` as the single positional argument; presumably it accepts
        ## an xy-list -- confirm against approxExtrap's definition
        tab$.yy.[i] <- approxExtrap(sm, xout=tab[[X]][i])$y
      }
    } ## end lowess
  } ## end grouping variables present
  ## Infer the y range when not supplied
  if(! length(ylim)) {
    if(length(data) && yv %in% names(data))
      ylim <- range(data[[yv]], na.rm=TRUE)
    else if(length(p)) ylim <- range(p[[yv]], na.rm=TRUE)
  }
  if(! length(ylim)) stop('no way to infer ylim from information provided')
  ## Spike length as a fraction of the y range
  tab$.rf. <- frac(tab$Freq) * diff(ylim)
  n <- nrow(tab)
  ## Spike endpoints: centered on the curve when available, otherwise
  ## growing inward from the axis given by `side`
  tab$.ylo. <- if(length(p)) tab$.yy. - tab$.rf.
               else if(side == 1) rep(ylim[1], n) else rep(ylim[2], n)
  tab$.yhi. <- if(length(p)) tab$.yy. + tab$.rf.
               else if(side == 1) ylim[1] + tab$.rf. else ylim[2] - tab$.rf.
  P <- plotly
  if(! is.null(P) && requireNamespace("plotly")) {
    tab$.xx. <- tab[[X]]
    ## Got NA/NaN argument, numerical expression has 8 elements
    ## when using add_segments
    # P <- plotly:add_segments(P, data=tab,
    #                          x=~ .xx., xend=~ .xx.,
    #                          y=~ .ylo., yend=~ .yhi.,
    #                          hoverinfo='none',
    #                          name='Histogram', legendgroup='Histogram')
    ## Encode each spike as (x, x, NA) / (ylo, yhi, NA) triples so a
    ## single add_lines trace draws all segments
    n <- 3 * nrow(tab)
    .xx. <- .yy. <- rep(NA, n)
    .xx.[seq(1, n, by=3)] <- tab[[X]]
    .xx.[seq(2, n, by=3)] <- tab[[X]]
    .yy.[seq(1, n, by=3)] <- tab$.ylo.
    .yy.[seq(2, n, by=3)] <- tab$.yhi.
    P <- plotly::add_lines(P, data=data.frame(.xx., .yy.),
                           x=~.xx., y=~.yy.,
                           line=list(color=histcol, width=1.4),
                           hoverinfo='none', showlegend=showlegend,
                           name='Histogram', legendgroup='Histogram')
    if(lowess) {
      af <- function(x) as.formula(paste('~', x))
      P <- plotly::add_lines(P, data=p, x=af(X), y=af(yv),
                             hoverinfo='none', showlegend=showlegend,
                             name='loess', legendgroup='loess')
    }
    return(P)
  }
  ## ggplot2 output: list of layers (spike histogram + optional lowess)
  res <- list(hist=geom_segment(data=tab, aes(x=.data[[X]], xend=.data[[X]],
                                              y=.data$.ylo., yend=.data$.yhi.),
                                size=0.25, col=histcol),
              lowess=if(lowess) geom_line(data=p, aes(x=.data[[X]], y=.data[[yv]])) )
  res
}
# utils::globalVariables(c('aes', 'Freq', '.ylo.', '.yhi.', 'x', 'y')) Hmisc/R/pMedian.r 0000644 0001762 0000144 00000002202 14765123723 013264 0 ustar ligges users #' Pseudomedian
#'
#' Uses fast Fortran code to compute the pseudomedian of a numeric vector. The pseudomedian is the median of all possible midpoints of two observations. The pseudomedian is also called the Hodges-Lehmann one-sample estimator. The Fortran code was originally from JF Monahan, and was converted to C++ in the `DescTools` package. It has been converted to Fortran 2018 here.
#' @title pMedian
#' @param x a numeric vector
#' @param na.rm set to `TRUE` to exclude `NA`s before computing the pseudomedian
#'
#' @return a scalar numeric value
#' @export
#' @md
#' @seealso [stats::median()], [stats::wilcox.test()]
#' @examples
#' x <- c(1:4, 10000)
#' pMedian(x)
#' # Compare with brute force calculation and with wilcox.test
#' w <- outer(x, x, '+')
#' median(w[lower.tri(w, diag=TRUE)]) / 2
#' wilcox.test(x, conf.int=TRUE)
pMedian <- function(x, na.rm = FALSE) {
  if(na.rm) x <- x[! is.na(x)]
  n <- length(x)
  ## Edge cases: empty -> NA, single value -> that value
  if(n == 0) return(NA_real_)
  if(n == 1) return(as.double(x))
  ## F_hlqest expects sorted doubles; the runif(1000) vector presumably
  ## supplies random numbers for Monahan's randomized selection
  ## algorithm -- confirm against the Fortran source
  .Fortran(F_hlqest, as.double(sort(x)), as.double(runif(1000)), as.integer(n), result=double(1))$result
}
Hmisc/R/na.delete.s 0000644 0001762 0000144 00000004741 14044103667 013554 0 ustar ligges users ## Enhancement of na.omit F. Harrell 20 Oct 91
## Allows an element of the data frame to be another data frame
## Note: S does not invoke na.action if only a data frame variable is missing!
## na.action function: drop rows of `frame` containing any NA, recording
## per-variable NA counts and the deleted row names in a "delete" object
## stored in attr(frame, 'na.action').  A matrix or data-frame component
## counts a row as missing when any of its columns is NA.
na.delete <- function(frame)
{
  y.detail <- na.detail.response(frame)  # optional response summaries
  n <- length(frame)                     # number of variables (columns)
  omit <- FALSE                          # grows into a row-wise logical via |
  vars <- seq(length = n)
  nmiss <- rep(0,n)                      # NA count per variable
  storage.mode(nmiss) <- "integer"
  for(j in vars) {
    x <- frame[[j]]
    if(is.data.frame(x))
      x <- as.matrix(x)
    class(x) <- NULL #so Surv object is.na ignored
    if(!is.atomic(x))
      stop("non-atomic, non-data frame variables not allowed")
    ## variables are assumed to be either some sort of matrix, numeric or cat'y
    isna <- is.na(x) #Change from T. Therneau
    d <- dim(x)
    if(is.null(d) || length(d) != 2) {
      ##isna <- is.na(x)
      nmiss[j] <- sum(isna)
      omit <- omit | isna
    } else {
      ## Matrix variable: a row is missing if any of its columns is
      ##isna <-is.na(x %*% rep(0,d[2]))
      isna <- (isna %*% rep(1,d[2])) > 0
      nmiss[j] <- sum(isna)
      omit <- omit | isna
    }
  }
  if(any(omit)) {
    rn <- row.names(frame)
    frame <- frame[!omit,,drop=FALSE]
    names(nmiss) <- names(frame)
    ## a %ia% b terms are included - delete them since main effects
    ## already counted (next 2 stmts reinstated 27Oct93)
    i <- grep("%ia%", names(nmiss))
    if(length(i)>0)
      nmiss <- nmiss[-i]
    attr(frame,"nmiss") <- nmiss # for backward compatibility
    ## Indices of dropped rows, named by their original row names
    temp <- seq(omit)[omit]
    names(temp) <- rn[omit]
    na.info <- list(nmiss=nmiss, omit=temp,
                    na.detail.response=y.detail)
    class(na.info) <- "delete"
    attr(frame, "na.action") <- na.info
  }
  frame
}
## naprint method for the "delete" na.action: reports per-variable
## missing-value counts and, when recorded, response statistics by
## missing/non-missing status of the predictors.  Returns NULL invisibly.
naprint.delete <- function(x, ...)
{
  counts <- x$nmiss
  if(length(counts)) {
    cat("Frequencies of Missing Values Due to Each Variable\n")
    print(counts)
    cat("\n")
  }
  detail <- x$na.detail.response
  if(length(detail)) {
    cat("\nStatistics on Response by Missing/Non-Missing Status of Predictors\n\n")
    print(unclass(detail))
    cat("\n")
  }
  invisible()
}
globalVariables("naresid.omit")
## naresid/napredict method for the "delete" na.action: re-insert NAs
## (for the rows na.delete removed) into residuals or predictions x.
## Delegates to naresid.omit when such a function exists (S-Plus
## compatibility), otherwise to the unexported stats::naresid.exclude.
naresid.delete <- napredict.delete <- function(omit, x, ...)
{
  omit <- omit$omit
  if(exists('naresid.omit'))
    return(naresid.omit(omit, x))
  f <- if(existsFunction('naresid.exclude')) naresid.exclude
       else getFromNamespace('naresid.exclude', 'stats')
  f(omit, x)
}
## nafitted method for the "delete" na.action: re-insert NAs into fitted
## values using the omit information recorded by na.delete.
nafitted.delete <- function(obj, x)
{
  omit <- obj$omit
  if(exists('naresid.omit'))
    return(naresid.omit(omit, x))
  getFromNamespace('naresid.exclude', 'stats')(omit, x)
}
Hmisc/R/dotchartpl.s 0000644 0001762 0000144 00000031207 14501340672 014053 0 ustar ligges users dotchartpl <- function(x, major=NULL, minor=NULL,
group=NULL, mult=NULL,
big=NULL, htext=NULL,
num=NULL, denom=NULL,
numlabel='', denomlabel='',
fun=function(x) x, ifun=function(x) x,
op='-',
lower=NULL, upper=NULL,
refgroup=NULL, sortdiff=TRUE, conf.int=0.95,
minkeep=NULL,
xlim=NULL, xlab='Proportion',
tracename=NULL, limitstracename='Limits',
nonbigtracename='Stratified Estimates',
dec=3, width=800, height=NULL,
col=colorspace::rainbow_hcl
) {
if (!requireNamespace("plotly"))
stop("This function requires the 'plotly' package.")
mu <- markupSpecs$html
bold <- mu$bold
if(! length(xlim)) xlim <- c(min(c(x, lower), na.rm=TRUE),
max(c(x, upper), na.rm=TRUE))
majorpres <- length(major) > 0
major <- if(majorpres) as.character(major) else rep(' ', length(x))
minorpres <- length(minor) > 0
if(! (majorpres || minorpres)) stop('must specify major or minor or both')
grouppres <- length(group) > 0 ## superpositioning variable
multpres <- length(mult) > 0
limspres <- length(lower) * length(upper) > 0
rgpres <- length(refgroup) > 0
if(minorpres) minor <- as.character(minor)
if(grouppres) group <- as.character(group)
if(multpres) mult <- as.character(mult)
ugroup <- if(grouppres) unique(group) else ''
fmt <- function(x) format(round(x, dec))
if(rgpres) {
if(! grouppres || multpres || length(big))
stop('when refgroup is given, group must be given and mult and big must not be used')
## big=TRUE for non-stratified estimates
if(length(ugroup) != 2)
stop('refgroup only works for 2 groups')
if(refgroup %nin% unique(group))
stop(paste('refgroup must be one of the following:',
paste(unique(group), collapse=', ')))
altgroup <- setdiff(ugroup, refgroup)
}
cols <- if(length(col)) {
if(! is.function(col)) col
else
if(grouppres) col(length(unique(group)))
else
col(1) }
if(! length(col) && ! grouppres) cols <- 'black'
dropped <- character(0)
D <- NULL
if(rgpres && minorpres) {
z <- pairUpDiff(x, major, minor, group,
refgroup=refgroup, lower=lower, upper=upper,
minkeep=minkeep, sortdiff=sortdiff, conf.int=conf.int)
Z <- z$X
D <- z$D
dropped <- z$dropped
i <- Z[,'subscripts']
x <- x[i]
major <- major[i]
minor <- minor[i]
group <- group[i]
if(length(num)) num <- num[i]
if(length(denom)) denom <- denom[i]
if(length(mult)) mult <- mult[i]
if(length(big)) big <- big[i]
if(length(lower)) lower <- lower[i]
if(length(upper)) upper <- upper[i]
if(length(htext)) htext <- htext[i]
} # end if(rgpres && ...
ht <- htext
if(numlabel != '') numlabel <- paste0(' ', numlabel)
if(denomlabel != '') denomlabel <- paste0(' ', denomlabel)
if(length(num))
ht <- paste0(ht, if(length(htext)) mu$lspace,
fmt(fun(x)), mu$lspace,
mu$frac(paste0(num, numlabel),
paste0(denom, denomlabel), size=95))
## if confidence interval for differences are not to be displayed,
## put point estimate confidence intervals in point hypertext
if(length(ugroup) != 2 && limspres)
ht <- paste0(ht, ' [',
fmt(fun(lower)), ', ',
fmt(fun(upper)), ']')
ht <- paste0(ht, if(length(ht)) ' ',
if(majorpres) paste0(major, ': '))
if(minorpres) ht <- paste0(ht, minor)
if(grouppres) ht <- paste0(ht, ' ',
gsub(' stratified by .*', '', group))
if(multpres) ht <- paste0(ht, ' ', mult)
n <- length(x)
minor <- if(minorpres) minor else rep('', n)
group <- if(grouppres) group else rep('', n)
mult <- if(multpres) mult else rep('', n)
if(! length(big)) big <- rep(TRUE, n)
w <- c(length(x), length(major), length(minor), length(group),
length(num), length(denom),
length(mult), length(big), length(htext),
length(lower), length(upper))
w <- w[w > 0]
if(diff(range(w)) > 0)
stop('x, major, minor, group, num, denom, mult, big, htext, lower, upper must all have same length when used')
y <- 1
X <- Y <- Ydiff <- Lower <- Upper <- numeric(0)
Group <- Htext <- Htextl <- character(0)
Big <- logical(0)
yl <- numeric(0); yt <- ytnb <- character(0)
difflower <- diffupper <- Diff <- numeric(0)
coldiff <- htdiff <- character(0)
lines <- 0
for(ma in unique(major)) {
y <- y - 1
yl <- c(yl, y)
yt <- c(yt, bold(ma))
ytnb <- c(ytnb, ma)
lminor <- unique(minor[major == ma])
y <- y + if(all(lminor == '')) 0.4 else 0
lines <- lines + 1
for(mi in lminor) {
y <- y - 0.4
lines <- lines + (mi != '')
j <- which(major == ma & minor == mi)
X <- c(X, x[j])
Y <- c(Y, ifelse(big[j], y, y - .14))
if(length(D)) {
k <- which(D$major == ma & D$minor == mi)
if(length(k) != 1)
stop('must have one observation for a major/minor/group combination')
diff <- D$diff[k]
coldiff <- c(coldiff,
ifelse(diff > 0,
paste0(altgroup, ' ',
htmlTranslate('>'), ' ', refgroup),
paste0(refgroup, ' ',
htmlTranslate('>='), ' ', altgroup)))
Ydiff <- c(Ydiff, y)
htd <- if(majorpres) paste0(ma, ': ') else ''
if(minorpres) htd <- paste0(htd, mi)
htd <- paste0(htd, ' ', altgroup, ' ', op, ' ',
refgroup, ': ', fmt(fun(diff)))
if(! is.logical(conf.int) && limspres && length(D)) {
dlower <- fmt(fun(D$lower[k]))
dupper <- fmt(fun(D$upper[k]))
htd <- paste0(htd, ' ', conf.int, ' C.L.: [', dlower,
', ', dupper, ']')
Diff <- c(Diff, diff)
difflower <- c(difflower, D$lowermid[k])
diffupper <- c(diffupper, D$uppermid[k])
}
htdiff <- c(htdiff, htd)
} # end if(length(D))
if(limspres && ! length(D)) {
Lower <- c(Lower, lower[j])
Upper <- c(Upper, upper[j])
Htextl <- c(Htextl,
paste0('[',
fmt(fun(lower[j])), ', ',
fmt(fun(upper[j])), ']' ) )
}
Group <- c(Group, group[j])
Big <- c(Big, big[j])
Htext <- c(Htext, ht[j])
if(mi != '') {
yl <- c(yl, y)
yt <- c(yt, mi)
ytnb <- c(ytnb, mi)
}
} # end mi in lminor
} # end ma in lmajor
d <- data.frame(X, Y, Group, Htext, Big)
if(limspres && ! length(D)) {
d$Lower <- Lower
d$Upper <- Upper
d$Htextl <- Htextl
}
if(! grouppres) d$Group <- NULL
if(any(d$Big)) {
db <- subset(d, Big) # non-stratified estimates
## For some reason, colors= in add_marker did not always take
if(! length(height)) height <- plotlyParm$heightDotchart(lines)
auto <- .Options$plotlyauto
if(length(auto) && auto) height <- width <- NULL
p <- plotly::plot_ly(data=db, colors=cols, height=height, width=width)
if(limspres && length(D)) {
ddiff <- data.frame(Diff, difflower, diffupper, Ydiff, coldiff, htdiff)
## Could not get color=coldiff to work; perhaps conflicted with
## earlier use of color =
nDiff <- Diff[! is.na(Diff)]
if(length(nDiff)) {
if(any(nDiff > 0))
p <- plotly::add_segments(p, data=subset(ddiff, Diff > 0),
x= ~ difflower, xend= ~ diffupper,
y= ~ Ydiff, yend= ~ Ydiff,
color = I('lightgray'),
text = ~ htdiff, hoverinfo='text',
name = paste0(htmlSpecial('half'),
' CL of difference ',
coldiff[Diff > 0][1]))
if(any(nDiff <= 0))
p <- plotly::add_segments(p, data=subset(ddiff, Diff <= 0),
x= ~ difflower, xend= ~ diffupper,
y= ~ Ydiff, yend= ~ Ydiff,
color = I('lavender'),
text = ~ htdiff, hoverinfo='text',
name = paste0(htmlSpecial('half'),
' CL of difference ',
coldiff[Diff <= 0][1]))
}
}
## tracename and limitstracename are used if groups not used
if(limspres && ! length(D)) ## && length(ugroup) == 2) why ??
p <- if(grouppres)
plotly::add_segments(p, data=db,
x=~ Lower, xend=~ Upper,
y=~ Y, yend=~ Y,
color=~ Group, text= ~ Htextl,
colors=cols, hoverinfo='text')
else
plotly::add_segments(p, x=~ Lower, xend=~ Upper,
y=~ Y, yend=~ Y,
text= ~ Htextl,
color=I('lightgray'), hoverinfo='text',
name=limitstracename)
p <- if(grouppres)
plotly::add_markers(p, x=~ X, y=~ Y,
color=~ Group, text=~ Htext,
colors=cols, hoverinfo='text')
else
plotly::add_markers(p, x=~ X, y=~ Y,
text=~ Htext, color=I('black'),
hoverinfo='text',
name=if(length(tracename)) tracename
else
if(any(! d$Big)) 'All' else '')
} else p <- plotly::plot_ly(colors=cols) # Big is not used
if(any(! d$Big)) {
dnb <- subset(d, ! Big) # stratified estimates
if(limspres)
p <- if(grouppres && length(ugroup) == 2)
plotly::add_segments(p, data=dnb,
x=~ Lower, xend=~ Upper,
y=~ Y, yend=~ Y,
color=~ Group, text=~ Htextl,
colors=cols, hoverinfo='text')
else plotly::add_segments(p, data=dnb,
x=~ Lower, xend=~ Upper,
y=~ Y, yend=~ Y,
text=~ Htextl,
color=I('lightgray'), hoverinfo='text',
name=limitstracename)
dnb$sGroup <- paste0(dnb$Group, '\nby ',
gsub('Stratified by\n', '', nonbigtracename))
## Don't understand why if use ~ sGroup right below the symbols no
## longer appear in the legend for the non-stratified estimates
p <- if(grouppres)
plotly::add_markers(p, data=dnb,
x=~ X, y=~ Y,
color=~ Group,
text=~ Htext,
colors=cols,
marker=list(opacity=0.45, size=4),
hoverinfo='text')
# name=nonbigtracename)
else
plotly::add_markers(p, data=dnb,
x=~ X, y=~ Y,
text=~ Htext,
marker=list(opacity=0.45, size=4),
color=I('black'), hoverinfo='text',
name=nonbigtracename)
} # end if(any(! Big))
leftmargin <- plotlyParm$lrmargin(ytnb)
tickvals <- pretty(fun(xlim), n=10)
xaxis <- list(title=xlab, range=xlim, zeroline=FALSE,
tickvals=ifun(tickvals), ticktext=format(tickvals))
yaxis <- list(title='',
range=c(min(Y) - 0.2, 0.2),
zeroline=FALSE, tickvals=yl, ticktext=yt)
p <- plotly::layout(p,
xaxis=xaxis,
yaxis=yaxis,
margin=list(l=leftmargin),
legend=list(traceorder=if(length(difflower))
'reversed' else 'normal'))
attr(p, 'levelsRemoved') <- dropped
p
}
Hmisc/R/rcorrp.cens.s 0000644 0001762 0000144 00000013665 14400370362 014152 0 ustar ligges users ## Computes rank correlation measures between a variable X and a possibly
## censored Surv variable Y
## Rank correlation is extension of Somers' Dxy = 2(Concordance Prob-.5)
## See Harrell et al JAMA 1984(?)
## Set outx=T to exclude ties in X from computations (-> Goodman-Kruskal
## gamma-type rank correlation)
## No. This is the version extended to paired predictions
## method=1: concordance=delta x1 < delta x2
## method=2: concordance=x1 concordant and x2 discordant
rcorrp.cens <- function(x1, x2, S, outx=FALSE, method=1)
{
  ## Rank correlation (Somers' Dxy) between each of two paired predictors
  ## x1, x2 and a possibly right-censored response S, extended to paired
  ## predictions (see the header comments of this file).
  ##   method=1: concordance based on delta x1 vs. delta x2
  ##   method=2: concordance = x1 concordant and x2 discordant
  ##   outx=TRUE: exclude ties in x (Goodman-Kruskal gamma-type measure)
  ## Returns a named numeric vector of Dxy, its SD and related counts
  ## computed by the compiled routine cidxcp.
  if(inherits(S, 'Surv')) {
    if(attr(S, 'type') != 'right')
      stop('only handles right censored times')
  } else S <- cbind(S, rep(1, length(S)))  # uncensored: every obs an event
  y     <- S[, 1]
  event <- S[, 2]
  if(length(x1) != length(x2))
    stop("x1 and x2 must have same length")   # was wrongly "x1 and x3"
  if(length(y) != length(x1))
    stop("y must have same length as x")
  if(! (method %in% 1:2))    # scalar test; avoid elementwise & in if()
    stop("method must be 1 or 2")

  ## Casewise deletion of missing values
  miss  <- is.na(x1 + x2 + y + event)
  nmiss <- sum(miss)
  if(nmiss > 0) {
    miss  <- ! miss
    x1    <- x1[miss]
    x2    <- x2[miss]
    y     <- y[miss]
    event <- event[miss]
  }
  n <- length(x1)
  if(n < 2)
    stop("<2 non-missing observations")
  ne <- sum(event)

  ## Storage modes required by the Fortran routine
  storage.mode(x1)     <- "double"
  storage.mode(x2)     <- "double"
  storage.mode(y)      <- "double"
  storage.mode(event)  <- "logical"
  storage.mode(method) <- "integer"
  storage.mode(outx)   <- "logical"

  z <-
    .Fortran(F_cidxcp,x1,x2,y,event,length(x1),method,outx,
             nrel=double(1),nuncert=double(1),
             c1=double(1),c2=double(1),gamma1=double(1),gamma2=double(1),
             gamma=double(1),sd=double(1),c12=double(1),c21=double(1))
  r <- c(z$gamma,z$sd,z$c12,z$c21,n,nmiss,ne,z$nrel,z$nuncert,z$c1,z$c2,
         z$gamma1,z$gamma2)
  names(r) <- c("Dxy","S.D.","x1 more concordant","x2 more concordant",
                "n","missing","uncensored",
                "Relevant Pairs","Uncertain","C X1","C X2","Dxy X1","Dxy X2")
  r
}
## Compare two sets of predicted probabilities x1 (reference model) and
## x2 (new model) against a binary outcome y (coded 0/1).  Computes
## proportions of upward/downward probability movement separately for
## events and non-events, the NRI-type indexes with standard errors and
## Z statistics, and the IDI-type index with its standard error.
## NOTE(review): the returned component names come from the local
## variable names via llist(), so these names are part of the contract.
improveProb <- function(x1, x2, y)
{
  ## Casewise deletion of missing values
  s <- is.na(x1+x2+y)
  if(any(s))
    {
      s <- !s
      x1 <- x1[s]
      x2 <- x2[s]
      y <- y[s]
    }
  n <- length(y)
  y <- as.numeric(y)
  u <- sort(unique(y))
  if(length(u) != 2 || u[1] != 0 || u[2] != 1)
    stop('y must have two values: 0 and 1')
  r <- range(x1,x2)
  if(r[1] < 0 || r[2] > 1)
    stop('x1 and x2 must be in [0,1]')

  a <- y==1            # events
  b <- y==0            # non-events
  na <- sum(a)
  nb <- sum(b)
  d <- x2 - x1         # change in predicted probability

  ## Counts/proportions of increases (.up) and decreases (.down),
  ## separately for events (.ev) and non-events (.ne)
  nup.ev <- sum(d[a] > 0); pup.ev <- nup.ev/na
  nup.ne <- sum(d[b] > 0); pup.ne <- nup.ne/nb
  ndown.ev <- sum(d[a] < 0); pdown.ev <- ndown.ev/na
  ndown.ne <- sum(d[b] < 0); pdown.ne <- ndown.ne/nb

  ## Event-specific index: net proportion moving up among events
  nri.ev <- pup.ev - pdown.ev
  # se.nri.ev <- sqrt((pup.ev + pdown.ev)/na) # old est under H0
  v.nri.ev <- (nup.ev + ndown.ev)/(na^2) - ((nup.ev - ndown.ev)^2)/(na^3)
  se.nri.ev <- sqrt(v.nri.ev)
  z.nri.ev <- nri.ev/se.nri.ev

  ## Non-event-specific index: net proportion moving down among non-events
  nri.ne <- pdown.ne - pup.ne
  # se.nri.ne <- sqrt((pdown.ne + pup.ne)/nb) # old est under H0
  v.nri.ne <- (ndown.ne + nup.ne)/(nb^2) - ((ndown.ne - nup.ne)^2)/(nb^3)
  se.nri.ne <- sqrt(v.nri.ne)
  z.nri.ne <- nri.ne/se.nri.ne

  ## Overall index: sum of the two components; variance is the sum of
  ## the component variances (independent samples)
  nri <- pup.ev - pdown.ev - (pup.ne - pdown.ne)
  # old estimate under H0:
  # se.nri <- sqrt((pup.ev + pdown.ev)/na + (pup.ne + pdown.ne)/nb)
  se.nri <- sqrt(v.nri.ev + v.nri.ne)
  z.nri <- nri/se.nri

  ## Mean changes in probability and their combination (IDI-type index),
  ## with a two-sample-style standard error
  improveSens <- sum(d[a])/na
  improveSpec <- -sum(d[b])/nb
  idi <- mean(d[a]) - mean(d[b])
  var.ev <- var(d[a])/na
  var.ne <- var(d[b])/nb
  se.idi <- sqrt(var.ev + var.ne)
  z.idi <- idi/se.idi

  structure(llist(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne,
                  nri, se.nri, z.nri,
                  nri.ev, se.nri.ev, z.nri.ev,
                  nri.ne, se.nri.ne, z.nri.ne,
                  improveSens, improveSpec,
                  idi, se.idi, z.idi, labels=FALSE), class='improveProb')
}
print.improveProb <- function(x, digits=3, conf.int=.95, ...)
{
  ## Print method for objects produced by improveProb(): reports the
  ## reclassification proportions, the NRI components with standard
  ## errors / Z / two-sided P / confidence limits at level conf.int,
  ## and the IDI with the same inferential columns.

  ## Z statistic, two-sided P-value, and confidence limits for an
  ## estimate m with standard error se
  zpci <- function(m, se, conf.int)
  {
    z <- qnorm((1+conf.int)/2)
    cbind(m/se, 2 * pnorm(- abs(m/se)), m - z*se, m + z*se)
  }

  cat('\nAnalysis of Proportions of Subjects with Improvement in Predicted Probability\n\n')
  cat('Number of events:', x$na,'\tNumber of non-events:', x$nb, '\n\n')

  ## One-column table of movement proportions
  props <- matrix(c(x$pup.ev, x$pup.ne, x$pdown.ev, x$pdown.ne),
                  dimnames=list(c(
                    'Increase for events (1)',
                    'Increase for non-events (2)',
                    'Decrease for events (3)',
                    'Decrease for non-events (4)'),
                    'Proportion'))
  cat('Proportions of Positive and Negative Changes in Probabilities\n\n')
  print(props, digits=digits)

  ## NRI table: overall, events-only, non-events-only
  est <- c(x$nri, x$nri.ev, x$nri.ne)
  ses <- c(x$se.nri, x$se.nri.ev, x$se.nri.ne)
  nritab <- cbind(est, ses, zpci(est, ses, conf.int=conf.int))
  low <- paste('Lower', conf.int)
  hi  <- paste('Upper', conf.int)
  dimnames(nritab) <- list(c('NRI (1-3+4-2)',
                             'NRI for events (1-3)',
                             'NRI for non-events (4-2)'),
                           c('Index', 'SE', 'Z', '2P', low, hi))
  cat('\n\nNet Reclassification Improvement\n\n')
  print(nritab, digits=digits)

  cat('\n\nAnalysis of Changes in Predicted Probabilities\n\n')
  chg <- matrix(c(x$improveSens, x$improveSpec),
                dimnames=list(c('Increase for events (sensitivity)',
                                'Decrease for non-events (specificity)'),
                              'Mean Change in Probability'))
  print(chg, digits=digits)

  cat("\n\nIntegrated Discrimination Improvement\n (average of sensitivity and 1-specificity over [0,1];\n also is difference in Yates' discrimination slope)\n\n")
  iditab <- c(x$idi, x$se.idi, zpci(x$idi, x$se.idi, conf.int=conf.int))
  names(iditab) <- c('IDI', 'SE', 'Z', '2P', low, hi)
  print(iditab, digits=digits)
  invisible()
}
# Hmisc/R/groupn.s
groupn <- function(x, y, m=150)
{
  ## Mean of y (and of x) within consecutive groups of m observations,
  ## after sorting by x and removing pairs with any missing value.
  ## Returns list(x=mean x per group, y=mean y per group).
  ## Fixes vs. original: the archived source was garbled at the
  ## leftover-group test, and x[n-m+1:n] parsed as x[(n-m) + (1:n)]
  ## (':' binds tighter than '+'), indexing past the end and yielding
  ## NA means; the leftover group was also added even when n was an
  ## exact multiple of m, duplicating the last full group.
  s <- !is.na(x + y)
  x <- x[s]
  y <- y[s]
  i <- order(x)
  x <- x[i]
  y <- y[i]
  n <- length(x)
  if(n < m)
    stop("m is greater than the number of non-missing observations")
  start <- 1
  end   <- m
  meanx <- NULL
  meany <- NULL
  while(end <= n) {
    meanx <- c(meanx, mean(x[start:end]))
    meany <- c(meany, mean(y[start:end]))
    start <- start + m
    end   <- end + m
  }
  ## Leftover observations (only when n is not a multiple of m):
  ## summarize the last m values so every observation is represented
  if(start <= n) {
    meanx <- c(meanx, mean(x[(n - m + 1):n]))
    meany <- c(meany, mean(y[(n - m + 1):n]))
  }
  return(list(x=meanx, y=meany))
}
# Hmisc/R/summaryM.s
## Summarize the left-hand-side variables of a formula, stratified by the
## grouping variable(s) on the right-hand side.  Categorical variables are
## tabulated (type 1), continuous ones (>= `continuous` distinct values)
## are summarized by quantiles/mean/SD/N (type 2), and mChoice variables
## by per-choice counts (type 3).  Optionally computes group-comparison
## tests via conTest/catTest/ordTest.  Returns a 'summaryM' object with
## one result set per stratum.
## Fix vs. original: used warnings() (which prints accumulated warnings
## and ignores its argument) where warning() was clearly intended.
summaryM <- function(formula, groups=NULL, data=NULL, subset,
                     na.action=na.retain,
                     overall=FALSE, continuous=10, na.include=FALSE,
                     quant=c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625,
                             0.75, 0.875, 0.95, 0.975),
                     nmin=100, test=FALSE,
                     conTest=conTestkw, catTest=catTestchisq,
                     ordTest=ordTestpo) {
  ## Special handling for datasets carrying a .marginal. indicator column
  marg <- length(data) && '.marginal.' %in% names(data)
  if(marg) formula <- update(formula, .~. + .marginal.)

  formula <- Formula::Formula(formula)
  Y <- if(!missing(subset) && length(subset))
         model.frame(formula, data=data, subset=subset, na.action=na.action)
       else
         model.frame(formula, data=data, na.action=na.action)
  # mf <- match.call(expand.dots=FALSE)
  # m <- match(c('formula', 'data', 'subset', 'na.action'), names(mf), 0)
  # mf <- mf[c(1, m)]
  # if(missing(na.action)) mf$na.action <- na.retain
  # formula <- Formula(formula)
  # mf[[1]] <- as.name('model.frame')
  # mf$formula <- formula
  # Y <- eval(mf, parent.frame())
  X <- Formula::model.part(formula, data=Y, rhs=1)
  Y <- Formula::model.part(formula, data=Y, lhs=1)

  ## A variable's label attribute, or `default` when absent/empty
  getlab <- function(x, default) {
    lab <- attr(x, 'label')
    if(!length(lab) || lab=='') default else lab
  }

  if(marg) {
    xm <- X$.marginal.
    X$.marginal. <- NULL
  } else xm <- rep('', nrow(X))

  if(length(X)) {
    xname <- names(X)
    if(length(xname) == 1 && ! length(groups)) groups <- xname
    if(! length(groups) && length(xname) > 1) {
      warning('Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.')
      groups <- xname[1]
    }
    ## Stratification variable: '.ALL.' when only the grouping variable is
    ## present, otherwise the interaction of the non-group RHS variables
    svar <- if(length(xname) == 1) factor(rep('.ALL.', nrow(X)))
            else do.call('interaction', list(X[setdiff(xname, groups)], sep=' '))
    group  <- X[[groups]]
    glabel <- getlab(group, groups)
  } else {
    svar   <- factor(rep('.ALL.', nrow(Y)))
    group  <- rep('', nrow(Y))  # y1 + y2 ~ 1, no grouping
    groups <- group.freq <- NULL
    glabel <- ''
  }

  ## Always include the standard quantiles needed by the print/plot methods
  quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375, 0.5,
                     0.625, 0.75, 0.875, 0.95, 0.975 ))

  nv    <- ncol(Y)
  nameY <- names(Y)
  R     <- list()
  for(strat in levels(svar)) {
    instrat <- svar == strat
    n    <- integer(nv)
    type <- n
    comp <- dat <- vector("list", nv)
    names(comp) <- names(dat) <- nameY
    labels <- Units <- vector("character", nv)
    if(test) {
      testresults <- vector('list', nv)
      names(testresults) <- names(comp)
    }

    gr  <- group[instrat]
    xms <- xm[instrat]
    ## Need to ignore marginal summaries in N unless stratifying by
    ## the variable that is marginalized over
    if(all(xms != '')) xms <- rep('', length(xms))

    group.freq <- table(gr)
    group.freq <- group.freq[group.freq > 0]
    if(overall) group.freq <- c(group.freq, Combined=sum(group.freq))

    for(i in 1 : nv) {
      w <- Y[instrat, i]
      if(length(attr(w, "label")))
        labels[i] <- attr(w, "label")
      if(length(attr(w, 'units'))) Units[i] <- attr(w, 'units')
      if(is.character(w))
        w <- as.factor(w)
      if(!inherits(w, 'mChoice')) {
        ## Treat low-cardinality numerics as categorical
        if(!is.factor(w) && !is.logical(w) &&
           length(unique(w[! is.na(w)])) < continuous)
          w <- as.factor(w)
        s <- !is.na(w)
        if(na.include && !all(s) && length(levels(w))) {
          w <- na.include(w)
          levels(w)[is.na(levels(w))] <- 'NA'
          s <- rep(TRUE, length(s))
        }
        n[i] <- sum(s & xms == '')
        w <- w[s]
        g <- gr[s, drop=TRUE]
        if(is.factor(w) || is.logical(w)) {
          ## Type 1: categorical frequency table by group
          tab <- table(w, g)
          if(test) {
            if(is.ordered(w))
              testresults[[i]] <- ordTest(g, w)
            else
              testresults[[i]] <- catTest(tab)
          }
          ## A one-level binary variable gets its complementary level
          ## added so the table always has both rows
          if(nrow(tab) == 1) {
            b <- casefold(dimnames(tab)[[1]], upper=TRUE)
            pres <- c('1', 'Y', 'YES', 'PRESENT')
            abse <- c('0', 'N', 'NO', 'ABSENT')
            jj <- match(b, pres, nomatch=0)
            if(jj > 0) bc <- abse[jj]
            else {
              jj <- match(b, abse, nomatch=0)
              if(jj > 0) bc <- pres[jj]
            }
            if(jj) {
              tab <- rbind(tab, rep(0, ncol(tab)))
              dimnames(tab)[[1]][2] <- bc
            }
          }
          if(overall)
            tab <- cbind(tab, Combined=apply(tab, 1, sum))
          comp[[i]] <- tab
          type[i]  <- 1
        } else {
          ## Type 2: continuous summary (quantiles, mean, SD, N) by group
          sfn <- function(x, quant) {
            ## Won't lose precision in quantile names with digits=15
            y <- c(quantile(x,quant), Mean=mean(x), SD=sqrt(var(x)),
                   N=sum(!is.na(x)))
            names(y) <-
              c(paste0(formatC(100 * quant, format='fg', width=1, digits=15),
                       '%'), 'Mean', 'SD', 'N')
            y
          }
          qu <- tapply(w, g, sfn, simplify=TRUE, quants)
          if(test) testresults[[i]] <- conTest(g, w)
          if(overall) qu$Combined <- sfn(w, quants)
          comp[[i]] <- matrix(unlist(qu), ncol=length(quants) + 3,
                              byrow=TRUE,
                              dimnames=list(names(qu),
                                            c(format(quants), 'Mean', 'SD', 'N')))
          ## Keep raw data for small groups so they can be plotted raw
          if(any(group.freq <= nmin))
            dat[[i]] <-
              lapply(split(w, g), nmin=nmin,
                     function(x, nmin)
                       if(length(x) <= nmin) x
                       else NULL)
          type[i] <- 2
        }
      } else {
        ## Type 3: mChoice (multiple choice) variables; one row per choice
        w <- as.numeric(w) == 1  ## multiple choice variables
        ## n[i] <- nrow(w)
        n[i] <- sum(! is.na(apply(w, 1, sum)) & xms == '')
        g <- as.factor(gr)
        ncat <- ncol(w)
        tab <- matrix(NA, nrow=ncat, ncol=length(levels(g)),
                      dimnames=list(dimnames(w)[[2]], levels(g)))
        if(test) {
          pval <- numeric(ncat)
          names(pval) <- dimnames(w)[[2]]
          d.f. <- stat <- pval
        }
        for(j in 1 : ncat) {
          tab[j,] <- tapply(w[,j], g, sum, simplify=TRUE, na.rm=TRUE)
          if(test) {
            ## Each choice tested as its own 2 x g table
            tabj <- rbind(table(g)-tab[j,],tab[j,])
            st <- catTest(tabj)
            pval[j] <- st$P
            stat[j] <- st$stat
            d.f.[j] <- st$df
          }
        }
        if(test)
          testresults[[i]] <- list(P=pval,
                                   stat = stat,
                                   df = d.f.,
                                   testname = st$testname,
                                   namefun = st$namefun,
                                   statname = st$statname,
                                   latexstat = st$latexstat,
                                   plotmathstat= st$plotmathstat)
        if(overall) tab <- cbind(tab, Combined=apply(tab,1,sum))
        comp[[i]] <- tab
        type[i]  <- 3
      }
    }
    labels <- ifelse(nchar(labels), labels, names(comp))
    R[[strat]] <- list(stats=comp, type=type,
                       group.freq=group.freq,
                       labels=labels, units=Units,
                       quant=quant, data=dat,
                       N=sum(!is.na(gr) & xms ==''), n=n,
                       testresults=if(test)testresults)
  }
  ## NOTE(review): 'call' is not defined locally, so this stores the base
  ## function `call`; match.call() was likely intended — confirm before
  ## changing, as downstream code may not use this component
  structure(list(results=R, group.name=groups, group.label=glabel,
                 call=call, formula=formula),
            class="summaryM")
}
## Base-graphics plot method for summaryM objects.  Categorical variables
## are shown as a dot chart of proportions; continuous variables as box
## plots ('bp'), dot charts of quartiles ('dot'), or raw strip charts
## ('raw').  Delegates to plotpsummaryM() when plotly graphics are in
## effect.  Returns (invisibly) the number of pages drawn.
plot.summaryM <-
  function(x, vnames = c('labels', 'names'),
           which = c('both', 'categorical', 'continuous'),
           vars = NULL,
           xlim = c(0,1),
           xlab = 'Proportion',
           pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), exclude1 = TRUE,
           main, ncols=2,
           prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001,
           conType = c('bp', 'dot', 'raw'),
           cex.means = 0.5, cex=par('cex'),
           height='auto', width=700, ...)
{
  obj <- x
  vnames  <- match.arg(vnames)
  which   <- match.arg(which)
  conType <- match.arg(conType)
  ## NOTE(review): pdig/eps are passed as literals 3 and 0.001 here,
  ## ignoring the user's pdig= and eps= arguments — confirm intended
  if(grType() == 'plotly')
    return(plotpsummaryM(x, vnames=vnames, which=which, vars=vars,
                         xlim=xlim,
                         xlab=xlab, exclude1=exclude1, ncols=ncols,
                         prtest=prtest, pdig=3, eps=0.001,
                         height=height, width=width))
  html <- FALSE
  ul <- vnames=='labels'
  if(is.logical(prtest) && !prtest) prtest <- 'none'
  ## One pass (and one or more pages) per stratum
  for(strat in names(x$results)) {
    obj <- x$results[[strat]]
    test <- obj$testresults
    if(!length(test)) prtest <- 'none'
    varNames <- names(obj$stats)
    vn <- if(ul) obj$labels
          else varNames
    Units <- obj$units
    nw <- if(lg <- length(obj$group.freq)) lg
          else 1
    gnames <- names(obj$group.freq)
    if(missing(main)) main <-
      if(strat != '.ALL.') strat
      else if(nw == 1) ''
      else
        paste('Proportions',
              'Stratified by',
              x$group.label)
    pch <- rep(pch, length.out=nw)
    lab <- vnd <- z <- nmiss <- vnamd <- NULL
    type <- obj$type; n <- obj$n
    opar <- par()
    on.exit(setParNro(opar))  # restore graphics parameters on exit
    npages <- 0

    ## ---- Categorical & mChoice variables (types 1 and 3): build a
    ## matrix z of proportions (rows = category levels, cols = groups)
    ## and draw a single grouped dot chart
    if(which != 'continuous' && any(type %in% c(1, 3))) {
      ftstats <- NULL
      for(i in (1:length(type))[type %in% c(1, 3)]) {
        nam <- vn[i]
        tab <- obj$stats[[i]]
        if(nw == 1)
          tab <- as.matrix(tab)
        nr <- nrow(tab)
        ## Denominator: column totals for type 1, group sizes for mChoice
        denom <- if(type[i] == 1) apply(tab, 2, sum)
                 else obj$group.freq
        y <- sweep(tab, 2, denom, FUN='/')
        lev <- dimnames(y)[[1]]
        exc <- exclude1 && (nr == 2)   # drop redundant first level of binaries
        jstart <- if(exc) 2 else 1
        rl <- casefold(lev)
        binary <- type[i] == 1 && exc &&
          (all(rl %in% c("0", "1")) | all(rl %in% c("false", "true"))|
           all(rl %in% c("absent", "present")))
        for(j in jstart : nrow(y)) {
          if(nw==1) z <- rbind(z, y[j,])
          else {
            ## Align this row's proportions with the full group set
            yj <- rep(NA, nw)
            names(yj) <- gnames
            yj[names(y[j,])] <- y[j,]
            z <- rbind(z, yj)
          }
          lab <- c(lab, if(binary) '' else lev[j])
          vnd <- c(vnd, nam)
          vnamd <- c(vnamd, varNames[i])
        }
        ## Formatted test statistics shown as auxiliary data per variable
        if(any(prtest != 'none')) {
          fts <- formatTestStats(test[[varNames[i]]], type[i] == 3,
                                 if(type[i] == 1) 1
                                 else 1 : nr,
                                 prtest = prtest,
                                 plotmath= TRUE,
                                 pdig=pdig, eps=eps)
          ftstats <- c(ftstats, fts,
                       if(type[i] == 1 && nr - exc - 1 > 0)
                         rep(expression(''), nr - exc - 1))
        }
      }
      dimnames(z) <- list(lab, dimnames(z)[[2]])
      dotchart3(z, groups=factor(vnd, levels=unique(vnd)), xlab=xlab, xlim=xlim,
                auxdata=if(!any(prtest == 'none')) ftstats,
                pch=pch, ...)

      if(main == '' && strat != '.ALL.') title(strat)
      else if(main != '') title(main)
      npages <- npages + 1
      setParNro(opar)
      ## Dummy key if only one column, so won't use another Key from an
      ## earlier run
      if(nw < 2) {
        Key1 <- function(...)invisible(NULL)
        .setKey(Key1)
      } else { ##set up for key() if > 1 column
        ## Closure drawing a legend of group symbols at a chosen location
        Key3 <- function(x=NULL, y=NULL, lev, pch) {
          oldpar <- par('usr', 'xpd')
          par(usr=c(0, 1, 0, 1), xpd=NA)
          on.exit(par(oldpar))
          if(is.list(x)) {
            y <- x$y
            x <- x$x
          }
          ## Even though par('usr') shows 0,1,0,1 after lattice draws
          ## its plot, it still needs resetting
          if(!length(x)) x <- 0
          if(!length(y))
            y <- 1  ## because of formals()
          rlegend(x, y, legend=lev, pch=pch, ...)
          invisible()
        }
        formals(Key3) <- list(x=NULL,y=NULL,lev=names(obj$group.freq),
                              pch=pch)
        .setKey(Key3)
      }
    }

    ## ---- Continuous variables (type 2): one panel per variable
    ncont <- sum(type==2)
    if(which != 'categorical' && ncont) {
      mf <- par('mfrow')
      if(length(mf) == 0) mf <- c(1, 1)
      ## Choose a multi-panel layout when several continuous variables
      if(ncont > 1 & max(mf) == 1) {
        mf <- if(ncont <= 2) c(2,1)
              else if(ncont <= 4) c(2,2)
              else if(ncont <= 6) c(2,3)
              else if(ncont <= 9) c(3,3)
              else c(4,3)
        ## if(ncont <= 12)c(4,3) else if(ncont <= 16) c(4,4) else c(5,4)
        nr <- mf[1]
        m  <- par('mar')
        par(mfrow=mf)
      }
      npages <- npages + ceiling(sum(type == 2) / prod(mf))
      for(i in (1 : length(type))[type == 2]) {
        nam <- labelPlotmath(vn[i], Units[i])
        st <- obj$stats[[i]]
        if(nw==1) st <- as.matrix(st)
        N <- st[, 'N']
        if(conType == 'dot') {
          ## Dot chart of the three quartiles, x-axis scaled to the
          ## extreme quantiles kept in the summary
          quantile.columns <- dimnames(st)[[2]] %nin% c('Mean', 'SD', 'N')
          st <- st[,quantile.columns, drop=FALSE]
          xlim <- range(st)
          ns <- as.numeric(dimnames(st)[[2]])
          l  <- 1 : length(ns)
          q1  <- l[abs(ns - .25) < .001]
          med <- l[abs(ns - .5) < .001]
          q3  <- l[abs(ns - .75) < .001]
          st <- st[,c(q1, med, q3), drop=FALSE]
          dotchart3(st, xlim=xlim, xlab=nam, pch=c(91, 16, 93),
                    auxtitle='N', auxdata=N)
          ## Explanatory key for the quartile dot chart
          Key2 <- function(x=NULL, y=NULL, quant, ...) {
            quant <- format(quant)
            txt <- paste0('(0.25, 0.5, 0.75) quantiles shown\n',
                          'x-axes scaled to (',min(quant),',',
                          max(quant), ') quantiles')
            if(length(x)) {
              if(is.list(x)) {
                y <- x$y;
                x <- x$x
              }
              text(x, y, txt, cex=.8, adj=0, ...)
            } else mtitle(lr=txt, cex.l=.8, line=1, ...)
            invisible()
          }
          formals(Key2) <- list(x=NULL, y=NULL, quant=obj$quant)
          .setKey2(Key2)
        } else if(conType == 'bp') {
          ## Box-percentile plot with N shown in the right margin
          st <- st[, colnames(st) != 'N', drop=FALSE]
          mw <- max(strwidth(N, 'inches', cex=cex))
          omai <- par('mai')
          mai <- omai
          mai[4] <- .3 + 1.1 * mw    # widen right margin to fit the Ns
          par(mai=mai)
          bpplt(st, xlab=nam, cex.points=cex.means)
          upedge <- par('usr')[4]
          outerText('N',
                    upedge + strheight('N', cex=cex) / 2,
                    cex=cex)
          outerText(N, length(N) : 1, cex=cex)
          par(mai=omai)
        }
        else
          stripChart(obj$data[[i]], xlab=nam)

        if(all(prtest != 'none')) {
          fts <- formatTestStats(test[[varNames[i]]], prtest=prtest,
                                 plotmath=TRUE,
                                 pdig=pdig, eps=eps)
          title(fts, line=.5)
        }
      }
    }
  }
  invisible(npages)
}
print.summaryM <- function(...) {
  ## Print method for summaryM: dispatch on the current output type
  ## (prType()) to plain-text, LaTeX, or HTML rendering.
  fmt <- prType()
  if(fmt == 'plain') printsummaryM(...)
  else if(fmt == 'latex') latex.summaryM(...)
  else if(fmt == 'html') latex.summaryM(..., html=TRUE)
}
## Plain-text rendering of a summaryM object: builds a character matrix
## of formatted statistics (via formatCats/formatCons) per stratum and
## prints it with print.char.matrix.  Invisibly returns the character
## matrix for the last stratum printed.
printsummaryM <- function(x, digits, prn=any(n != N),
                          what=c('proportion', '%'), pctdig=if(what == '%') 0 else 2,
                          npct=c('numerator', 'both', 'denominator', 'none'),
                          exclude1=TRUE, vnames=c("labels", "names"), prUnits=TRUE,
                          sep="/", abbreviate.dimnames=FALSE,
                          prefix.width=max(nchar(lab)),
                          min.colwidth, formatArgs=NULL, round=NULL,
                          prtest=c('P', 'stat', 'df', 'name'), prmsd=FALSE, long=FALSE,
                          pdig=3, eps=0.001, prob=c(0.25, 0.5, 0.75), prN=FALSE, ...)
{
  npct   <- match.arg(npct)
  vnames <- match.arg(vnames)
  what   <- match.arg(what)
  if(is.logical(prtest) && !prtest) prtest <- 'none'
  obj <- x
  if(! length(obj$results)) return()

  ## One printed table per stratum
  for(strat in names(obj$results)) {
    x <- obj$results[[strat]]
    stats  <- x$stats
    nv     <- length(stats)
    cstats <- lab <- character(0)
    nn     <- integer(0)
    type   <- x$type
    n      <- x$n
    N      <- x$N
    nams   <- names(stats)
    labels <- x$labels
    Units  <- x$units
    test   <- x$testresults
    if(!length(test)) prtest <- 'none'
    nw     <- if(lg <- length(x$group.freq)) lg else 1
    gnames <- names(x$group.freq)

    ## Temporarily honor a digits= override
    if(!missing(digits)) {
      oldopt <- options('digits')
      options(digits=digits)
      on.exit(options(oldopt))
    }

    cstats <- NULL
    for(i in 1 : nv) {
      nn <- c(nn, n[i])
      nam <- if(vnames == "names") nams[i] else labels[i]
      if(prUnits && nchar(Units[i]))
        nam <- paste0(nam,' [', gsub('\\*', ' ', Units[i]),']')
      tr <- if(length(test) && all(prtest != 'none')) test[[nams[i]]]
            else NULL
      ## Categorical/mChoice rows vs. one continuous summary row
      if(type[i] %in% c(1, 3)) {
        cs <- formatCats(stats[[i]], nam, tr, type[i],
                         if(length(x$group.freq)) x$group.freq else x$n[i],
                         what, npct, pctdig, exclude1, long, prtest,
                         pdig=pdig, eps=eps)
        nn <- c(nn, rep(NA, nrow(cs) - 1))
      } else cs <- formatCons(stats[[i]], nam, tr, x$group.freq, prmsd,
                              sep, formatArgs, round, prtest,
                              pdig=pdig, eps=eps, prob=prob, prN=prN)
      cstats <- rbind(cstats, cs)
    }

    lab <- dimnames(cstats)[[1]]
    ## Column headers: group levels with their Ns
    gl <- names(x$group.freq)
    gl <- if(length(gl)) paste0(gl," \n(N=", x$group.freq, ")")
          else ""
    if(length(test) && !all(prtest == 'none'))
      gl <- c(gl,
              if(length(prtest) == 1 && prtest != 'stat')
                if(prtest == 'P')'P-value'
                else prtest
              else '  Test\nStatistic')

    nc <- nchar(cstats)
    spaces <- substring("                                                        ",
                        1, (max(nc) - nc + 1) / 2)  ## center strings
    dc <- dim(cstats)
    cstats <- paste0(spaces, cstats)
    dim(cstats) <- dc
    if(prn) {
      ## Prepend an N column of per-variable non-missing counts
      cnn <- format(nn)
      cnn[is.na(nn)] <- ''
      cstats <- cbind(cnn, cstats)
      gl <- c('N', gl)
    }
    cstats <- rbind(gl, cstats)
    dimnames(cstats) <- list(c('',lab), NULL)
    if(strat != '.ALL.')
      cat('\n', strat, '\n', sep='')
    cat("\n\nDescriptive Statistics",
        if(length(x$group.label))
          paste(" by", x$group.label)
        else
          paste0("  (N=", x$N,")"), "\n\n", sep="")
    if(missing(min.colwidth))
      min.colwidth <- max(min(nchar(gl)), min(nc[nc > 0]))
    print.char.matrix(cstats, col.names=FALSE,
                      col.txt.align='left', ...)
  }
  invisible(cstats)
}
## LaTeX/HTML typesetting method for summaryM objects.  For LaTeX, emits
## one table (or table section) per stratum to `file` via latex(); for
## HTML, accumulates all strata and renders one htmlTable.
## Fix vs. original: the HTML tspanner labels compared strata against
## '.ALL' but stratum labels are '.ALL.' (with trailing period) everywhere
## else in this file, so the 'Overall' label could never be applied.
latex.summaryM <-
  function(object, title=first.word(deparse(substitute(object))),
           file=paste(title, 'tex', sep='.'), append=FALSE,
           digits, prn = any(n!=N),
           what=c('proportion', '%'), pctdig=if(what == '%') 0 else 2,
           npct=c('numerator', 'both', 'denominator', 'slash', 'none'),
           npct.size=if(html) mspecs$html$smaller else 'scriptsize',
           Nsize    =if(html) mspecs$html$smaller else 'scriptsize',
           exclude1=TRUE, vnames=c("labels","names"), prUnits=TRUE,
           middle.bold=FALSE,
           outer.size=if(html) mspecs$html$smaller else 'scriptsize',
           caption, rowlabel="", rowsep=html,
           insert.bottom=TRUE, dcolumn=FALSE, formatArgs=NULL, round=NULL,
           prtest=c('P', 'stat', 'df', 'name'), prmsd=FALSE,
           msdsize=if(html) function(x) x else NULL, brmsd=FALSE,
           long=FALSE, pdig=3, eps=.001, auxCol=NULL, table.env=TRUE,
           tabenv1=FALSE, prob=c(0.25, 0.5, 0.75), prN=FALSE,
           legend.bottom=FALSE, html=FALSE, mspecs=markupSpecs, ...)
{
  if(! length(object$results)) return()
  if(! append) cat('', file=file)
  append <- TRUE
  what   <- match.arg(what)
  npct   <- match.arg(npct)
  vnames <- match.arg(vnames)
  if(is.logical(prtest) && ! prtest) prtest <- 'none'

  strats <- names(object$results)

  ## Only a 3-quantile spec is supported; fall back to quartiles
  probdef <- c(0.25, 0.5, 0.75)
  if(length(prob) != 3) {
    prob <- probdef
  }

  ## Markup helpers for the target language (latex or html)
  lang  <- if(html) 'html' else 'latex'
  specs <- mspecs[[lang]]
  math  <- specs$math
  spc   <- specs$lspace
  bold  <- specs$bold
  sup   <- specs$sup
  br    <- specs$br
  plminus <- specs$plminus

  ## Size arguments may be functions (html) or LaTeX size names; wrap
  ## the latter into functions so usage is uniform below
  Npct.size <- npct.size; NNsize <- Nsize; Outer.size <- outer.size
  if(! is.function(npct.size))
    npct.size <- function(x) paste0('{\\', Npct.size, ' ', x, '}')
  if(! is.function(Nsize))
    Nsize <- function(x) paste0('{\\', NNsize, ' ', x, '}')
  if(! is.function(outer.size))
    outer.size <- function(x) paste0('{\\', Outer.size,' ', x, '}')

  ## Add this back if revert to previous chisq function in markupSpecs
  ## if(html) cat(specs$styles)  ## define html styles such as xscript for chisq

  maxlablen <- 0
  istr <- 0
  Lab <- character(0)
  Cstats <- NULL
  n.tspanner <- integer(0)
  for(strat in strats) {
    istr <- istr + 1
    x <- object$results[[strat]]

    stats  <- x$stats
    nv     <- length(stats)
    cstats <- lab <- character(0)
    nn     <- integer(0)
    type   <- x$type
    n      <- x$n
    N      <- x$N
    nams   <- names(stats)
    labels <- x$labels
    maxlablen <- max(maxlablen, nchar(labels))
    Units  <- x$units
    nw     <- if(lg <- length(x$group.freq)) lg else 1
    gnames <- names(x$group.freq)
    test   <- x$testresults
    if(!length(test)) prtest <- 'none'

    ## More than one distinct test used -> footnote markers needed
    gt1.test <-
      if(all(prtest == 'none'))
        FALSE
      else
        length(unique(sapply(test,function(a)a$testname))) > 1

    if(!missing(digits)) {
      oldopt <- options('digits')
      options(digits=digits)
      on.exit(options(oldopt))
    }

    if(missing(caption))
      caption <- paste0("Descriptive Statistics",
                        if(length(x$group.label))
                          paste(" by", x$group.label)
                        else
                          math(paste0("  (N=", x$N, ")")))

    if(! middle.bold) bold <- function(x) x

    ## Format each variable into rows of the output table
    cstats <- NULL
    testUsed <- auxc <- character(0)
    rows.per.var <- integer(0)

    for(i in 1:nv) {
      if(length(auxCol))
        auxc <- c(auxc, auxCol[[1]][i])
      nn <- c(nn, n[i])
      nam <- if(vnames == "names") nams[i]
             else labels[i]
      if(prUnits && nchar(Units[i]) > 0)
        nam <- specs$varlabel(nam, Units[i], hfill=TRUE)
      tr  <- if(length(test) && all(prtest != 'none')) test[[nams[i]]]
             else NULL
      if(length(test) && all(prtest != 'none'))
        testUsed <- unique(c(testUsed, tr$testname))
      if(type[i] %in% c(1, 3)) {
        cs <- formatCats(stats[[i]], nam, tr, type[i],
                         if(length(x$group.freq)) x$group.freq else x$n[i],
                         what, npct, pctdig, exclude1, long, prtest,
                         lang=lang, testUsed=testUsed,
                         npct.size=npct.size,
                         pdig=pdig, eps=eps,
                         footnoteTest=gt1.test)
        nn <- c(nn, rep(NA, nrow(cs) - 1))
        rows.per.var <- c(rows.per.var, nrow(cs))
      } else {
        cs <- formatCons(stats[[i]], nam, tr, x$group.freq, prmsd,
                         prtest=prtest, formatArgs=formatArgs, round=round,
                         lang=lang, testUsed=testUsed,
                         middle.bold=middle.bold,
                         outer.size=outer.size, msdsize=msdsize,
                         brmsd=brmsd,
                         pdig=pdig, eps=eps, footnoteTest=gt1.test,
                         prob=prob, prN=prN)
        rows.per.var <- c(rows.per.var, 1)
      }
      cstats <- rbind(cstats, cs)
      if(length(auxc) && nrow(cstats) > 1)
        auxc <- c(auxc, rep(NA, nrow(cs) - 1))
    }

    lab <- dimnames(cstats)[[1]]
    gl  <- names(x$group.freq)
    if(!length(gl)) gl <- " "

    if(! html) {
      lab <- latexTranslate(lab, c(" "), c("~"), greek=TRUE)
      gl  <- latexTranslate(gl, greek=TRUE)
    }
    extracolheads <-
      if(any(gl != " "))
        c(if(prn)'', math(paste0('N=', x$group.freq)))
      else NULL

    if(length(test) && !all(prtest == 'none')) {
      gl <- c(gl,
              if(length(prtest) == 1 && prtest != 'stat')
                if(prtest == 'P') 'P-value'
                else prtest
              else 'Test Statistic')
      if(length(extracolheads)) extracolheads <- c(extracolheads, '')
    }

    dimnames(cstats) <- list(NULL, gl)
    cstats <- data.frame(cstats, check.names=FALSE, stringsAsFactors=FALSE)
    if(length(gl) == 1 && gl == '') colnames(cstats) <- ' '  ## override V1

    col.just <- rep("c", length(gl))
    if(dcolumn && all(prtest != 'none') &&
       gl[length(gl)] %in% c('P-value', 'Test Statistic'))
      col.just[length(col.just)] <- '.'

    if(prn) {
      cstats <- data.frame(N=nn, cstats, check.names=FALSE,
                           stringsAsFactors=FALSE)
      col.just <- c("r", col.just)
    }

    ## Build the table legend / footnote text
    noib <- is.logical(insert.bottom) && ! insert.bottom
    defs <- NULL
    if(is.character(insert.bottom)) defs <- insert.bottom
    else {
      if(any(type == 2)) {
        if(identical(prob, probdef))
          defs <-
            paste0(outer.size(math('a')), ' ', bold(math('b')), ' ',
                   outer.size(math('c')), ' represent the lower quartile ',
                   math('a'), ', the median ', math('b'),
                   ', and the upper quartile ', math('c'),
                   ' for continuous variables.')
        else {
          prob <- sprintf("%1.0f\\%%", 100 * prob)
          defs <-
            paste0(outer.size(math('a')), ' ', bold(math('b')), ' ',
                   outer.size(math('c')),
                   ' represent the ', prob[1],
                   ' quantile ', math('a'), ' the ', prob[2],
                   ' quantile ', math('b'), ' and the ', prob[3],
                   ' quantile ', math('c'), ' for continuous variables.')
        }
        if(prmsd) defs <-
          paste0(defs, spc,
                 if(html) paste0(math(paste0('x', specs$space,
                                             plminus, ' s')),
                                 ' represents ', specs$xbar,
                                 specs$space, plminus, ' 1 SD.')
                 else
                   '$x\\pm s$ represents $\\bar{X}\\pm 1$ SD.')
      }
      if(prn)
        defs <- c(defs, if(length(defs)) spc,
                  paste0(math('N'), ' is the number of non-missing values.'))
      if(any(type == 1) && npct == 'numerator')
        defs <- c(defs, 'Numbers after proportions are frequencies.')
      if(length(testUsed)) {
        if(html)
          defs <- c(defs, br,
                    if(length(testUsed) == 1)'Test used:'
                    else 'Tests used:',
                    if(length(testUsed) == 1) paste(testUsed, 'test')
                    else paste(paste0(sup(1 : length(testUsed)),
                                      testUsed,
                                      ' test'), collapse='; '), '.')
        else
          defs <-c(defs,
                   if(length(testUsed) == 1)'\\noindent Test used:'
                   else '\\indent Tests used:',
                   if(length(testUsed) == 1) paste(testUsed, 'test')
                   else paste(paste0('$^{', 1 : length(testUsed),'}$',
                                     testUsed, ' test'), collapse='; '), '.')
      }
    }

    legend <- NULL
    if(! html) legend <- defs
    else if(! noib)
      insert.bottom <- paste(defs, collapse=' ')

    ## Optional auxiliary column prepended to the table
    if(length(auxc)) {
      if(length(auxc) != nrow(cstats))
        stop(paste0('length of auxCol (',length(auxCol[[1]]),
                    ') is not equal to number or variables in table (',
                    nv,').'))
      auxcc <- format(auxc)
      auxcc[is.na(auxc)] <- ''
      cstats <- cbind(auxcc, cstats)
      nax <- names(auxCol)
      heads <- get2rowHeads(nax)
      names(cstats)[1] <- heads[[1]]
      if(length(col.just)) col.just <- c('r', col.just)
      if(length(extracolheads)) extracolheads <- c(heads[2], extracolheads)
    }
    if(length(legend) && (html || ! table.env))
      legend[1] <- paste0('\n', legend[1])

    ## Decide where caption and legend are emitted for this stratum
    laststrat <- strat == strats[length(strats)]
    finalcaption <- NULL
    finallegend  <- NULL
    if((! tabenv1 && table.env) || (tabenv1 && istr == 1)) {
      finalcaption <- caption
      if(((! tabenv1 && laststrat) || (tabenv1 && istr == 1)) &&
         !legend.bottom) {
        finalcaption <- paste(finalcaption, paste(legend, collapse=' '),
                              sep='. ')
      }
    }
    if(! noib && laststrat && ! table.env) {
      finallegend <- legend
    } else if(legend.bottom) {
      finallegend <- paste(legend, collapse=' ')
    }

    if(html) {
      ## Accumulate rows; rendering is done once after the loop
      heads <- colnames(cstats)
      if(length(extracolheads)) heads <- paste(heads, extracolheads, sep=br)
      Cstats <- rbind(Cstats, cstats)
      Lab <- c(Lab, lab)
      n.tspanner <- c(n.tspanner, length(lab))
    }
    else {
      w <- latex(cstats, title=title, file=file, append=TRUE,
                 caption=finalcaption, rowlabel=rowlabel,
                 table.env=(! tabenv1 && table.env) || (tabenv1 && istr == 1),
                 col.just=col.just, numeric.dollar=FALSE,
                 insert.bottom=finallegend, rowname=lab, dcolumn=dcolumn,
                 extracolheads=extracolheads, extracolsize=NNsize,
                 insert.top=if(strat != '.ALL.') strat,
                 ...)
      if(tabenv1 && istr == 1) cat('\\clearpage\n', file=file, append=TRUE)
      else if(istr < length(strats))
        cat('\\Needspace{2.7in}\n', file=file, append=TRUE)
      ## tries to avoid page break in middle of stratum
      attr(w, 'legend') <- legend
    }
  }

  if(! html) {
    attr(w, 'nstrata') <- istr
    return(w)
  }

  ## HTML: render the accumulated strata as a single htmlTable
  cs <- c(paste0('width:', round(0.85*maxlablen), 'ex;'),
          rep('padding: 0 7px 0 7px;', ncol(Cstats)))
  ## was rep('padding-left:3ex;'...
  if(length(strats) > 1) {
    ## was strats == '.ALL' which could never match the '.ALL.' label
    tspanner <- ifelse(strats == '.ALL.', bold('Overall'), strats)
    w <- htmlTable::htmlTable(Cstats, header=heads,
                              caption = paste(finalcaption, finallegend),
                              rowlabel = rowlabel,
                              # n.rgroup = if(rowsep) rows.per.var,
                              align = col.just, rnames=Lab,
                              tspanner=tspanner, n.tspanner=n.tspanner,
                              tfoot=insert.bottom,
                              css.cell=cs, escape.html=FALSE)
  }
  else
    w <- htmlTable::htmlTable(Cstats, header=heads,
                              caption = paste(finalcaption, finallegend),
                              rowlabel = rowlabel,
                              # n.rgroup = if(rowsep) rows.per.var,
                              align = col.just, rnames=lab,
                              tfoot = insert.bottom,
                              css.cell = cs, escape.html=FALSE)
  rendHTML(w)
}
html.summaryM <- function(object, ...)
{
  ## Convenience method: render a summaryM object as HTML by delegating
  ## to latex.summaryM with html=TRUE and output sent to the console
  ## (file='') rather than a .tex file.
  latex.summaryM(object, file='', html=TRUE, ...)
}
## Build interactive plotly graphics from a summaryM object: dot charts
## of proportions for categorical variables (type 1 or 3) and extended
## box plots (via bppltp) for continuous variables (type 2), optionally
## stratified.  Returns a plotly object, or a list with elements
## Categorical and Continuous when both kinds are produced.
## vnames : label variables by their 'labels' or their 'names'
## which  : which variable types to plot
## vars   : optional integer subset of variables within each type
## prtest : which test-statistic components to show (or FALSE for none)
## height/width : passed to dotchartp; height='auto' sizes the dot chart
##   from the number of rows
plotpsummaryM <-
  function(x, vnames = c('labels', 'names'),
           which = c('both', 'categorical', 'continuous'),
           vars=NULL, xlim = c(0,1),
           xlab = 'Proportion',
           exclude1 = TRUE, main=NULL, ncols=2,
           prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001,
           height=NULL, width=NULL)
{
  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")
  obj <- x
  vnames <- match.arg(vnames)
  which <- match.arg(which)
  html <- TRUE
  ul <- vnames=='labels'
  if(is.logical(prtest) && !prtest) prtest <- 'none'
  stratnames <- names(x$results)
  nstrat <- length(stratnames)
  ## One dot chart per stratum, assembled into a subplot at the end
  gcat <- vector('list', nstrat)
  names(gcat) <- stratnames
  ## Create annotations to simulate strata titles for dot charts
  # if(nstrat > 1) {
  #  annotations <- list()
  #  xx <- (1 : nstrat) / (nstrat + 1)
  #  for(i in 1 : nstrat)
  #    annotations[[i]] <- list(x=xx[i], y=1.05, text=stratnames[i],
  #                             xref='paper', yref='paper', showarrow=FALSE)
  # }
  for(strat in stratnames) {
    obj <- x$results[[strat]]
    test <- obj$testresults
    if(!length(test)) prtest <- 'none'
    varNames <- names(obj$stats)
    vn <- if(ul) obj$labels
          else varNames
    Units <- obj$units
    ## nw = number of group columns (1 when there is no grouping variable)
    nw <- if(lg <- length(obj$group.freq)) lg
          else 1
    gnames <- names(obj$group.freq)
    ggl <- obj$group.label
    if(! length(main)) main <-
      if(strat != '.ALL.') strat
      else if(nw == 1) ''
      else paste('Proportions', 'Stratified by', obj$group.label)
    lab <- vnd <- z <- Frac <- nmiss <- vnamd <- NULL
    type <- obj$type; n <- obj$n
    ## NOTE(review): gcon is (re)initialized inside the stratum loop but
    ## used after it; if stratnames is empty gcon is undefined -- confirm
    gcon <- NULL
    iv <- which(type %in% c(1, 3))
    if(length(vars)) iv <- iv[intersect(vars, 1 : length(iv))]
    if(which != 'continuous' && length(iv)) {
      ftstats <- NULL
      for(i in iv) {
        nam <- vn[i]
        tab <- obj$stats[[i]]
        if(nw == 1)
          tab <- as.matrix(tab)
        nr <- nrow(tab)
        ## Denominators: column totals for type-1 variables, otherwise
        ## the overall group frequencies
        denom <- if(type[i] == 1) apply(tab, 2, sum)
                 else obj$group.freq
        y <- sweep(tab, 2, denom, FUN='/')
        ## HTML fraction markup for hover text
        frac <- sweep(tab, 2, denom,
                      FUN=markupSpecs$html$frac, size=95)
        dim(frac) <- dim(y) ## paste loses these
        dimnames(frac) <- dimnames(y)
        lev <- dimnames(y)[[1]]
        ## For 2-level variables optionally show only the second level
        exc <- exclude1 && (nr == 2)
        jstart <- if(exc) 2 else 1
        rl <- casefold(lev)
        ## Binary = yes/no-type variable; its level label is suppressed
        binary <- type[i] == 1 && exc &&
          (all(rl %in% c("0", "1")) | all(rl %in% c("false", "true"))|
           all(rl %in% c("absent", "present")))
        for(j in jstart : nrow(y)) {
          if(nw==1) {
            z <- rbind(z, y[j,])
            Frac <- rbind(Frac, frac[j,])
          }
          else {
            ## Align this level's values with the full set of group names
            yj <- rep(NA, nw)
            names(yj) <- gnames
            yj[names(y[j,])] <- y[j,]
            z <- rbind(z, yj)
            fj <- rep('', nw)
            names(fj) <- gnames
            fj[names(frac[j,])] <- frac[j,]
            Frac <- rbind(Frac, fj)
          }
          lab <- c(lab, if(binary) '' else lev[j])
          vnd <- c(vnd, nam)
          vnamd <- c(vnamd, varNames[i])
        }
        if(any(prtest != 'none')) {
          fts <- formatTestStats(test[[varNames[i]]], type[i] == 3,
                                 if(type[i] == 1) 1 else 1 : nr,
                                 prtest = prtest,
                                 lang = 'html',
                                 pdig = pdig, eps=eps)
          ftstats <- c(ftstats, fts,
                       if(type[i] == 1 && nr - exc - 1 > 0)
                         rep('', nr - exc - 1))
        }
      }
      dimnames(z) <- dimnames(Frac) <- list(lab, dimnames(z)[[2]])
      if(! any(prtest == 'none'))
        Frac[, 1] <- paste0(Frac[, 1], ' ', ftstats)
      gcat[[strat]] <-
        dotchartp(z, groups=factor(vnd, levels=unique(vnd)),
                  xlab=xlab, xlim=xlim,
                  auxdata=Frac, auxwhere='hover',
                  dec=3,
                  height=if(length(height) && height == 'auto')
                           plotlyParm$heightDotchart(nrow(z)) else height,
                  width=width,
                  layoutattr=FALSE && nstrat > 1)
    }
    iv <- which(type == 2)
    if(length(vars)) iv <- iv[intersect(vars, 1 : length(iv))]
    if(which != 'categorical' && length(iv)) {
      if(nstrat > 1) warning('only plots last stratum for continuous variables')
      icon <- iv
      ii <- 0
      p <- list()
      for(i in icon) {
        ii <- ii + 1
        nam <- markupSpecs$html$varlabel(vn[i], Units[i], size=73)
        st <- obj$stats[[i]]
        if(nw==1) st <- as.matrix(st)
        N <- st[, 'N']
        teststat <- if(all(prtest != 'none'))
          formatTestStats(test[[varNames[i]]], prtest=prtest,
                          lang='html',
                          pdig=pdig, eps=eps)
        p[[ii]] <- bppltp(stats=st, xlab=nam, teststat=teststat,
                          showlegend=FALSE)
      }
      nrows <- ceiling(length(p) / ncols)
      gcon <- plotly::subplot(p, shareY=TRUE, nrows=nrows,
                              titleX=TRUE, margin=.1)
      ## Disabled sizing code retained for reference
      if(FALSE) {
        if(! length(height)) height <- min(1000, 275 * nrows)
        if(! length(width)) width <- min(900, 400 * ncols)
        gcon <- plotly::layout(gcon, height=height, width=width)
        ## and note: height and width are now arguments to plot_ly
      }
    }
  }
  ## NOTE(review): gcat was created with vector('list', nstrat) so
  ## is.null(gcat) is FALSE even when no categorical plot was made --
  ## confirm the intent of these is.null() guards
  if(! is.null(gcat)) { # plotly objects have length 0
    gcat <- if(nstrat == 1) gcat[[1]]
            else
              plotly::subplot(gcat, shareY=TRUE,
                              titleX=TRUE, nrows=1, margin=.1)
    # else {
    #  lo <- attr(gcat[[1]], 'layout')
    #  gcat <- plotly::subplot(gcat, shareY=TRUE,
    #                          titleX=TRUE, nrows=1, margin=.1)
    #  ann <- list()
    #  for(i in 1 : nstrat)
    #    ann[[i]] <- list(x= i / (nstrat + 1), y=1.05,
    #                     text=stratnames[i], showarrow=FALSE,
    #                     xref='paper', yref='paper')
    #  lo$xaxis1 <- lo$xaxis
    #  lo$xaxis1$title <- 'this is it'
    #  lo$axis2 <- lo$axis1
    #  lo <- c(lo, ann)
    #  do.call(plotly::layout, lo)
    # }
  }
  if(! is.null(gcat) && ! is.null(gcon)) # plotly objects have length 0
    list(Categorical = gcat,
         Continuous = gcon)
  else
    if(! is.null(gcat)) gcat
  else gcon
}
Hmisc/R/capitalize.s 0000644 0001762 0000144 00000000255 12752725127 014043 0 ustar ligges users capitalize <- function(string) {
  ## Capitalize the first character of each element of 'string' that
  ## does not already begin with an upper-case letter A-Z.  Elements
  ## starting with digits or punctuation are selected too but are
  ## unchanged, since toupper is a no-op on such characters.
  capped <- grep("^[A-Z]", string, invert = TRUE)
  substr(string[capped], 1, 1) <- toupper(substr(string[capped], 1, 1))
  return(string)
} Hmisc/R/xYplot.s 0000644 0001762 0000144 00000056167 14400366251 013211 0 ustar ligges users Cbind <- function(...)
{ # See llist function with Hmisc label function
  ## Combine a primary variable with auxiliary variables (e.g. lower and
  ## upper error-bar limits) into a vector of class 'Cbind': the primary
  ## values carry a 'label' attribute and an 'other' matrix holding the
  ## remaining columns.  A single matrix argument must have column
  ## dimnames; its first column becomes the primary variable.
  dotlist <- list(...)
  if(is.matrix(dotlist[[1]]))
    {
      y <- dotlist[[1]]
      ynam <- dimnames(y)[[2]]
      if(!length(ynam))
        stop('when first argument is a matrix it must have column dimnames')
      other <- y[,-1,drop= FALSE]
      return(structure(y[,1], class='Cbind', label=ynam[1], other=other))
    }
  ## Derive per-argument names: explicit names(...) win, otherwise the
  ## deparsed calling expressions are used
  lname <- names(dotlist)
  name <- vname <- as.character(sys.call())[-1]
  for(i in 1:length(dotlist))
    {
      vname[i] <- if(length(lname)) lname[i] else ''
      ## Added length() and '' 12Jun01, remove length(vname[i])==0 below
      if(vname[i]=='') vname[i] <- name[i]
    }
  lab <- attr(y <- dotlist[[1]], 'label')
  if(!length(lab)) lab <- vname[1]
  ## If the second argument is not already a >=2-column matrix, build the
  ## 'other' matrix from all remaining arguments
  if(!is.matrix(other <- dotlist[[2]]) || ncol(other)<2)
    {
      other <- as.matrix(as.data.frame(dotlist))[,-1,drop= FALSE]
      dimnames(other)[[2]] <- vname[-1]
    }
  structure(y, class='Cbind', label=lab, other=other)
}
## Identity coercion methods for Cbind objects: returning x unchanged
## keeps xyplot from stripping off the "other" attribute when it calls
## as.numeric on the response variable.
as.numeric.Cbind <- as.double.Cbind <- function(x, ...) x
## Subset method for Cbind objects: subsets the primary vector and the
## corresponding rows of the "other" matrix in parallel, preserving the
## class and the label attribute.
'[.Cbind' <- function(x, ...)
{
  y   <- unclass(x)[...]
  oth <- attr(x, 'other')[..., , drop = FALSE]
  structure(y, class = 'Cbind',
            label = attr(x, 'label'),
            other = oth)
}
## Prepanel function for xYplot: the y-axis limits must cover not only
## the point estimates but also the auxiliary columns (error bars or
## band limits) stored in y's "other" attribute.
prepanel.xYplot <- function(x, y, ...)
{
  rx <- range(x, na.rm = TRUE)
  ry <- range(y, attr(y, 'other'), na.rm = TRUE)
  list(xlim = rx, ylim = ry, dx = diff(rx), dy = diff(ry))
}
## MB add method="filled bands"
## MB use col.fill to specify colors for filling bands
## Lattice panel function used by xYplot.  Draws points/lines for y
## against x, plus error bars, bands, filled bands, or group-wise
## quantile summaries derived from y's "other" attribute (created by
## Cbind).  Supports grouped (superposed) data, per-point symbol sizes
## via 'size', labeled curves, minor axis ticks, and registers Key/sKey
## legend functions via .setKey/.setsKey for later drawing.
## method may also be a function computing summary statistics (as with
## method="quantiles", which bins x with cut2 and applies quantile()).
panel.xYplot <-
  function(x, y, subscripts, groups = NULL,
           type = if(is.function(method) || method == "quantiles") "b"
                  else "p",
           method = c("bars", "bands", "upper bars", "lower bars",
                      "alt bars", "quantiles", "filled bands"),
           methodArgs = NULL, label.curves = TRUE, abline,
           probs = c(0.5, 0.25, 0.75), nx=NULL, cap = 0.015, lty.bar = 1,
           lwd = plot.line$lwd, lty = plot.line$lty,
           pch = plot.symbol$pch, cex = plot.symbol$cex,
           font = plot.symbol$font, col = NULL,
           lwd.bands = NULL, lty.bands = NULL, col.bands = NULL,
           minor.ticks = NULL, col.fill = NULL,
           size=NULL, rangeCex=c(.5,3), ...)
{
  sRequire('lattice')
  if(missing(method) || !is.function(method))
    method <- match.arg(method) # was just missing() 26Nov01
  type <- type # evaluate type before method changes 9May01
  sizeVaries <- length(size) && length(unique(size)) > 1
  if(length(groups)) groups <- as.factor(groups)
  g <- as.integer(groups)[subscripts]
  ng <- if(length(groups)) max(g)
        else 1
  ## Graphical defaults come from superpose settings when grouped
  plot.symbol <- lattice::trellis.par.get(if(ng > 1) "superpose.symbol"
                                          else "plot.symbol")
  plot.line <- lattice::trellis.par.get(if(ng > 1) "superpose.line"
                                        else "plot.line")
  lty <- rep(lty, length = ng)
  lwd <- rep(lwd, length = ng)
  if(length(rangeCex) != 1) pch <- rep(pch, length = ng)
  if(!sizeVaries) cex <- rep(cex, length = ng)
  font <- rep(font, length = ng)
  if(!length(col))
    col <- if(type == "p") plot.symbol$col
           else plot.line$col
  col <- rep(col, length = ng)
  pchVaries <- FALSE
  ## Thanks to Deepayan Sarkar for the following size code
  ## size: per-point symbol sizes; with length(rangeCex)==1 the size
  ## values themselves become plotting characters, otherwise cex is
  ## linearly rescaled into the rangeCex interval and a size legend
  ## function sKey is registered
  if(sizeVaries)
    {
      if(length(rangeCex) > 1)
        srng <- range(size, na.rm=TRUE)
      size <- size[subscripts]
      if(length(rangeCex)==1)
        {
          pch <- as.character(size)
          cex <- rangeCex
          sizeVaries <- FALSE
          pchVaries <- TRUE
        }
      else
        {
          cex <- rangeCex[1] + diff(rangeCex)*(size - srng[1])/diff(srng)
          sKey <- function(x=0, y=1, cexObserved, cexCurtailed, col, pch,
                           other)
            {
              if(!length(x))
                x <- 0.05
              if(!length(y))
                y <- 0.95 ## because of formals()
              ## had to multiply cex by 1.4 when using rlegend instead of rlegendg
              rlegendg(x, y, legend=format(cexObserved), cex=cexCurtailed,
                       col=col, pch=pch, other=other)
              invisible()
            }
          formals(sKey) <- list(x=NULL, y=NULL, cexObserved=srng,
                                cexCurtailed=rangeCex,
                                col=col[1], pch=pch, other=NULL)
          .setsKey(sKey)
        }
    }
  ## "other" holds 1 column (half-widths) or 2 columns (lower, upper)
  other <- attr(y, "other")
  if(length(other))
    {
      nother <- ncol(other)
      if(nother == 1)
        {
          lower <- y - other
          upper <- y + other
        }
      else
        {
          lower <- other[, 1]
          upper <- other[, 2]
        }
    }
  else nother <- 0
  y <- unclass(y)
  levnum <- if(length(groups)) sort(unique(g))
            else 1
  ## Summarization mode: bin x (cut2, means of bins) and compute a
  ## 3-number summary of y within each bin (and group), then fall
  ## through to method="bands" to display the 2nd/3rd summary columns
  if(is.function(method) || method == "quantiles")
    {
      if(!is.function(method))
        {
          method <- quantile # above: methodArgs=NULL
          if(!length(methodArgs))
            methodArgs <- list(probs = probs)
        }
      if(length(methodArgs)) methodArgs$na.rm <- TRUE
      else
        methodArgs <- list(na.rm = TRUE)
      if(ng == 1)
        {
          if(!length(nx)) nx <- min(length(x)/4, 40)
          xg <-
            if(nx)
              as.numeric(as.character(cut2(x, m = nx,
                                           levels.mean = TRUE)))
            else x
          dsum <- do.call("summarize",
                          c(list(y, llist(xg = xg), method, type = "matrix",
                                 stat.name = "Z"), methodArgs))
        }
      else
        {
          xg <- x
          if(missing(nx) || nx)
            for(gg in levnum)
              {
                w <- g == gg
                if(missing(nx)) nx <- min(sum(w)/4, 40)
                xg[w] <-
                  as.numeric(as.character(cut2(xg[w], m = nx,
                                               levels.mean = TRUE)))
              }
          dsum <- do.call("summarize",
                          c(list(y, by = llist(g, xg),
                                 method, type = "matrix", stat.name = "Z"),
                            methodArgs))
          g <- dsum$g
          groups <- factor(g, 1:length(levels(groups)),
                           levels(groups))
          subscripts <- TRUE
        }
      x <- dsum$xg
      y <- dsum$Z[, 1, drop = TRUE]
      other <- dsum$Z[, -1, drop=FALSE]
      nother <- 2
      method <- "bands"
    }
  ## MB 04/17/01 default colors for filled bands
  ## 'pastel' colors matching superpose.line$col
  plot.fill <- c(9, 10, 11, 12, 13, 15, 7)
  ## The following is a fix of panel.xyplot to work for type='b'
  ppanel <- function(x, y, type, cex, pch, font, lwd, lty, col, ...)
    {
      gfun <- ordGridFun(TRUE)
      if(type != 'p')
        gfun$lines(x, y, lwd = lwd, lty = lty, col = col, ...)
      if(type !='l')
        gfun$points(x=x, y=y,
                    pch = pch, font = font,
                    cex = cex, col = col,
                    type = type, lwd=lwd, lty=lty, ...)
    }
  ##The following is a fix for panel.superpose for type='b'
  pspanel <- function(x, y, subscripts, groups, type, lwd, lty,
                      pch, cex, font, col, sizeVaries, pchVaries, ...)
    {
      gfun <- ordGridFun(TRUE)
      groups <- as.numeric(groups)[subscripts]
      N <- seq(along = groups)
      for(i in sort(unique(groups)))
        {
          which <- N[groups == i]  # j <- which[order(x[which])]
                                        # sort in x
          j <- which # no sorting
          if(type != "p")
            gfun$lines(x[j], y[j],
                       col = col[i], lwd = lwd[i], lty = lty[i],
                       ...)
          if(type !='l')
            gfun$points(x[j], y[j],
                        col = col[i],
                        pch = pch[if(pchVaries)j
                                  else i],
                        cex = cex[if(sizeVaries)j
                                  else i],
                        font = font[i], lty=lty[i], lwd=lwd[i], ...)
        }
    }
  ## 14Apr2001 MB changes: set colors for method = "filled bands"
  if(!length(col.fill)) col.fill <- plot.fill
  col.fill <- rep(col.fill, length = ng)
  ## end MB
  if(ng > 1) {
    ## MB 14Apr2001: if method == "filled bands"
    ## have to plot filled bands first, otherwise lines/symbols
    ## would be hidden by the filled band
    if(method == "filled bands")
      {
        gfun <- ordGridFun(TRUE)
        for(gg in levnum)
          {
            s <- g == gg
            gfun$polygon(x=c(x[s],rev(x[s])),
                         y=c(lower[s], rev(upper[s])),
                         col=col.fill[gg], ...)
          }
      } ## end MB
    pspanel(x, y, subscripts, groups, lwd = lwd, lty =
            lty, pch = pch, cex = cex, font = font, col
            = col, type = type, sizeVaries=sizeVaries, pchVaries=pchVaries)
    ## Label curves directly unless label.curves=FALSE
    if(type != "p" && !(is.logical(label.curves) && !
                        label.curves))
      {
        lc <- if(is.logical(label.curves)) list(lwd = lwd, cex = cex[1])
              else c(list(lwd = lwd, cex = cex[1]), label.curves)
        curves <- vector("list", length(levnum))
        names(curves) <- levels(groups)[levnum]
        i <- 0
        for(gg in levnum)
          {
            i <- i + 1
            s <- g == gg
            curves[[i]] <- list(x[s], y[s])
          }
        labcurve(curves, lty = lty[levnum], lwd = lwd[levnum],
                 col. = col[levnum], opts = lc, grid=TRUE, ...)
      }
  }
  ## MB 14Apr2001: if method == "filled bands"
  ## plot filled bands first, otherwise lines/symbols
  ## would be hidden by the filled band
  else
    {
      if(method == "filled bands")
        grid.polygon(x = c(x, rev(x)), y = c(lower, rev(upper)),
                     gp=gpar(fill = col.fill, col='transparent'),
                     default.units='native')
      ## end MB
      ppanel(x, y, lwd = lwd, lty = lty, pch = pch, cex = cex,
             font = font, col = col, type = type)
    }
  ## 14Apr2001 MB
  ## final change for filled bands: just skip the rest
  ## if method = filled bands, remaining columns of other are ignored
  if(nother && method != "filled bands")
    {
      if(method == "bands")
        {
          ## dob: pick the j-th band's lwd/lty/col, falling back to the
          ## main line settings when no per-band values were given
          dob <- function(a, def, ng, j)
            {
              if(!length(a)) return(def)
              if(!is.list(a)) a <- list(a)
              a <- rep(a, length = ng)
              sapply(a, function(b, j)
                     b[j], j = j)
            }
          for(j in 1:ncol(other))
            {
              if(ng == 1)
                ppanel(x, other[, j],
                       lwd = dob(lwd.bands, lwd, ng, j),
                       lty = dob(lty.bands, lty, ng, j),
                       col = dob(col.bands, col, ng, j),
                       pch = pch, cex = cex, font =
                       font, type = "l")
              else pspanel(x, other[, j],
                           subscripts, groups,
                           lwd = dob(lwd.bands, lwd, ng, j),
                           lty = dob(lty.bands, lty, ng, j),
                           col = dob(col.bands, col, ng, j),
                           pch = pch, cex = cex, font =
                           font, type = "l",
                           sizeVaries=sizeVaries, pchVaries=pchVaries)
            }
        }
      else
        {
          ## Draw error bars with optional caps; connect controls which
          ## half of the bar is drawn
          errbr <- function(x, y, lower, upper, cap,
                            lty, lwd, col, connect)
            {
              gfun <- ordGridFun(TRUE) ## see Misc.s
              segmnts <- gfun$segments
              gun <- gfun$unit
              smidge <- 0.5 * cap * unit(1,'npc')
              switch(connect,
                     all = {
                       segmnts(x, lower, x, upper,
                               lty = lty, lwd = lwd, col = col)
                       segmnts(gun(x)-smidge, lower,
                               gun(x)+smidge, lower,
                               lwd = lwd, lty = 1, col = col)
                       segmnts(gun(x)-smidge, upper,
                               gun(x)+smidge, upper,
                               lwd = lwd, lty = 1, col = col)
                     },
                     upper = {
                       segmnts(x, y, x, upper, lty = lty, lwd = lwd, col = col)
                       segmnts(gun(x)-smidge, upper,
                               gun(x)+smidge, upper,
                               lwd = lwd, lty = 1, col = col)
                     },
                     lower = {
                       segmnts(x, y, x, lower, lty = lty, lwd = lwd, col = col)
                       segmnts(gun(x)-smidge, lower,
                               gun(x)+smidge, lower,
                               lwd = lwd, lty = 1, col = col)
                     }
                     )
            }
          if(ng == 1)
            errbr(x, y, lower, upper, cap,
                  lty.bar, lwd, col, switch(method,
                                            bars = "all",
                                            "upper bars" = "upper",
                                            "lower bars" = "lower",
                                            "alt bars" = "lower"))
          else
            {
              ## "alt bars": draw upper bars for groups above the overall
              ## median, lower bars for those below, to reduce overlap
              if(method == "alt bars")
                medy <- median(y, na.rm = TRUE)
              for(gg in levnum)
                {
                  s <- g == gg
                  connect <- switch(method,
                                    bars = "all",
                                    "upper bars" = "upper",
                                    "lower bars" = "lower",
                                    "alt bars" = if(median(y[s],
                                                           na.rm = TRUE) > medy) "upper"
                                                 else "lower")
                  errbr(x[s], y[s], lower = lower[s],
                        upper = upper[s], cap, lty.bar,
                        lwd[gg], col[gg], connect)
                }
            }
        }
    }
  if(length(minor.ticks))
    {
      minor.at <-
        if(is.list(minor.ticks)) minor.ticks$at
        else
          minor.ticks
      minor.labs <-
        if(is.list(minor.ticks) && length(minor.ticks$labels))
          minor.ticks$labels
        else
          FALSE
      ## NOTE(review): 'gfun' is only assigned above inside the
      ## ng>1 filled-bands branch; on other paths it appears to be
      ## undefined here -- confirm
      gfun$axis(side = 1, at = minor.at, labels = FALSE,
                tck = par("tck") * 0.5, outer = TRUE, cex = par("cex") *
                0.5)
      if(!is.logical(minor.labs))
        gfun$axis(side = 1, at = minor.at, labels =
                  minor.labs, tck = 0, cex = par("cex") * 0.5, line = 1.25)
    }
  if(ng > 1)
    {
      ##set up for key() if points plotted
      Key1 <- function(x=0, y=1, lev, cex, col, font, pch, other)
        {
          ## Even though par('usr') shows 0,1,0,1 after lattice draws
          ## its plot, it still needs resetting
          if(!length(x)) x <- 0.05
          if(!length(y)) y <- 0.95 ## because of formals()
          rlegendg(x, y, legend=lev, cex=cex, col=col, pch=pch, other=other)
          invisible()
        }
      formals(Key1) <- list(x=NULL,y=NULL,lev=levels(groups),
                            cex=if(sizeVaries) 1 else cex,
                            col=col, font=font, pch=pch, other=NULL)
      .setKey(Key1)
      rm(Key1)
    }
  ## abline may be one named list of panel.abline args, or a list of
  ## such lists
  if(!missing(abline)) {
    pabl <- lattice::panel.abline
    if(length(names(abline))) do.call(pabl, abline)
    else for(i in 1:length(abline)) do.call(pabl, abline[[i]])
  }
  if(type == "l" && ng > 1)
    {
      ## Set up for legend (key() or rlegendg()) if lines drawn
      Key2 <- function(x=0, y=1, lev, cex, col, lty, lwd, other)
        {
          ## Even though par('usr') shows 0,1,0,1 after lattice draws
          ## its plot, it still needs resetting
          if(!length(x)) x <- 0.05
          if(!length(y)) y <- 0.95 ## because of formals()
          rlegendg(x, y, legend=lev, cex=cex, col=col, lty=lty, lwd=lwd,
                   other=other)
          invisible()
        }
      formals(Key2) <- list(x=NULL,y=NULL,lev=levels(groups), col=col,
                            lty=lty, lwd=lwd, other=NULL)
      .setKey(Key2)
      rm(Key2)
    }
}
## Front end to lattice::xyplot for plotting a response created with
## Cbind (point estimates plus error bars/bands) against x, with
## automatic axis labels from Hmisc label()/units() attributes and
## support for scales.major/scales.minor attributes on the x variable.
## groups and subset are evaluated in 'data' via substitute(), mimicking
## xyplot's non-standard evaluation.
xYplot <-
  function (formula, data=sys.frame(sys.parent()),
            groups, subset,
            xlab=NULL, ylab=NULL, ylim=NULL,
            panel=panel.xYplot, prepanel=prepanel.xYplot,
            scales=NULL, minor.ticks=NULL, sub=NULL, ...)
{
  sRequire('lattice')
  yvname <- as.character(formula[2]) # tried deparse
  y <- eval(parse(text=yvname), data)
  if(!length(ylab)) ylab <- label(y, units=TRUE, plot=TRUE,
                                  default=yvname, grid=TRUE)
  ## Default ylim must cover the auxiliary ("other") columns too
  if(!length(ylim))
    {
      yother <- attr(y,'other')
      if(length(yother)) ylim <- range(y, yother, na.rm=TRUE)
    }
  xvname <- formula[[3]]
  if(length(xvname)>1 && as.character(xvname[[1]])=='|')
    xvname <- xvname[[2]] # ignore conditioning var
  xv <- eval(xvname, data)
  if(!length(xlab))
    xlab <- label(xv, units=TRUE, plot=TRUE,
                  default=as.character(xvname)[1],
                  grid=TRUE)
  ## Pick up axis specifications attached to the x variable
  if(!length(scales$x))
    {
      if(length(maj <- attr(xv,'scales.major')))
        scales$x <- maj
    }
  if(!length(minor.ticks))
    {
      if(length(minor <- attr(xv,'scales.minor')))
        minor.ticks <- minor
    }
  if(!missing(groups)) groups <- eval(substitute(groups),data)
  if(!missing(subset)) subset <- eval(substitute(subset),data)
  ## Note: c(list(something), NULL) = list(something)
  ## The following was c(list(formula=formula,...,panel=panel),if()c(),...)
  ## Only pass arguments that were actually supplied/derived, so that
  ## xyplot's own defaults apply otherwise
  lxyp <- lattice::xyplot
  do.call(lxyp,
          c(list(x = formula, data=data, prepanel=prepanel,
                 panel=panel),
            if(length(ylab))list(ylab=ylab),
            if(length(ylim))list(ylim=ylim),
            if(length(xlab))list(xlab=xlab),
            if(length(scales))list(scales=scales),
            if(length(minor.ticks))list(minor.ticks=minor.ticks),
            if(!missing(groups))list(groups=groups),
            if(!missing(subset))list(subset=subset),
            if(!missing(sub)) list(sub=sub),
            list(...)))
}
## Prepanel function for Dotplot: x-axis limits must include the
## interval endpoints stored in x's "other" attribute; y is coerced
## with as.numeric so factor levels map to row positions.
prepanel.Dotplot <- function(x, y, ...)
{
  list(xlim = range(x, attr(x, 'other'), na.rm = TRUE),
       ylim = range(as.numeric(y), na.rm = TRUE))
}
## Lattice panel function for Dotplot.  Draws horizontal reference
## lines at each y value, optional abline specifications, interval
## segments from x's "other" attribute (2 columns = one interval;
## 4 columns = outer interval plus a thicker inner interval with '+'
## endpoints), and the points themselves, superposed by groups when
## given.  Registers a Key legend function via .setKey when grouped.
panel.Dotplot <- function(x, y, groups = NULL,
                          pch  = dot.symbol$pch,
                          col  = dot.symbol$col, cex = dot.symbol$cex,
                          font = dot.symbol$font, abline, ...)
{
  sRequire('lattice')
  gfun <- ordGridFun(TRUE) ## see Misc.s
  segmnts <- gfun$segments
  pabl <- lattice::panel.abline
  y <- as.numeric(y)
  gp <- length(groups)
  ## Use superpose settings when a groups variable is present
  dot.symbol <- lattice::trellis.par.get(if(gp)'superpose.symbol'
                                         else 'dot.symbol')
  dot.line <- lattice::trellis.par.get('dot.line')
  plot.line <- lattice::trellis.par.get(if(gp)'superpose.line'
                                        else 'plot.line')
  ## Horizontal guide line through each distinct y
  gfun$abline(h = unique(y), lwd=dot.line$lwd, lty=dot.line$lty,
              col=dot.line$col)
  ## abline may be one named list of panel.abline arguments, or a list
  ## of such lists
  if(!missing(abline))
    {
      if(length(names(abline))) do.call(pabl, abline)
      else for(i in 1:length(abline)) do.call(pabl, abline[[i]])
    }
  other <- attr(x,'other')
  x <- unclass(x)
  attr(x,'other') <- NULL
  if(length(other))
    {
      nc <- ncol(other)
      ## Outer interval: first to last column of 'other'
      segmnts(other[,1], y, other[,nc], y, lwd=plot.line$lwd[1],
              lty=plot.line$lty[1], col=plot.line$col[1])
      if(nc==4)
        {
          ## Inner interval drawn thicker, endpoints marked with '+'
          segmnts(other[,2], y, other[,3], y, lwd=2*plot.line$lwd[1],
                  lty=plot.line$lty[1], col=plot.line$col[1])
          gfun$points(other[,2], y, pch=3, cex=cex, col=col, font=font)
          gfun$points(other[,3], y, pch=3, cex=cex, col=col, font=font)
        }
      if(gp) lattice::panel.superpose(x, y, groups=as.numeric(groups), pch=pch,
                                      col=col, cex=cex, font=font, ...)
      else
        gfun$points(x, y, pch=pch[1], cex=cex, col=col, font=font)
    }
  else
    {
      if(gp)
        lattice::panel.superpose(x, y, groups=as.numeric(groups),
                                 pch=pch, col=col, cex=cex,
                                 font=font, ...)
      else
        lattice::panel.dotplot(x, y, pch=pch, col=col, cex=cex, font=font, ...)
    }
  if(gp)
    {
      ## Register a legend function; defaults are injected via formals()
      Key <- function(x=0, y=1, lev, cex, col, font, pch, other)
        {
          if(!length(x)) x <- 0.05
          if(!length(y)) y <- 0.95 ## because of formals()
          rlegendg(x, y, legend=lev, cex=cex, col=col, pch=pch, other=other)
          invisible()
        }
      lev <- levels(as.factor(groups))
      ng <- length(lev)
      formals(Key) <- list(x=NULL,y=NULL,lev=lev,
                           cex=cex[1:ng], col=col[1:ng],
                           font=font[1:ng], pch=pch[1:ng], other=NULL)
      .setKey(Key)
    }
}
## Front end to lattice::xyplot producing dot plots where x may be a
## Cbind object carrying interval endpoints in its "other" attribute.
## Automatic axis labels come from Hmisc label()/units() attributes;
## factor y variables get explicit at/labels scales so empty levels are
## retained across panels (drop.unused.levels is temporarily disabled).
Dotplot <-
  function (formula, data=sys.frame(sys.parent()),
            groups, subset,
            xlab=NULL, ylab=NULL, ylim=NULL,
            panel=panel.Dotplot, prepanel=prepanel.Dotplot,
            scales=NULL, xscale=NULL, ...)
{
  sRequire('lattice')
  yvname <- as.character(formula[2]) # tried deparse
  yv <- eval(parse(text=yvname), data)
  if(!length(ylab))
    ylab <- label(yv, units=TRUE, plot=TRUE,
                  default=yvname, grid=TRUE)
  if(!length(ylim))
    {
      yother <- attr(yv,'other')
      if(length(yother)) ylim <- range(yv, yother, na.rm=TRUE)
    }
  if(is.character(yv)) yv <- factor(yv)
  ## Explicit y scale keeps all factor levels in plotting order
  if(!length(scales) && is.factor(yv))
    scales <- list(y=list(at=1:length(levels(yv)),labels=levels(yv)))
  if(length(xscale)) scales$x <- xscale
  xvname <- formula[[3]]
  if(length(xvname)>1 && as.character(xvname[[1]])=='|')
    xvname <- xvname[[2]] # ignore conditioning var
  xv <- eval(xvname, data)
  if(!length(xlab)) xlab <- label(xv, units=TRUE, plot=TRUE,
                                  default=as.character(xvname)[1], grid=TRUE)
  if(!missing(groups)) groups <- eval(substitute(groups),data)
  if(!missing(subset)) subset <- eval(substitute(subset),data)
  ## Keep empty cells across panels: restore the option on exit
  dul <- options('drop.unused.levels')
  options(drop.unused.levels=FALSE)   ## for empty cells
  on.exit(options(dul))               ## across some panels
  lxyp <- lattice::xyplot
  do.call(lxyp,
          c(list(x = formula, data=data, prepanel=prepanel,
                 panel=panel),
            if(length(ylab))list(ylab=ylab),
            if(length(ylim))list(ylim=ylim),
            if(length(xlab))list(xlab=xlab),
            if(!missing(groups))list(groups=groups),
            if(!missing(subset))list(subset=subset),
            if(length(scales))list(scales=scales),
            list(...)))
}
## Apply lattice defaults favored by Hmisc: optionally blank strip
## backgrounds (via Hmisc's trellis.strip.blank in Misc.s) and set the
## line type/width used for dot-plot reference lines.
setTrellis <- function(strip.blank=TRUE, lty.dot.line=2,
                       lwd.dot.line=1)
{
  sRequire('lattice')
  if (strip.blank)
    trellis.strip.blank()
  dl <- lattice::trellis.par.get('dot.line')
  dl$lwd <- lwd.dot.line
  dl$lty <- lty.dot.line
  lattice::trellis.par.set('dot.line', dl)
  invisible()
}
## Coerce x to numeric while attaching a 'label' attribute, defaulting
## to the deparsed calling expression when no label is supplied.
numericScale <- function(x, label=NULL, ...)
{
  lab <- if (length(label)) label else deparse(substitute(x))
  structure(as.numeric(x), label = lab)
}
## See proc.scale.trellis, render.trellis, axis.trellis for details of
## how scale is used
Hmisc/R/solvet.s 0000644 0001762 0000144 00000000632 12243661443 013224 0 ustar ligges users #FEH version of solve with argument tol passed to qr
#8 Apr 91
## solve() variant that passes a tolerance through to qr(), so the
## caller controls when a system is declared singular (FEH, 8 Apr 91).
## a may be a matrix or an existing qr() decomposition; with b missing
## the inverse of a is returned.
solvet <- function(a, b, tol=1e-9)
{
  qra <- if (is.list(a)) a else qr(a, tol = tol)
  if (qra$rank < ncol(qra$qr))
    stop("apparently singular matrix")
  if (missing(b)) {
    ## No right-hand side: invert, building an identity matrix in a way
    ## that keeps the dimnames of the decomposed matrix
    b  <- qra$qr
    db <- dim(b)
    if (db[1] != db[2])
      stop("matrix inverse only for square matrices")
    b[] <- rep(c(1, rep(0, db[1])), length.out = prod(db))
  }
  qr.coef(qra, b)
}
Hmisc/R/runParallel.r 0000644 0001762 0000144 00000013702 14606745411 014175 0 ustar ligges users ##' parallel Package Easy Front-End
##'
##' Given a function `onecore` that runs the needed set of simulations on
##' one CPU core, and given a total number of repetitions `reps`, determines
##' the number of available cores and by default uses one less than that.
##' By default the number of cores is one less than the number available
##' on your machine.
##' reps is divided as evenly as possible over these cores, and batches
##' are run on the cores using the `parallel` package `mclapply` function.
##' The current per-core repetition number is continually updated in
##' your system's temporary directory (/tmp for Linux and Mac, TEMP for Windows)
##' in a file named progressX.log where X is the core number.
##' The random number seed is set for each core and is equal to
##' the scalar `seed` + core number - 1. The default seed is a random
##' number between 0 and 10000, but it's best if the user provides the
##' seed so the simulation is reproducible.
##' The total run time is computed and printed.
##' `onecore` must create a named list of all the results created during
##' that one simulation batch. Elements of this list must be data frames,
##' vectors, matrices, or arrays. Upon completion of all batches,
##' all the results are rbind'd and saved in a single list.
##'
##' onecore must have an argument `reps` that will tell the function
##' how many simulations to run for one batch, another argument `showprogress`
##' which is a function to be called inside onecore to write to the
##' progress file for the current core and repetition, and an argument `core`
##' which informs `onecore` which sequential core number (batch number) it is
##' processing.
##' When calling `showprogress` inside `onecore`, the arguments, in order,
##' must be the integer value of the repetition to be noted, the number of reps,
##' `core`, an optional 4th argument `other` that can contain a single
##' character string to add to the output, and an optional 5th argument `pr`.
##' You can set `pr=FALSE` to suppress printing and have `showprogress`
##' return the file name for holding progress information if you want to
##' customize printing.
##'
##' If any of the objects appearing as list elements produced by onecore
##' are multi-dimensional arrays, you must specify an integer value for
##' `along`. This specifies to the `abind` package `abind` function
##' the dimension along which to bind the arrays. For example, if the
##' first dimension of the array corresponding to repetitions, you would
##' specify along=1. All arrays present must use the same `along` unless
##' `along` is a named vector and the names match elements of the
##' simulation result object.
##' Set `simplify=FALSE` if you don't want the result simplified if
##' onecore produces only one list element. The default returns the
##' first (and only) list element rather than the list if there is only one
##' element.
##'
##' See [here](https://hbiostat.org/rflow/parallel.html) for examples.
##'
##' @title runParallel
##' @param onecore function to run the analysis on one core
##' @param reps total number of repetitions
##' @param seed specifies the base random number seed. The seed used for core i will be `seed` + `i` - 1.
##' @param cores number of cores to use, defaulting to one less than the number available
##' @param simplify set to FALSE to not create an outer list if a `onecore` result has only one element
##' @param along see Details
##' @return result from combining all the parallel runs, formatting as similar to the result produced from one run as possible
##' @author Frank Harrell
##' @md
runParallel <- function(onecore, reps, seed=round(runif(1, 0, 10000)),
                        cores=max(1, parallel::detectCores() - 1),
                        simplify=TRUE, along) {
  ## Run 'reps' simulations split as evenly as possible over 'cores'
  ## cores via parallel::mclapply, then rbind/abind the per-core result
  ## lists back into a single result.  See the roxygen block above for
  ## the required onecore(reps, showprogress, core) contract.
  if(! requireNamespace('parallel', quietly=TRUE))
    stop('requires parallel package')
  ## Per-core progress files go in the system temporary area
  progressDir <- if(Sys.info()['sysname'] == 'Darwin') '/tmp' else paste0(dirname(tempdir()))
  stime <- Sys.time()
  ## Function to divide n things as evenly as possible into m groups
  ## See https://math.stackexchange.com/questions/199690
  evenly <- function(n, m) {
    a <- floor(n / m)
    r <- n %% m
    w <- c(rep(a + 1, r), rep(a, m - r))
    if(sum(w) != n) stop('program logic error')
    w
  }
  repsc <- evenly(reps, cores)
  ## Passed into onecore; writes "i of reps" to progress<core>.log and
  ## invisibly returns the file name (used when pr=FALSE)
  showprogress <- function(i=0, reps=0, core, other='', pr=TRUE) {
    file <- paste0(progressDir, '/progress', core, '.log')
    if(other != '') other <- paste0(other, ' ')
    if(pr) cat(other, i, ' of ', reps, '\n', sep='', file=file)
    invisible(file)
  }
  ## Worker for core i: reproducible per-core seed = seed + i - 1
  ff <- function(i) {
    set.seed(seed + i - 1)
    onecore(reps=repsc[i], showprogress=showprogress, core=i)
  }
  v <- parallel::mclapply(1 : cores, ff, mc.cores=cores, mc.set.seed=FALSE)
  v1 <- v[[1]]
  ## Propagate any per-core failure as one combined error message
  ite <- sapply(v, function(z) inherits(z, 'try-error'))
  if(any(ite)) {
    z <- sapply(v, function(x) {
      x <- as.character(attr(x, 'condition'))
      if(length(x)) x else '' })
    stop(paste(z, collapse=';'))
  }
  etime <- Sys.time()
  cat('\nRun time:', format(etime - stime), 'using', cores, 'cores\n')
  ## Separately for each element of each per-core list, stack the
  ## results so the user can treat them as if from a single run
  m <- length(v1) # number of elements in a per-core list
  R <- vector('list', m)
  names(R) <- names(v1)
  ## Combine the j-th element across cores: rbind for matrices,
  ## rbindlist for data frames/lists, abind (dimension 'along') for
  ## arrays, unlist for plain vectors
  u <- function(j) {
    x <- lapply(v, function(x) x[[j]])
    z <- x[[1]]
    if(is.matrix(z)) x <- do.call('rbind', x)
    else
      if(is.list(z)) x <- data.table::rbindlist(x)
    else if(is.array(z)) {
      if(! requireNamespace('abind', quietly=TRUE))
        stop('must install the abind package to handle arrays')
      al <- if(length(along) == 1) along else along[names(v1)[j]]
      x <- do.call(abind::abind, list(x, along=al))
    }
    else if(! is.atomic(z))
      stop(paste('list element', j,
                 ' of result returned by onecore is not data.frame, matrix, array, or vector'))
    else x <- unlist(x)   # vectors
    x
  }
  for(j in 1: m) R[[j]] <- u(j)
  ## Optionally unwrap the outer list when onecore produced one element
  if(simplify && m == 1) R[[1]] else R
}
Hmisc/R/mApply.s 0000644 0001762 0000144 00000003766 12243661443 013165 0 ustar ligges users mApply <- function(X, INDEX, FUN, ..., simplify=TRUE, keepmatrix=FALSE) {
  ## Matrix tapply
  ## X: matrix with n rows; INDEX: vector or list of vectors of length n
  ## FUN: function to operate on submatrices of x by INDEX
  ## ...: arguments to FUN; simplify: see sapply
  ## Modification of code by Tony Plate 10Oct02
  ## If FUN returns more than one number, mApply returns a matrix with
  ## rows corresponding to unique values of INDEX
  ## X should be either a Matrix or a Vector
  ## keepmatrix=TRUE suppresses the final drop() of singleton dimensions
  if((!is.matrix(X) && is.array(X)) || is.list(X)){
    if(is.data.frame(X))
      X <- as.matrix(X)
    else
      stop("X must either be a vector or a matrix")
  }
  km <- if(keepmatrix) function(x)x else function(x)drop(x)
  if(!is.matrix(X)) { ## X is a vector
    r <- tapply(X, INDEX, FUN, ..., simplify=simplify)
    if(is.matrix(r))
      r <- km(t(r))
    else if(simplify && is.list(r))
      r <- km(matrix(unlist(r), nrow=length(r),
                     dimnames=list(names(r),names(r[[1]])), byrow=TRUE))
  }
  else {
    ## Split the row indices by INDEX, apply FUN to each submatrix
    idx.list <- tapply(1:NROW(X), INDEX, c)
    r <- sapply(idx.list, function(idx,x,fun,...) fun(x[idx,,drop=FALSE],...),
                x=X, fun=FUN, ..., simplify=simplify)
    if(simplify)
      r <- km(t(r))
  }
  ## Recover missing column names by evaluating FUN once on all of X
  dn <- dimnames(r)
  lengthdn <- length(dn)
  if(lengthdn && !length(dn[[lengthdn]])) {
    fx <- FUN(X,...)
    dnl <- if(length(names(fx))) names(fx)
           else dimnames(fx)[[2]]
    dn[[lengthdn]] <- dnl
    dimnames(r) <- dn
  }
  ## Pad empty cells (nonexistent INDEX combinations) with NAs so the
  ## result can be reshaped into a regular array
  if(simplify && is.list(r) && is.array(r)) {
    ll <- sapply(r, length)
    maxl <- max(ll)
    empty <- (1:length(ll))[ll==0]
    for(i in empty)
      r[[i]] <- rep(NA, maxl)
    ## unlist not keep place for NULL entries for nonexistent categories
    first.not.empty <- ((1:length(ll))[ll > 0])[1]
    nam <- names(r[[first.not.empty]])
    dr <- dim(r)
    r <- aperm(array(unlist(r), dim=c(maxl,dr),
                     dimnames=c(list(nam),dimnames(r))),
               c(1+seq(length(dr)), 1))
  }
  r
}
Hmisc/R/gbayes.s 0000644 0001762 0000144 00000013654 13006204222 013156 0 ustar ligges users gbayes <- function(mean.prior, var.prior, m1, m2, stat, var.stat,
                   n1, n2, cut.prior, cut.prob.prior=.025) {
  ## Gaussian conjugate Bayesian analysis: combine a normal prior
  ## N(mean.prior, var.prior) with a normally distributed statistic
  ## 'stat' whose variance is var.stat (either a constant, or a
  ## function of the two sample sizes m1, m2).  If cut.prior is given,
  ## var.prior is instead derived so that the prior probability of the
  ## effect exceeding cut.prior equals cut.prob.prior.  Optional n1, n2
  ## additionally request the predictive distribution of the statistic
  ## in a future study with those sample sizes (var.stat must then be a
  ## function).  Returns a list of class 'gbayes'.
  if(!missing(cut.prior))
    var.prior <- ((cut.prior - mean.prior)/qnorm(1 - cut.prob.prior))^2
  if(!is.function(var.stat)) {
    vs <- var.stat
    if(!missing(n1))
      stop('may not specify n1,n2 when var.stat is not a function')
  } else
    vs <- var.stat(m1,m2)
  ## Precision-weighted posterior variance and mean
  var.post <- 1/(1/var.prior + 1/vs)
  mean.post <- (mean.prior/var.prior + stat/vs)*var.post
  result <- list(mean.prior=mean.prior, var.prior=var.prior,
                 mean.post=mean.post, var.post=var.post)
  if(!missing(n1)) {
    ## Predictive distribution of a future statistic: posterior mean,
    ## with variance inflated by the future sampling variance
    mean.pred <- mean.post
    var.pred <- var.post + var.stat(n1,n2)
    result$mean.pred <- mean.pred
    result$var.pred <- var.pred
  }
  structure(result, class='gbayes')
}
plot.gbayes <- function(x, xlim, ylim, name.stat='z', ...) {
  ## Plot the prior, posterior, and (when present) predictive normal
  ## densities stored in a gbayes object, labeling the curves with
  ## labcurve().  Default xlim is posterior mean +/- 6 posterior SDs.
  obj <- x
  pred <- length(obj$mean.pred)>0
  if(missing(xlim))
    xlim <- obj$mean.post + c(-6,6)*sqrt(obj$var.post)
  x <- seq(xlim[1], xlim[2], length=200)
  y1 <- dnorm(x,obj$mean.prior,sqrt(obj$var.prior))
  y2 <- dnorm(x,obj$mean.post, sqrt(obj$var.post))
  plot(x, y1, xlab=name.stat, ylab='Density',type='l',lty=1,
       ylim=if(missing(ylim))
              range(c(y1,y2))
            else
              ylim)
  curves <- vector('list',2+pred)
  ## NOTE(review): the 'Predictive' label below begins with an embedded
  ## newline inside the string literal; preserved as-is -- confirm
  ## whether that is deliberate
  names(curves) <- c('Prior','Posterior',
                     if(pred)'
Predictive')
  curves[[1]] <- list(x=x,y=y1)
  lines(x, y2, lty=2)
  curves[[2]] <- list(x=x,y=y2)
  if(pred) {
    y <- dnorm(x,obj$mean.pred,sqrt(obj$var.pred))
    lines(x, y, lty=3)
    curves[[3]] <- list(x=x,y=y)
  }
  labcurve(curves, ...)
  invisible()
}
## Probability that the true effect exceeds delta.w AND the classical
## two-sided test at level alpha is significant, averaged over a prior
## density for the effect.
## sd        : standard error of the effect estimate
## prior     : prior density function, optionally taking prior.aux as a
##             second argument
## upper     : upper limit of integration (finite value helps priors
##             that integrate poorly over (-Inf, Inf))
gbayes2 <- function(sd, prior, delta.w=0, alpha=0.05,
                    upper=Inf, prior.aux=NULL) {
  if (!is.function(prior))
    stop('prior must be a function')
  z <- qnorm(1 - alpha/2)
  ## Integrand: frequentist power at effect delta times prior density
  integrand <- function(delta, prior, delta.w, sd, z, prior.aux) {
    pow <- 1 - pnorm((delta.w - delta)/sd + z)
    den <- if (length(prior.aux)) prior(delta, prior.aux) else prior(delta)
    pow * den
  }
  ## Sanity check that the prior integrates to ~1 over (-Inf, upper)
  ip <- if (length(prior.aux))
          integrate(prior, -Inf, upper, prior.aux=prior.aux)[['value']]
        else
          integrate(prior, -Inf, upper)[['value']]
  if (abs(ip - 1) > .01)
    warning(paste('integrate failed to obtain 1.0 for integral of prior.\nDivided posterior probability by the integral it did obtain (',
                  format(ip),').\nTry specifying upper=.',sep=''))
  ## NOTE(review): despite the warning text, no division by ip is
  ## performed (matching the original behavior) -- confirm intent
  integrate(integrand, delta.w, upper,
            prior=prior, delta.w=delta.w, sd=sd, z=z,
            prior.aux=prior.aux)[['value']]
}
## v = variance of Xn after future obs.
## Predictive distribution of a future estimate (before any data) when
## the effect delta has a mixture-of-two-normals prior:
##   mix * N(d0, v0) + (1 - mix) * N(d1, v1).
## Returns a function g(delta, v, ...) giving the predictive density
## (what='density') or CDF (what='cdf') of the estimate for a future
## observation variance v; mix=1 or mix=0 select a single component.
gbayesMixPredNoData <- function(mix=NA, d0=NA, v0=NA, d1=NA, v1=NA,
                                what=c('density','cdf')) {
  what <- match.arg(what)
  g <- function(delta, v, mix, d0, v0, d1, v1, dist) {
    if(mix==1) {
      pv <- 1/(1 / v0 + 1 / v)
      dist(delta, d0, sqrt(pv))
    } else if(mix == 0) {
      pv <- 1/(1 / v1 + 1 / v)
      dist(delta, d1, sqrt(pv))
    } else {
      ## Mixture: weight the two component distributions by mix
      pv0 <- 1/(1 / v0 + 1 / v)
      pv1 <- 1/(1 / v1 + 1 / v)
      mix * dist(delta, d0, sqrt(pv0)) +
        (1-mix) * dist(delta, d1, sqrt(pv1))
    }
  }
  ## Bug fix: previously 'what' was match.arg'ed but ignored and dist
  ## defaulted to NA, so the returned function failed unless dist was
  ## passed explicitly.  dist now defaults to dnorm/pnorm according to
  ## 'what', consistent with gbayesMixPost.  Callers that passed dist
  ## explicitly are unaffected.
  formals(g) <- list(delta=numeric(0), v=NA, mix=mix, d0=d0, v0=v0,
                     d1=d1, v1=v1,
                     dist=switch(what, density=dnorm, cdf=pnorm))
  g
}
gbayesMixPost <- function(x=NA, v=NA, mix=1, d0=NA, v0=NA, d1=NA,
                          v1=NA, what=c('density','cdf','postmean')) {
  ## Build a function giving the posterior density, cdf, or mean of the
  ## effect delta when the prior is the two-component normal mixture
  ## mix*N(d0,v0) + (1-mix)*N(d1,v1) and the statistic x has variance v.
  what <- match.arg(what)
  g <- function(delta, x, v, mix=1,
                d0, v0, d1, v1, dist) {
    ## Posterior variance and mean under the first prior component
    pv0 <- 1 / (1 / v0 + 1 / v)
    pm0 <- (d0 / v0 + x / v) * pv0
    if(mix == 1)
      return(if(what == 'postmean') pm0
             else dist(delta, pm0, sqrt(pv0)))
    ## Posterior variance and mean under the second prior component
    pv1 <- 1 / (1 / v1 + 1 / v)
    pm1 <- (d1 / v1 + x / v) * pv1
    if(mix == 0)
      return(if(what == 'postmean') pm1
             else dist(delta, pm1, sqrt(pv1)))
    ## Proper mixture: reweight the components by the marginal likelihood
    ## of x under each.  The marginal SDs sqrt(v + v0), sqrt(v + v1) had
    ## omitted the v term until 2016-10-17.
    prior.odds <- mix / (1 - mix)
    likelihood.ratio <- dnorm(x, d0, sqrt(v + v0)) /
                        dnorm(x, d1, sqrt(v + v1))
    post.odds <- prior.odds * likelihood.ratio
    wt <- post.odds / (1 + post.odds)
    if(what == 'postmean')
      wt * pm0 + (1 - wt) * pm1
    else
      wt       * dist(delta, pm0, sqrt(pv0)) +
      (1 - wt) * dist(delta, pm1, sqrt(pv1))
  }
  ## Bake the data and prior parameters in as defaults; dist becomes the
  ## normal density or cdf (NULL when only the posterior mean is wanted)
  formals(g) <- list(delta=numeric(0), x=x, v=v, mix=mix,
                     d0=d0, v0=v0,
                     d1=d1, v1=v1,
                     dist=switch(what,
                                 density = dnorm,
                                 cdf     = pnorm,
                                 postmean= NULL))
  g
}
gbayesMixPowerNP <- function(pcdf, delta, v, delta.w=0, mix, interval,
                             nsim=0, alpha=0.05) {
  ## Power of the Bayesian decision rule "conclude efficacy when the
  ## posterior P(effect <= delta.w | x) <= alpha/2", for a posterior cdf
  ## function pcdf(delta, x, v, mix), true effect delta, and statistic
  ## variance v.  nsim=0 gives the exact answer via root finding over
  ## `interval`; nsim>0 estimates it by simulation with a 0.95 CI.
  if(nsim == 0) {
    ## Solve for the critical statistic value at which the posterior cdf
    ## evaluated at delta.w equals alpha/2
    rootfun <- function(x, delta.w, v, alpha, pcdf, mix) {
      pcdf(delta.w, x, v, mix) - alpha/2
    }
    ## When mix is not given, inherit the default baked into pcdf itself
    formals(rootfun) <- list(x=numeric(0), delta.w=delta.w, v=v,
                             alpha=alpha, pcdf=pcdf,
                             mix=if(missing(mix)) as.list(pcdf)$mix else mix)
    crit <- uniroot(rootfun, interval=interval)$root
    c('Critical value'=crit, Power=1 - pnorm(crit, delta, sqrt(v)))
  } else {
    ## Monte Carlo: simulate statistics under the true effect and count
    ## how often the posterior criterion is met
    xs <- rnorm(nsim, delta, sqrt(v))
    probs <- if(missing(mix))
               pcdf(delta.w, xs, v)
             else
               pcdf(delta.w, xs, v, mix=mix)
    pow <- mean(probs <= alpha/2)
    se  <- sqrt(pow * (1 - pow) / nsim)
    c(Power=pow, 'Lower 0.95'=pow - 1.96*se, 'Upper 0.95'=pow + 1.96*se)
  }
}
gbayes1PowerNP <- function(d0, v0, delta, v, delta.w=0, alpha=0.05) {
  ## Exact power of the single-normal-prior (N(d0, v0)) Bayesian rule:
  ## probability, under true effect delta with statistic variance v, that
  ## the posterior P(effect <= delta.w | x) falls at or below alpha/2.
  post.var <- 1 / (1 / v0 + 1 / v)
  z.alpha  <- qnorm(alpha / 2)
  ## Critical statistic value solving the posterior tail condition
  crit <- v * ((delta.w - sqrt(post.var) * z.alpha) / post.var - d0 / v0)
  1 - pnorm(crit, delta, sqrt(v))
}
Hmisc/R/cpower.s 0000644 0001762 0000144 00000006447 12243661443 013221 0 ustar ligges users ## tref time at which mortalities estimated
## n total sample size
## mc tref-year mortality, control
## r % reduction in mc by intervention
## accrual duration of accrual period
## tmin minimum follow-up time
## noncomp.c % non-compliant in control group (drop-ins)
## noncomp.i % non-compliant in intervention group (non-adherers)
## alpha type I error
## nc Sample size for control (if not n/2)
## ni Sample size for intervention (if not n/2)
## pr set to T to print intermediate results
##
## non-compliance handled by an approximation of Eq. 5.4 of
## Lachin JM, Foulkes MA (1986): Evaluation of sample size and power for
## analyses of survival with allowance for nonuniform patient entry,
## losses to follow-up, noncompliance, and stratification.
## Here we're using log hazard ratio instead of their hazard difference
cpower <- function(tref,
                   n,
                   mc,
                   r,
                   accrual,
                   tmin,
                   noncomp.c=0,
                   noncomp.i=0,
                   alpha=.05,
                   nc, ni,
                   pr=TRUE)
{
  ## Power of a two-arm survival comparison assuming exponential hazards,
  ## uniform accrual over `accrual` years, minimum follow-up tmin, and a
  ## test on the log hazard ratio; non-compliance handled by an
  ## approximation to Eq. 5.4 of Lachin & Foulkes (1986).
  ##
  ## tref      : time at which the mortalities are specified
  ## n         : total sample size (recomputed when both nc and ni given)
  ## mc        : tref-year mortality in the control arm (a fraction)
  ## r         : % reduction in mc produced by the intervention
  ## accrual   : duration of the accrual period
  ## tmin      : minimum follow-up time
  ## noncomp.c : % non-compliant in control group (drop-ins)
  ## noncomp.i : % non-compliant in intervention group (non-adherers)
  ## alpha     : two-sided type I error
  ## nc, ni    : per-arm sample sizes; each defaults to n/2
  ## pr        : TRUE to print intermediate results
  ##
  ## Returns a named scalar c(Power = ...).
  if(mc > 1)
    stop("mc should be a fraction")

  ## Mortality in intervention group after the stated % reduction
  mi <- (1 - r/100) * mc

  ## Equal allocation unless BOTH per-arm sizes are supplied
  ## (|| replaces the scalar misuse of |)
  if(missing(nc) || missing(ni)) {
    nc <- n/2; ni <- n/2
  } else n <- nc + ni

  if(pr) {
    cat("\nAccrual duration:",accrual,"y Minimum follow-up:",tmin,"y\n")
    cat("\nTotal sample size:",n,"\n")
    cat("\nAlpha=",alpha,"\n")
    d <- c("Control","Intervention")
    m <- c(mc,mi)
    names(m) <- d
    cat("\n",tref,"-year Mortalities\n",sep=""); print(m)
  }

  ## Exponential hazards matching the tref-year mortalities
  lamc <- -logb(1-mc)/tref
  lami <- -logb(1-mi)/tref
  if(pr) {
    lam <- c(lamc,lami)
    names(lam) <- d
    cat("\nHazard Rates\n");
    print(lam)
  }

  ## Probability that a subject's event is observed during the study:
  ## with uniform accrual, average the exponential CDF over follow-up
  ## times ranging from tmin to tmax
  tmax  <- tmin + accrual
  probc <- if(accrual == 0)
             1 - exp(-lamc*tmin)
           else
             1 - 1/accrual/lamc*(exp(-tmin*lamc) - exp(-tmax*lamc))
  probi <- if(accrual == 0)
             1 - exp(-lami*tmin)
           else
             1 - 1/accrual/lami*(exp(-tmin*lami) - exp(-tmax*lami))
  if(pr) {
    p <- c(probc,probi)
    names(p) <- d
    cat("\nProbabilities of an Event During Study\n")
    print(p)
  }

  ## Expected numbers of events per arm (previously these overwrote the
  ## mortality inputs mc/mi; renamed for clarity)
  ec <- probc*nc
  ei <- probi*ni
  if(pr) {
    m <- c(ec,ei)
    names(m) <- d
    cat("\nExpected Number of Events\n")
    print(round(m,1))
  }

  ## Expected value of the observed log hazard ratio
  delta <- logb(lami/lamc)
  if(pr)
    cat("\nHazard ratio:",format(exp(delta)),"\n")

  if(noncomp.c+noncomp.i > 0) {
    if(pr)
      cat("\nDrop-in rate (controls):",noncomp.c,
          "%\nNon-adherence rate (intervention):",noncomp.i,"%\n",sep="")
    ## Dilute the treatment effect in proportion to total non-compliance
    delta <- delta * (1 - (noncomp.c+noncomp.i)/100)
    if(pr)
      cat("Effective hazard ratio with non-compliance:",
          format(exp(delta)),"\n")
  }

  ## Variance of the observed log hazard ratio
  v <- 1/ec + 1/ei
  ## Get same as /sasmacro/samsizc.sas if use 4/(ec+ei)
  sd <- sqrt(v)
  if(pr)
    cat("Standard deviation of log hazard ratio:",format(sd),"\n")

  z <- -qnorm(alpha/2)
  c(Power = 1 - (pnorm(z - abs(delta)/sd) - pnorm(-z - abs(delta)/sd)))
}
Hmisc/R/nobsY.s 0000644 0001762 0000144 00000010575 14400242025 012775 0 ustar ligges users nobsY <- function(formula, group=NULL,
                  data=NULL, subset=NULL, na.action=na.retain,
                  matrixna=c('all', 'any')) {
  ## For each left-hand-side (response) variable of a multi-response
  ## formula, count the unique non-missing observations, optionally
  ## stratified by a right-hand-side variable named in `group`.
  ##
  ## formula  : a formula handled by the Formula package; may contain id()
  ##            on the RHS to mark subject identifiers, in which case
  ##            unique subjects rather than rows are counted
  ## group    : character name of an RHS variable to stratify counts by
  ## data, subset, na.action : usual model-frame arguments; NAs are kept
  ##            by default (na.retain) so they can be counted here
  ## matrixna : for matrix responses, 'all' counts a row as missing only
  ##            when all columns are NA; 'any' when any column is NA
  ##
  ## Returns list(nobs=named count vector, nobsg=group x response count
  ## matrix or NULL, id=subject ids, formula=original formula with any
  ## id() term removed).
  ## NOTE(review): relies on the Formula package and on na.retain/label
  ## defined elsewhere in this package.
  matrixna <- match.arg(matrixna)
  forig <- formula
  formula <- Formula::Formula(formula)
  ## Evaluate in a child environment so the helper definitions below do
  ## not pollute the formula's original environment
  environment(formula) <- new.env(parent = environment(formula))
  en <- environment(formula)
  ## id/pending/randomized may appear in formulas built by other Hmisc
  ## functions; make them identity functions so model.frame can evaluate
  ## the formula.  cond() is replaced by a constant-1 vector of the same
  ## length as its condition argument.
  assign(envir = en, 'id', function(x) x)
  assign(envir = en, 'pending', function(x) x)
  assign(envir = en, 'randomized', function(x) x)
  assign(envir = en, 'cond',
         function(x, label, condition) rep(1, length(condition)))
  ## .marginal. is created by addMarginal(); rows flagged there duplicate
  ## real observations and must be excluded from the counts
  marg <- length(data) && '.marginal.' %in% names(data)
  if(marg) formula <- update(formula, .~. + .marginal.)
  mf <- if(length(subset))
          model.frame(formula, data=data, subset=subset, na.action=na.action)
        else
          model.frame(formula, data=data, na.action=na.action)
  Y <- Formula::model.part(formula, data=mf, lhs=1)
  X <- Formula::model.part(formula, data=mf, rhs=1)
  ## Get id variable if present so can count unique subjects
  rhs <- terms(formula, rhs=1, specials='id')
  sr <- attr(rhs, 'specials')
  ## specials counts from lhs variables
  wid <- sr$id
  if(length(wid)) {
    xid <- X[[wid - ncol(Y)]]
    ## Multiple id() variables: paste them into a single key
    if(length(wid) > 1) {
      xid$sep <- '.'
      xid <- do.call('paste', xid)
    }
    ## Remove id() from formula
    forig <- as.character(forig)
    if(ncol(X) == 1) ## id() is the only right-hand term
      forig <- as.formula(paste(forig[2], ' ~ 1'))
    else {
      forig[3] <- sub(' \\+ id(.*)', '', forig[3])
      forig <- as.formula(paste(forig[2], forig[3], sep=' ~ '))
    }
  } else xid <- 1 : nrow(Y)
  idv <- xid
  group <- if(length(group) && group %in% names(X)) X[[group]]
  if(marg) {
    ## Drop the duplicated marginal rows before counting
    xm <- X$.marginal.
    if(length(group)) group <- group[xm == '']
    Y <- Y [xm == '',, drop=FALSE]
    xid <- xid[xm == '']
  }
  nY <- ncol(Y)
  nobs <- rep(NA, nY)
  ## Name each count by the variable's Hmisc label, falling back to the
  ## variable name when no label is set
  ylab <- sapply(Y, label)
  ylab <- ifelse(ylab == '', names(Y), ylab)
  names(nobs) <- ylab
  nobsg <- if(length(group)) {
    glev <- if(is.factor(group)) levels(group)
            else sort(unique(group[! is.na(group)]))
    matrix(NA, ncol=nY, nrow=length(glev), dimnames=list(glev, ylab))
  }
  if(nY > 0) for(i in 1 : nY) {
    y <- Y[[i]]
    ## is.na.Surv reduces to vector but need to keep as matrix
    notna <- if(is.matrix(y)) {
      numna <- rowSums(is.na(unclass(y)))
      if(matrixna == 'any') numna == 0 else numna < ncol(y)
    } else ! is.na(y)
    ## Count unique ids, not rows, so repeated measurements on one
    ## subject count once
    nobs[i] <- length(unique(xid[notna]))
    if(length(group))
      nobsg[, i] <- tapply(xid[notna], group[notna],
                           function(x) length(unique(x)))
  }
  structure(list(nobs=nobs, nobsg=nobsg, id=idv, formula=forig))
}
addMarginal <- function(data, ..., label='All',
                        margloc=c('last', 'first'), nested) {
  ## Add marginal observations to a data frame: for each variable named in
  ## ..., the whole data frame is duplicated with that variable set to
  ## `label`, so downstream stratified summaries get an overall category.
  ## Added rows are flagged in the .marginal. variable (a comma-separated
  ## list of the variables a row is marginal over) so that functions such
  ## as nobsY can exclude them from counts.
  ##
  ## data    : data frame to expand
  ## ...     : unquoted names of variables to marginalize over
  ## label   : level name used for the marginal category
  ## margloc : place the marginal level 'last' or 'first' among the
  ##           factor levels
  ## nested  : unquoted name of a variable nested within the ... variables;
  ##           it is also set to `label` in every duplicated block of rows

  ## Capture the nested variable's name without evaluating it
  nested <- as.character(substitute(nested))
  if(length(nested) && nested == '') nested <- NULL
  ## Recover the ... variable names from the unevaluated call; the first
  ## two call elements are the function name and `data`
  vars <- as.character(sys.call())[- (1 : 2)]
  vars <- intersect(vars, names(data))
  data$.marginal. <- ''
  margloc <- match.arg(margloc)
  if(length(nested) && (nested %nin% names(data)))
    stop(paste('Variable', nested, 'is not in data'))
  ## Save Hmisc label and units attributes; rbind below drops them
  labs <- sapply(data, function(x) {
    la <- attr(x, 'label')
    if(! length(la)) la <- ''
    la
  } )
  un <- sapply(data, function(x) {
    u <- attr(x, 'units')
    if(! length(u)) u <- ''
    u
  } )
  levs <- vector('list', length(vars))
  names(levs) <- vars
  for(v in setdiff(vars, nested)) {
    ## Duplicate the data with v (and any nested variable) set to the
    ## marginal label, tagging the duplicates in .marginal.
    d <- data
    d$.marginal. <- ifelse(d$.marginal. == '', v,
                           paste(d$.marginal., v, sep=','))
    levs[[v]] <- levels(as.factor(d[[v]]))
    levs[[v]] <- if(margloc == 'last') c(levs[[v]], label)
                 else c(label, levs[[v]])
    d[[v]] <- label
    if(length(nested)) {
      levs[[nested]] <- levels(as.factor(d[[nested]]))
      levs[[nested]] <- if(margloc == 'last') c(levs[[nested]], label)
                        else c(label, levs[[nested]])
      d[[nested]] <- label
    }
    data <- if(margloc == 'last') rbind(data, d) else rbind(d, data)
  }
  ## Convert the marginalized variables to factors that include the new
  ## marginal level at the requested position
  for(v in vars) data[[v]] <- factor(data[[v]], levs[[v]])
  ## Restore any Hmisc attributes
  if(any(labs != '') || any(un != ''))
    for(i in 1 : length(data)) {
      if(labs[i] != '') {
        attr(data[[i]], 'label') <- labs[i]
        class(data[[i]]) <- c('labelled', class(data[[i]]))
      }
      if(un[i] != '') attr(data[[i]], 'units') <- un[i]
    }
  data
}
Hmisc/R/src.s 0000644 0001762 0000144 00000000742 12243661443 012501 0 ustar ligges users ##Function to source(x) if x is given, or source(last x given) otherwise
##Last x is stored in options() last.source. x is unquoted with .s omitted.
##Author: Frank Harrell 19May91
src <- function(x) {
  ## source() a file named by an unquoted symbol with the .s suffix
  ## omitted, remembering the file name in options(last.source) so a
  ## later bare src() re-sources the same file.
  if(missing(x)) {
    fname <- options()$last.source
    if(is.null(fname))
      stop("src not called with file name earlier")
  } else {
    fname <- paste0(as.character(substitute(x)), ".s")
    options(last.source=fname, TEMPORARY=FALSE)
  }
  source(fname)
  cat(fname, "loaded\n")
  invisible()
}
Hmisc/R/GiniMd.s 0000644 0001762 0000144 00000001232 14514516434 013056 0 ustar ligges users ## David HA (1968): Gini's mean difference rediscovered. Biometrika 55 No. 3
## p. 573-575
## For binary x, is 2*n*p*(1-p)/(n-1) = approx 2*p*(1-p)
## For trinomial with value A (frequency a) B (freq b) C (freq c):
## 2*(ab|A-B| + ac|A-C|+bc|B-C|)/[n(n-1)]
## = 2n/(n-1) * [ PaPb|A-B| + PaPc|A-C| + PbPb|B-C| ]
GiniMd <- function(x, na.rm=FALSE) {
  ## Gini's mean difference computed from order statistics using the
  ## weighting of David (1968), Biometrika 55:573-575.
  ## x     : numeric vector
  ## na.rm : drop NAs first; otherwise any NA (or n < 2) yields NA
  if(na.rm && anyNA(x)) x <- x[!is.na(x)]
  n <- length(x)
  ## sum() over an empty vector would give 0, so intercept degenerate and
  ## NA-containing input here and return NA instead
  if(n < 2 || anyNA(x)) return(NA)
  wt <- 4 * (seq_len(n) - (n - 1) / 2) / (n * (n - 1))
  ## Centering at the mean is for numerical stability only
  sum(wt * sort(x - mean(x)))
}
Hmisc/R/misc.get.s 0000644 0001762 0000144 00000020105 14376663314 013426 0 ustar ligges users spss.get <- function(file, lowernames=FALSE,
                     datevars=NULL,
                     use.value.labels=TRUE,
                     to.data.frame=TRUE,
                     max.value.labels=Inf,
                     force.single=TRUE, allow=NULL, charfactor=FALSE,
                     reencode=NA) {
  ## Read an SPSS file via foreign::read.spss and clean the result:
  ## legal unique R names, SPSS variable labels copied to Hmisc labels,
  ## optional date conversion, and compact (integer) storage modes.
  ##
  ## file, use.value.labels, to.data.frame, max.value.labels, reencode :
  ##              passed straight to read.spss
  ## lowernames : lower-case the variable names
  ## datevars   : names of variables to convert from SPSS date format
  ## force.single : store whole-number numerics within integer range as
  ##              integer (4 bytes rather than 8)
  ## allow      : extra characters allowed in names (passed to makeNames)
  ## charfactor : convert character vectors with many repeats to factors
  w <- read.spss(file, use.value.labels=use.value.labels,
                 to.data.frame=to.data.frame,
                 max.value.labels=max.value.labels,
                 reencode=reencode)
  a <- attributes(w)
  vl <- a$variable.labels
  ## NOTE(review): the next assignment is immediately overwritten by the
  ## makeNames call below; apparently a leftover
  nam <- a$names
  nam <- makeNames(a$names, unique=TRUE, allow=allow)
  if(lowernames) nam <- casefold(nam)
  names(w) <- nam
  lnam <- names(vl)
  ## Copy SPSS variable labels to Hmisc labels when non-empty and
  ## different from the variable name
  if(length(vl))
    for(i in 1:length(vl)) {
      n <- lnam[i]
      lab <- vl[i]
      if(lab != '' && lab != n) label(w[[i]]) <- lab
    }
  attr(w, 'variable.labels') <- NULL
  if(force.single || length(datevars) || charfactor)
    for(v in nam) {
      x <- w[[v]]
      changed <- FALSE
      if(v %in% datevars) {
        x <- importConvertDateTime(x, 'date', 'spss')
        changed <- TRUE
      } else if(all(is.na(x))) {
        ## all-NA column: store as integer to halve the memory footprint
        storage.mode(x) <- 'integer'
        changed <- TRUE
      } else if(!(is.factor(x) || is.character(x))) {
        if(all(is.na(x))) {
          ## NOTE(review): unreachable — the all-NA case is already caught
          ## by the branch above
          storage.mode(x) <- 'integer'
          changed <- TRUE
        } else if(max(abs(x),na.rm=TRUE) <= (2^31-1) &&
                  all(floor(x) == x, na.rm=TRUE)) {
          ## whole numbers within integer range: store as integer
          storage.mode(x) <- 'integer'
          changed <- TRUE
        }
      } else if(charfactor && is.character(x)) {
        ## Convert character to factor when fewer than half the values
        ## are distinct
        if(length(unique(x)) < .5*length(x))
        {
          x <- sub(' +$', '', x) # remove trailing blanks
          x <- factor(x, exclude='')
          changed <- TRUE
        }
      }
      if(changed) w[[v]] <- x
    }
  w
}
csv.get <- function(file, lowernames=FALSE, datevars=NULL, datetimevars=NULL,
                    dateformat='%F', fixdates=c('none','year'),
                    comment.char = "", autodate=TRUE, allow=NULL,
                    charfactor=FALSE,
                    sep=',', skip=0, vnames=NULL, labels=NULL, text=NULL, ...){
  ## Read a delimited file (or a literal character string `text`) and run
  ## the result through cleanup.import.
  ##
  ## file, text   : data source; exactly one may be specified
  ## lowernames   : lower-case the variable names
  ## datevars, datetimevars, dateformat, fixdates, autodate, charfactor :
  ##                passed to cleanup.import for date/factor handling
  ## comment.char, sep, skip : passed to read.csv
  ## allow        : extra characters allowed in names (makeNames)
  ## vnames       : 1-based row number within the file holding the
  ##                variable names
  ## labels       : 1-based row number holding variable labels
  ## ...          : additional arguments for read.csv
  fixdates <- match.arg(fixdates)
  if(length(text) && ! missing(file))
    stop('may not specify both file and text')
  ## Read one raw row `skip` rows in; used to fetch the rows containing
  ## variable names or labels
  scn <- function(skip)
    if(length(text)) scan(text=text, what=character(0), skip=skip,
                          nlines=1, sep=sep, quiet=TRUE)
    else
      scan(file, what=character(0), skip=skip, nlines=1, sep=sep, quiet=TRUE)
  ## read.csv wrapper honoring the file/text choice
  ## (bug fix: removed a stray empty argument after skip=skip)
  rcsv <- function(...)
    if(length(text)) read.csv(text=text, check.names=FALSE,
                              comment.char=comment.char, sep=sep, skip=skip, ...)
    else
      read.csv(file, check.names=FALSE, comment.char=comment.char,
               sep=sep, skip=skip, ...)
  if(length(vnames)) vnames <- scn(skip=vnames - 1)
  if(length(labels)) labels <- scn(skip=labels - 1)
  w <- if(length(vnames))
         rcsv(header=FALSE, col.names=vnames)
       else
         rcsv()
  n <- nam <- names(w)
  ## Make legal, unique R names, remembering whether anything changed so
  ## that the original names can be preserved as labels
  m <- makeNames(n, unique=TRUE, allow=allow)
  if(length(labels)) n <- labels
  if(lowernames)
    m <- casefold(m)
  changed <- any(m != nam)
  if(changed)
    names(w) <- m
  cleanup.import(w,
                 labels=if(length(labels))labels else if(changed)n else NULL,
                 datevars=datevars, datetimevars=datetimevars,
                 dateformat=dateformat, autodate=autodate,
                 fixdates=fixdates, charfactor=charfactor)
}
stata.get <- function(file, lowernames=FALSE,
                      convert.dates=TRUE, convert.factors=TRUE,
                      missing.type=FALSE, convert.underscore=TRUE,
                      warn.missing.labels=TRUE, force.single=TRUE,
                      allow=NULL, charfactor=FALSE, ...) {
  ## Read a Stata .dta file via foreign::read.dta and enhance the result:
  ## legal unique R names, Stata variable labels copied to Hmisc labels,
  ## compact storage modes, and Stata metadata preserved in attributes.
  ##
  ## Most arguments are passed straight to read.dta; see its documentation.
  ## lowernames   : lower-case the variable names
  ## force.single : store whole-number numerics within integer range as
  ##                integer
  ## allow        : extra characters allowed in names (makeNames)
  ## charfactor   : convert character vectors with many repeats to factors
  ## depends on the read.dta function from foreign

  ## Function to convert the elements of w into more compact
  ## data storage types.
  convertObjs <- function(x, charfactor, force.single) {
    ## Date is not necessarily an integer but it ignores any
    ## fraction it might have
    if((inherits(x, 'Date') || is.factor(x))
       && storage.mode(x) != 'integer') {
      storage.mode(x) <- 'integer'
    } else if(charfactor && is.character(x)) {
      ## If x is a character and arg charfactor is TRUE then
      ## convert x to a factor if the number of unique values of x is less
      ## than half the total number of values in x
      if(length(unique(x)) < length(x) / 2)
      {
        x <- sub(' +$', '', x) # remove trailing blanks
        x <- factor(x, exclude='')
      }
    } else if(is.numeric(x)) {
      if(all(is.na(x))) {
        ## if all values are NA then convert to integer because
        ## it is 4 bytes instead of 8
        storage.mode(x) <- 'integer'
      }
      else if(force.single && max(abs(x), na.rm=TRUE) <= (2^31-1) &&
              all(floor(x) == x, na.rm=TRUE)) {
        ## convert x to integer if arg force.single is TRUE and the maximum
        ## absolute value of x is less than the maximum value that an
        ## integer can store
        storage.mode(x) <- 'integer'
      }
    }
    return(x)
  }
  ## A function to create additional attributes to add to the elements of
  ## w
  create.attribs <- function(var.label, val.label, format, label.table) {
    attribs <- list()
    if(format != '') {
      attribs$format <- format
    }
    ## Translate Stata variable labels into Hmisc variable labels
    if(var.label != '') {
      attribs$label <- var.label
    }
    ## The label.table values are found by checking whether there is a
    ## non-empty value in val.labels.  That value corresponds to
    ## a named element in label.table.
    ## Check to see if val.label is not empty and it is one of the
    ## names in label.table and that its value is not NULL
    if(val.label != '' && val.label %in% names(label.table) &&
       !is.null(label.table[[val.label]])) {
      attribs$value.label.table <- label.table[[val.label]]
    }
    return(attribs)
  }
  ## Read the stata file into w
  w <- read.dta(file, convert.dates=convert.dates,
                convert.factors=convert.factors,
                missing.type=missing.type,
                convert.underscore=convert.underscore,
                warn.missing.labels=warn.missing.labels, ...)
  ## extract attributes from w
  a <- attributes(w)
  num.vars <- length(w)
  ## Translate attribute names into legal, unique R names
  nam <- makeNames(a$names, unique=TRUE, allow=allow)
  if(lowernames) nam <- casefold(nam, upper=FALSE)
  a$names <- nam
  ## If var.labels is empty then create an empty char vector.
  if(!length(a$var.labels)) {
    a$var.labels <- character(num.vars)
  }
  ## If val.labels is empty then create an empty char vector.
  if(length(a$val.labels)) {
    val.labels <- a$val.labels
  } else {
    val.labels <- character(num.vars)
  }
  ## create list of attributes for the elements in w.  An mapply is faster
  ## than a for loop in large data sets.
  attribs <- mapply(FUN=create.attribs, var.label=a$var.labels,
                    val.label=val.labels, format=a$formats,
                    MoreArgs=list(label.table=a$label.table),
                    SIMPLIFY=FALSE)
  ## clear var.labels attribute
  attr(w, 'var.labels') <- NULL
  ## Convert the elements of w as needed
  w <- lapply(w, FUN=convertObjs, force.single=force.single,
              charfactor=charfactor)
  ## strip off the naming info for w
  w <- unname(w)
  ## add the new attributes to the current attributes of
  ## the elements of w
  for(i in seq(along.with=w)) {
    ## Set the label for the element
    if('label' %in% names(attribs[[i]])) {
      label(w[[i]]) <- attribs[[i]]$label
      ## clear the label value from attribs[[i]]
      attribs[[i]]$label <- NULL
    }
    ## combine the new attribs with the current attributes
    consolidate(attributes(w[[i]])) <- attribs[[i]]
  }
  ## add the names, rownames, class variables, and some extra stata
  ## info back to w
  stata.info <- a[c('datalabel','version','time.stamp','val.labels','label.table')]
  attributes(w) <- c(a[c('names','row.names','class')],
                     stata.info=list(stata.info))
  return(w)
}
upFirst <- function(txt, lower=FALSE, alllower=FALSE) {
  ## Convert character strings to title case.  By default every word is
  ## capitalized except listed articles/conjunctions/prepositions (the
  ## first word is always capitalized).  lower=TRUE capitalizes only the
  ## first word; alllower=TRUE lower-cases everything.  In all modes,
  ## words containing two or more capital letters are treated as acronyms
  ## and left unchanged.
  ##
  ## txt      : character vector to transform element-wise
  ## lower    : capitalize only the first word of each element
  ## alllower : lower-case all non-acronym words
  f <- function(x) {
    ## Words conventionally not capitalized inside titles
    notcap <- c('a', 'about', 'above', 'across', 'after', 'against',
                'along', 'among', 'an', 'and', 'around', 'as', 'at',
                'before', 'behind', 'below', 'beneath', 'beside',
                'besides', 'between', 'beyond', 'but', 'by', 'despite',
                'down', 'during', 'except', 'following', 'for', 'from',
                'in', 'inside', 'into', 'like', 'mid', 'minus', 'near',
                'next', 'nor', 'of', 'off', 'on', 'onto', 'opposite',
                'or', 'out', 'outside', 'over', 'past', 'per', 'plus',
                'regarding', 'round', 'save', 'since', 'so', 'than',
                'the', 'through', 'throughout', 'till', 'times',
                'to', 'toward', 'towards', 'under', 'underneath',
                'unlike', 'until', 'up', 'upon', 'via', 'vs.', 'when',
                'with', 'within', 'without', 'worth', 'yet')
    s <- strsplit(x, " ")[[1]]
    ## Words that have more than one upper case letter are assumed to be
    ## acronyms whose capitalization must be preserved
    a <- grepl('[A-Z]{1,}.*[A-Z]{1,}', s)
    first <- seq_along(s) == 1   # seq_along is safe for empty strings
    s <- if(alllower)
           ifelse(a, s, tolower(s))
         else if(lower)
           ifelse(a, s, ifelse(first,
                               paste0(toupper(substring(s, 1, 1)),
                                      tolower(substring(s, 2))),
                               tolower(s)))
         else
           ## %nin% (Hmisc-only) inlined as ! %in% so the function is
           ## self-contained
           ifelse(a, s, ifelse(first | ! (s %in% notcap),
                               paste0(toupper(substring(s, 1, 1)),
                                      tolower(substring(s, 2))),
                               tolower(s)))
    paste(s, collapse=' ')
  }
  ## Bug fix: the previous `1 : length(txt)` loop turned empty input into
  ## a length-1 garbage string; seq_along handles character(0) correctly
  for(i in seq_along(txt)) txt[i] <- f(txt[i])
  txt
}
Hmisc/R/transcan.s 0000644 0001762 0000144 00000124753 14433152227 013532 0 ustar ligges users ## $Id$
transcan <-
function(x, method=c("canonical","pc"),
categorical=NULL, asis=NULL, nk,
imputed=FALSE, n.impute,
boot.method=c('approximate bayesian', 'simple'),
trantab=FALSE, transformed=FALSE,
impcat=c("score","multinom","rpart"),
mincut=40,
inverse=c('linearInterp','sample'), tolInverse=.05,
pr=TRUE, pl=TRUE, allpl=FALSE, show.na=TRUE,
imputed.actual=c('none','datadensity','hist','qq','ecdf'),
iter.max=50, eps=.1, curtail=TRUE,
imp.con=FALSE, shrink=FALSE, init.cat="mode",
nres=if(boot.method=='simple')200 else 400,
data, subset, na.action, treeinfo=FALSE,
rhsImp=c('mean','random'),
details.impcat='', ...)
{
##This is a non-.Internal version of the approx function. The
##S-Plus version of approx sometimes bombs with a bus error.
asing <- function(x)x
call <- match.call()
method <- match.arg(method)
impcat <- match.arg(impcat)
boot.method <- match.arg(boot.method)
imputed.actual <- match.arg(imputed.actual)
inverse <- match.arg(inverse)
rhsImp <- match.arg(rhsImp)
if(missing(n.impute)) n.impute <- 0
if(n.impute > 0) {
imputed <- TRUE
if(impcat == 'rpart')
stop('n.impute not supported for impcat="rpart"')
warning('transcan provides only an approximation to true multiple imputation.\nA better approximation is provided by the aregImpute function.\nThe MICE and other S libraries provide imputations from Bayesian posterior distributions.')
}
if(imputed.actual!='none') imputed <- TRUE
# if(impcat=='multinom') require(nnet)
# if(impcat=='rpart') require(rpart)
formula <- nact <- NULL
if(inherits(x,"formula")) {
formula <- x
y <- match.call(expand.dots=FALSE)
y$x <- y$method <- y$categorical <- y$asis <- y$nk <- y$imputed <-
y$trantab <- y$impcat <- y$mincut <- y$pr <- y$pl <- y$allpl <- y$show.na <-
y$iter.max <- y$eps <- y$curtail <- y$imp.con <- y$shrink <-
y$init.cat <- y$n.impute <- y$... <- y$nres <- y$boot.method <-
y$transformed <- y$treeinfo <- y$imputed.actual <-
y$inverse <- y$tolInverse <- y$details.impcat <-
y$rhsImp <- NULL
y$formula <- x
if(missing(na.action))
y$na.action <- na.retain
y[[1]] <- as.name("model.frame")
y <- eval(y, sys.parent())
nact <- attr(y,"na.action")
d <- dim(y)
# Error if user is trying to use a non-allowed formula
if(length(attr(y, "terms")) > 2)
stop('transcan does not support a left hand side variable in the formula')
nam <- all.vars(attr(y, "terms"))
# Error if user has passed an invalid formula
if(length(nam) != d[2])
stop(paste('Formula', formula,
'does not have a dominant inner variable.'))
if(!length(asis)) {
Terms <- terms(formula, specials='I')
asis <- nam[attr(Terms,'specials')$I]
## all.vars will cause I() wrapper to be ignored
}
x <- matrix(NA,nrow=d[1],ncol=d[2],
dimnames=list(attr(y,"row.names"),nam))
for(i in 1:d[2]) {
w <- y[[i]]
if(is.character(w))
w <- factor(w)
if(is.factor(w)) {
x[,i] <- unclass(w)
categorical <- c(categorical, nam[i])
} else {
x[,i] <- w
nu <- length(unique(w[!is.na(w)]))
if(nu<2)
stop(paste("variable",nam[i],"has only one value"))
if(nu==2)
asis <- c(asis, nam[i])
else if(nu==3) categorical <- c(categorical, nam[i])
}
}
}
nam <- dimnames(x)[[2]]
rnam <- dimnames(x)[[1]]
if(length(rnam)==0)
rnam <- as.character(1:nrow(x))
p <- ncol(x)
if(!length(nam))
stop("x must have column names")
n <- nrow(x)
if(missing(nk))
nk <- 3*(n<30)+4*(n>=30 & n<100)+5*(n>=100)
## Compute constant to multiply canonical variates by to get a variance of 1.0
varconst <- sqrt(n-1)
if(length(categorical)) {
if(length(categorical)==1 && categorical=="*")
categorical <- nam
## oldopts <- options(c('na.action','contrasts'))
## R does not allow multiple options to be spec.
oldopts <- options()
## names(oldopts) <- c('na.action','contrasts') #windows can mess this up
if(impcat == 'rpart') {
options(contrasts=c("contr.treatment","contr.poly"))
on.exit(options(oldopts))
}
}
if(length(asis)==1 && asis=="*")
asis <- nam
R <- parms <- coef <- fill.con <- Imputed <- Trantab <- vector("list",p)
fillin <- rep(NA,p);
names(fillin) <- nam
scale <- rep(1,p);
names(scale) <- nam;
names(Trantab) <- nam
nparm <- shr <- fillin
if(n.impute > 0) {
Resid <- vector("list",p)
names(Resid) <- nam
} else Resid <- NULL
datad <- list();
datad.ranges <- list()
## For canonical-variate expansions (standardized), use scale of 1
xcoef <- matrix(NA, nrow=p, ncol=p+1, dimnames=list(nam,c("intercept",nam)))
usefill <- 1*(is.logical(imp.con) && imp.con)+2*(is.numeric(imp.con))
if(usefill==2 && length(imp.con)!=p)
stop("length of imp.con != ncol(x)")
for(i in 1:p) {
lab <- nam[i]
y <- x[,i]
na <- is.na(y)
w <- y[!na]
if(imputed && n.impute==0)
Imputed[[i]] <- double(sum(na))
if(lab %in% asis) {
fillin[i] <- if(usefill==2) imp.con[i]
else median(w)
scale[i] <- mean(abs(w-fillin[i]))
if(is.na(fillin[i]))
stop(paste("fillin value for",lab,"is NA"))
coef[[i]] <- c(0,1)
nparm[i] <- 1
} else {
if(lab %in% categorical) {
w <- table(y)
z <- as.numeric(names(w))
if(usefill==2) fillin[i] <- imp.con[i]
else fillin[i] <- z[w==max(w)][1] #most freq. category
assign("Y", as.factor(y))
opold <- options(na.action="na.retain")
w <- model.matrix(~Y) # uses contr.treatment (reference cell coding)
options(na.action=opold[[1]]) #for some reason Windows needs opt name
r <- attr(w,"contrasts")[[1]]
attr(r,"codes") <- z
parms[[i]] <- r
R[[i]] <- w[,-1,drop=FALSE] #kill intercept column
nparm[i] <- length(z)-1
if(usefill>0) {
fill.con[[i]] <- w[y==imp.con[i],-1,drop=FALSE][1,,drop=FALSE]
##select first hit
if(length(fill.con[[i]])==0)
stop("imp.con has a code not in the data for a categorical var")
}
} else {
fillin[i] <- if(usefill==2) imp.con[i]
else median(y[!is.na(y)])
R[[i]] <- rcspline.eval(y, nk=nk, inclx=TRUE)
parms[[i]] <- attr(R[[i]], "knots")
if(usefill>0)
fill.con[[i]] <-
rcspline.eval(fillin[i], parms[[i]], inclx=TRUE)
nparm[i] <- length(parms[[i]])-1
}
}
}
xt <- x
if(init.cat %in% c("mode","random")) for(i in (1:p)[nam %in% categorical])
xt[,i] <-
if(init.cat=="mode") {
if(is.na(fillin[i]))
stop(paste("fillin value for",nam[i],"is NA"))
xt[,i]==fillin[i]
} else runif(n)
p1 <- p-1
R2 <- R2.adj <- double(p)
r2 <- r2.adj <- NA
Details.impcat <- NULL
last.iter <- FALSE
if(pr) cat("Convergence criterion:")
milab <- as.character(1:n.impute)
predSamp <- function(res, yhat, rnam, allowed.range, n.impute,
boot.method)
{
m <- length(yhat)
yhat <- matrix(rep(yhat, n.impute), ncol=n.impute,
dimnames=list(rnam, as.character(1:n.impute)))
errors <- if(boot.method=='simple')
sample(res, m*n.impute, replace=TRUE)
else {
## From Jeff Longmate (jlongmat@coh.org):
n <- length(res)
i <- ceiling(runif(n*n.impute, 0, n))
j <- ceiling(runif(m*n.impute, 0, n)) +
rep((0:(n.impute-1))*n, rep(m, n.impute))
res[i[j]]
}
structure(pmax(pmin(yhat + errors, allowed.range[2]), allowed.range[1]),
names=NULL)
}
anyVarNA <- rep(FALSE, n)
for(iter in 1:iter.max) {
dmax <- 0
if(last.iter) xtl <- xt
for(i in 1:p) {
lab <- nam[i]
catg <- lab %in% categorical
xx <- xt[,-i,drop=FALSE]
k.other <- sum(pmax(nparm[-i]-1,0))/(p-1)+p-1 #effective d.f.
if(iter==1) {
for(j in 1:p1) {
if(any(z <- is.na(xx[,j]))) {
l <- (nam[-i])[j]
if(is.na(fillin[l]))
stop(paste("variable",l,"has fillin value of NA"))
xx[z,j] <- fillin[l]
}
}
}
if(method=="pc") {
z <- xx
for(k in 1:p1) {
y <- z[,k];
z[,k] <- (y-mean(y))/sqrt(var(y))
}
P <- prcomp(z)$x[,1] # 1st prin. comp.
}
j <- is.na(x[,i])
anyVarNA[j] <- TRUE
if(lab %in% asis) {
y <- x[!j, i]
f <- lm.fit.qr.bare(xx[!j,,drop=FALSE], y)
newy <- x[,i]
names(newy) <- NULL
xcof <- f$coef
r2 <- f$rsquared
nn <- length(y)
r2.adj <- max(0,1-(1-r2)*(nn-1)/(nn-k.other-1))
if(shrink) {
ybar <- mean(y)
shr[i] <- h <- (nn-k.other-1)*r2/(nn-1)/r2.adj
xcof <- c(ybar*(1-h)+h*xcof[1],h*xcof[-1])
}
if(any(j))
newy[j] <-
if(usefill>0) fillin[i]
else cbind(1,xx[j,,drop=FALSE]) %*% xcof
res <- f$residuals
if(last.iter) {
ybar <- mean(y)
if(imputed & any(j)) {
r <- range(newy[!j])
Imputed[[i]] <-
if(n.impute==0)structure(pmax(pmin(newy[j],r[2]),r[1]),
names=rnam[j])
else
predSamp(res, newy[j], rnam[j], r, n.impute, boot.method)
NULL
}
xcoef[i, c("intercept",nam[-i])] <- xcof
if(trantab) {
rr <- range(y);
Trantab[[i]] <- list(x=rr, y=rr);
NULL
}
if(n.impute > 0)
Resid[[i]] <-
if(length(res) <= nres) asing(res)
else asing(sample(res, nres))
## was f$residuals 3 times
}
} else {
f <- cancor(xx[!j,,drop=FALSE], R[[i]][!j,,drop=FALSE])
r2 <- f$cor[1]^2
xcof <- c(intercept=-sum(f$xcoef[,1] * f$xcenter),
f$xcoef[,1])*varconst
cof <-
if(method=="canonical")
c(intercept=-sum(f$ycoef[,1] * f$ycenter),
f$ycoef[,1])*varconst
else {
g <- lm.fit.qr.bare(R[[i]][!j,,drop=FALSE], P[!j])
g$coef
}
newy <- drop(cbind(1,R[[i]]) %*% cof)
if((n.impute > 0 && last.iter) || rhsImp=='random')
res <-
if(method=='canonical')
newy[!j] - cbind(1,xx[!j,,drop=FALSE]) %*% xcof
else g$residuals
if(n.impute > 0 && last.iter) {
Resid[[i]] <-
if(length(res) <= nres) asing(res)
else asing(sample(res, nres))
}
nn <- n - sum(j)
k <- nparm[i]-1+k.other
r2.adj <- max(0,1-(1-r2)*(nn-1)/(nn-k-1))
if(shrink) {
shr[i] <- h <- (nn-k-1)*r2/(nn-1)/r2.adj
xcof <- h*xcof #mean of can var=0
}
if(any(j)) newy[j] <-
if(usefill>0) drop(cbind(1,fill.con[[i]]) %*% cof)
else drop(cbind(1,xx[j,,drop=FALSE]) %*% xcof)
if(last.iter) {
coef[[i]] <- cof
xcoef[i,c("intercept",nam[-i])] <- xcof
if(trantab || (any(j) && catg &&
impcat %in% c("score","multinom"))) {
xa <- x[!j, i]
ya <- newy[!j]
tab <- table(paste(as.character(xa),
as.character(ya),sep=';'))
vals <- names(tab)
uvals <- unPaste(vals, ';')
names(tab) <- NULL
Trantab[[i]] <-
list(x=as.numeric(uvals[[1]]), y=as.numeric(uvals[[2]]),
frequency=unname(tab))
NULL
}
if(imputed & any(j)) {
if(catg) {
if(usefill>0)
pred <- rep(fillin[i], sum(j))
else {
if(impcat == 'rpart') {
y <- as.factor(x[,i])
zdf <- list(xx=xx, y=y)
f <- rpart(y ~ xx,
control=rpart.control(minsplit=mincut), data=zdf)
## won't work because rpart will not allow matrix x
pred <-
(t(apply(-predict(f,zdf)[j,,drop=FALSE],1,order)))[,1]
if(treeinfo) {
cat('\nProbabilities of Category Membership and Category Selected for',lab,'\n\n')
print(cbind(round(predict(f,zdf)[j,,drop=FALSE],3),
Mode=pred))
}
## Gets level number of most probable category
} else if(impcat=='score') {
##Get category code with score closest to pred. score
ti <- Trantab[[i]]
if(n.impute==0) {
##ww <- apply(outer(newy[j], ti$y,
##function(x,y)abs(x-y)),1,order)[1,]
ww <- order(ti$y)[round(approx(sort(ti$y),
1:length(ti$y),
xout=newy[j], rule=2)$y)]
## Thanks from Alan Zaslavsky :
## "The idea is to interpolate (after arranging in order) and then round the
## index, since the fractional part of the index represents the relative
## distance from the two adjacent values."
##pred <- round(approx(ti$y, ti$x, xout=newy[j], rule=2)$y)
pred <- ti$x[ww]
} else {
sval <- predSamp(0*res, newy[j], rnam[j], c(-Inf,Inf),
n.impute, boot.method)
ww <- order(ti$y)[round(approx(sort(ti$y),
1:length(ti$y),
xout=sval,
rule=2)$y)]
pred <- matrix(ti$x[ww], ncol=n.impute,
dimnames=list(rnam[j],milab))
names(pred) <- NULL
if(lab==details.impcat)
Details.impcat <-
list(pred.trans.na=sval,imputed=pred,
pred.trans.nona=cbind(1,xx[!j,]) %*% xcof,
obs=x[!j,i],trantab=ti)
}
} else {
## Multinomial logit
zdf <- list(y=as.factor(x[!j,i]),
xx=xx[!j,,drop=FALSE])
f <- multinom(y ~ xx, data=zdf,
trace=FALSE, maxit=200)
ncat <- length(levels(zdf$y))
## bug in predict.multinom when predictor is a matrix
cf <- coef(f)
zdf <- cbind(1,xx[j,,drop=FALSE]) %*%
(if(is.matrix(cf)) t(cf) else as.matrix(cf))
pred <- exp(cbind(0,zdf))/
(1 + apply(exp(zdf),1,sum))
dimnames(pred)[[2]] <- as.character(1:ncat)
pred <-
if(n.impute==0) (t(apply(-pred,1,order)))[,1]
else rMultinom(pred, n.impute)
}
}
if(n.impute==0)
names(pred) <- rnam[j]
Imputed[[i]] <- pred
NULL
} else {
if(n.impute == 0) {
if(usefill > 0)
Im <- rep(fillin[i], sum(j))
else
Im <- invertTabulated(x[!j,i], newy[!j], aty=newy[j],
name=nam[i], inverse=inverse,
tolInverse=tolInverse)
names(Im) <- rnam[j]
Imputed[[i]] <- Im
NULL
} else {
sval <- predSamp(res, newy[j], rnam[j], c(-Inf,Inf),
n.impute, boot.method)
sval.orig <- matrix(invertTabulated(x[!j,i], newy[!j],
aty=sval,
name=nam[i],
inverse=inverse,
tolInverse=tolInverse),
ncol=n.impute,
dimnames=list(rnam[j],milab))
names(sval.orig) <- NULL
Imputed[[i]] <- sval.orig
NULL
}
}
} ##end imputed
} ##end last.iter
} ##end non-asis
if(curtail && any(j)) {
r <- range(newy[!j])
newy[j] <- pmax(pmin(newy[j],r[2]),r[1])
}
if(iter>1) {
jj <- if(rhsImp=='mean')TRUE
else TRUE
dmax <- max(dmax, min(max(abs(xt[jj,i]-newy[jj]),na.rm=TRUE),
max(abs(-xt[jj,i]-newy[jj]),na.rm=TRUE))/scale[i])
##Allows for arbitrary flips (negation) of current transformation
}
if(rhsImp=='random')
newy[j] <- newy[j] + sample(res, sum(j), replace=TRUE)
if(last.iter) xtl[,i] <- newy
else xt[,i] <- newy
##don't update working transformations
##during last iteration since recomputing x-coefficients
##on the basis of current transformations, which may flip rapidly
if((pl & last.iter) | allpl) {
xti <- if(last.iter) xtl[,i]
else xt[,i]
plot(x[,i], xti, xlab=lab,ylab=paste("Transformed",lab))
title(sub=paste("R2=",format(round(r2,2)),sep=""),cex=.4,adj=0)
if(any(j))
title(sub=paste(sum(j),"missing"),cex=.4,adj=1)
if(show.na && any(j)) {
scat1d(xti[j], 4, ...)
if(imputed && last.iter)
scat1d(as.numeric(Imputed[[i]]), 3, ...)
}
}
if(last.iter && imputed.actual!='none' && any(j)) {
v1n <- nam[i]; v2n <- paste('Imputed',v1n)
datad[[v1n]] <- x[!j,i]
datad[[v2n]] <- Imputed[[i]]
datad.ranges[[v1n]] <- datad.ranges[[v2n]] <-
range(c(x[!j,i], Imputed[[i]]), na.rm=TRUE)
}
R2[i] <- r2; R2.adj[i] <- r2.adj
} #end i
if(pr && iter>1)
cat(format(round(dmax,3)),"")
if(pr && (iter %% 10 == 0))
cat("\n")
niter <- iter
if(last.iter)
break
last.iter <- (iter==(iter.max-1)) || (iter>1 && dmax3 & niter==iter.max & dmax>=eps)
stop(paste("no convergence in",iter.max,"iterations"))
## Use xtl instead of xt, otherwise transformed variables will not
## match ones from predict() or Function() since coefficients have
## been updated
if(pr && rhsImp=='mean')
cat("Convergence in",niter,"iterations\n")
if(imputed.actual=='datadensity') {
lab <- names(datad)
datadensity.data.frame(datad, ranges=datad.ranges,
labels=ifelse((1:length(lab)) %% 2, lab,'Imputed'))
} else if(imputed.actual !='none') {
namdd <- names(datad)
for(i in seq(1,length(datad),by=2)) {
if(imputed.actual=='hist')
histbackback(datad[i:(i+1)])
else {
v1 <- datad[[i]]; v2 <- datad[[i+1]]
n1 <- namdd[i]; n2 <- namdd[i+1]
if(imputed.actual=='ecdf' && is.numeric(datad[[i]]))
Ecdf(c(v1,v2), xlab=n1,
group=c(rep('actual',length(v1)),
rep('imputed',length(v2))))
else {
qqplot(v1, v2, xlab=n1, ylab=n2)
abline(a=0, b=1, lty=2)
}
}
}
}
names(R2) <- nam
if(pr) {
cat("R-squared achieved in predicting each variable:\n\n")
print(round(R2, 3))
}
names(R2.adj) <- nam
if(pr) {
cat("\nAdjusted R-squared:\n\n")
print(round(R2.adj, 3))
}
if(shrink) {
names(shr) <- nam
if(pr) {
cat("\nShrinkage factors:\n\n")
print(round(shr,3))
}
} else shr <- NULL
names(parms) <- names(coef) <- nam
r <- apply(xtl, 2, range)
dimnames(r) <- list(c("low","high"), nam)
if(imputed) {
names(Imputed) <- nam
} else Imputed <- NULL
structure(list(call=call, formula=formula, niter=niter, imp.con=usefill>0,
n.impute=n.impute, residuals=Resid, rsq=R2, rsq.adj=R2.adj,
shrinkage=shr,
inverse=inverse, tolInverse=tolInverse,
categorical=categorical, asis=asis, parms=parms, coef=coef,
xcoef=xcoef, fillin=fillin, scale=scale, ranges=r,
transformed=if(transformed)xtl,
trantab=if(trantab)Trantab, imputed=Imputed, na.action=nact,
rhsImp=rhsImp, details.impcat=Details.impcat),
class='transcan')
}
## Summary method for 'transcan' fits.  Prints the original call, the
## NA-action report, iteration count, per-variable R^2 and adjusted R^2,
## optional shrinkage factors, the matrix of canonical-variate
## x-coefficients, and a describe() summary of any imputed values.
##
## object : a transcan fit (new list style, or old style where components
##          were stored as attributes of the transformed matrix)
## long   : if TRUE also print the raw imputed values for each variable
## digits : significant digits used when summarizing non-categorical
##          imputations and the fill-in constants
## ...    : unused; present for S3 generic compatibility
summary.transcan <- function(object, long=FALSE, digits=6, ...)
{
  ## Old-style objects stored everything as attributes
  if(!is.list(object))
    object <- attributes(object)
  dput(object$call);
  cat("\n")
  if(length(nact <- object$na.action))
    naprint(nact)
  cat("Iterations:",object$niter,"\n\n")
  cat("R-squared achieved in predicting each variable:\n\n")
  print(round(object$rsq,3))
  cat("\nAdjusted R-squared:\n\n")
  print(round(object$rsq.adj,3))
  ## NOTE(review): transcan stores this component as 'shrinkage'; the
  ## $shrink access works only via partial matching — confirm intended
  if(length(shr <- object$shrink))
  {
    cat("\nShrinkage factors:\n\n")
    print(round(shr,3))
  }
  cat("\nCoefficients of canonical variates for predicting each (row) variable\n\n")
  ## Drop the intercept column; blank out entries for a variable's own row
  xcoef <- object$xcoef[,-1]
  g <- format(round(xcoef,2))
  g[is.na(xcoef)] <- ""
  print(g, quote=FALSE)
  imp <- object$imputed
  if(length(imp))
  {
    ## nimp is a "header not yet printed" flag; note the semicolon below:
    ## the assignment nimp <- FALSE executes on every pass, only the cat()
    ## is guarded by the if
    nimp <- TRUE
    for(nn in names(imp))
    {
      if(length(z <- imp[[nn]]))
      {
        if(nimp & !long)
          cat("\nSummary of imputed values\n\n");
        nimp <- FALSE
        if(long)
        {
          cat("\nImputed values for",nn,"\n\n");
          print(z)
        }
        ## Categorical imputations are described on the raw codes;
        ## continuous ones are rounded to 'digits' significant digits
        if(nn %in% object$categorical) {
          print(describe(as.vector(z), nn))
        } else {
          print(describe(signif(as.vector(z), digits), nn))
        }
      }
    }
  }
  if(object$imp.con)
    cat("\nImputed values set to these constants:\n\n")
  else cat("\nStarting estimates for imputed values:\n\n")
  print(signif(object$fillin, digits))
  invisible()
}
## Print method for 'transcan' fits: shows the model formula, then (when
## transformed scores were retained with transformed=TRUE) either the
## transformed-data matrix or, with long=TRUE, the entire unclassed object.
print.transcan <- function(x, long=FALSE, ...) {
  print(x$formula, showEnv=FALSE)
  cat("\n")
  xt <- x$transformed
  if(length(xt)) {
    if(long)
      print(unclass(x))
    else
      print.default(xt)
  }
  invisible()
}
## Insert imputed values stored by transcan() back into the original
## variable(s), returning variables of class 'impute'.
##
## x          : transcan fit (new list style or old attribute style)
## var        : one variable to fill in; when var and name are both
##              omitted, every variable with imputations is processed
## imputation : which of the n.impute multiple imputations to use;
##              required iff transcan ran with n.impute > 0
## name       : variable name (defaults to the expression given as var)
## pos.in     : search position for get() when data= is not supplied
## data       : data frame holding the original variables
## list.out   : in all-variables mode, return a named list of completed
##              variables instead of (invisibly) the imputation counts
## pr         : print a frequency table of imputed values
## check      : verify var's NAs match the imputations transcan stored
## ...        : unused
impute.transcan <-
  function(x, var, imputation,
           name=as.character(substitute(var)),
           pos.in, data, list.out=FALSE,
           pr=TRUE, check=TRUE, ...)
{
  if(!missing(imputation) && length(imputation) > 1)
    stop('imputation must be a single number')

  ## Check for old style: imputations lived in an attribute
  imp <- if(is.list(x)) x$imputed
         else attr(x, 'imputed')

  if(!length(imp)) {
    if(missing(var) && missing(name))
      stop('imputed=TRUE was not specified to transcan')
    warning("imputed was not specified to transcan")
    return(if(! missing(var)) var)
  }

  if(missing(var) && missing(name)) {
    ## ---- All-variables mode ----
    nams <- names(imp)
    if(list.out) {
      outlist <- vector('list', length(nams))
      names(outlist) <- nams
    }
    ## Locate the first variable just to learn the observation names
    if(missing(data)) {
      if(missing(pos.in))
        pos.in <- find(nams[1])[1]
      var1 <- get(nams[1], pos=pos.in)
    }
    else {
      if(any(ni <- nams %nin% names(data)))
        stop(paste('variable', paste(nams[ni],collapse=','),
                   'not in data'))
      var1 <- data[[nams[1]]]
    }
    namvar <- names(var1)
    if(! length(namvar) && ! missing(data))
      namvar <- row.names(data)
    if(check && length(namvar)==0)
      warning(paste('variable', nams[1],
                    'does not have a names() attribute\nand data does not have row.names. Assuming row names are integers.'))
    nimp <- integer(length(nams))
    names(nimp) <- nams
    for(nam in nams) {
      i <- imp[[nam]]
      if(!length(i)) {
        ## Nothing was imputed for this variable; pass it through
        if(list.out) outlist[[nam]] <-
          if(missing(data)) get(nam, pos=pos.in) else data[[nam]]
        next
      }
      ## A matrix means transcan ran with n.impute (one column per
      ## imputation); a plain vector means single imputation
      d <- dim(i)
      obsImputed <- if(length(d)) dimnames(i)[[1]] else names(i)
      ## i[,imputation] drops names if only one obs. imputed
      if(!missing(imputation)) {
        if(!length(d))
          stop('imputation can only be given when transcan used n.impute')
        if(imputation < 1 || imputation > d[2])
          stop(paste('imputation must be between 1 and',d[2]))
        i <- i[, imputation]
      }
      else if(length(d))
        stop('imputation must be specified when transcan used n.impute')
      v <- if(missing(data)) get(nam, pos=pos.in)
           else data[[nam]]
      ## Coerce so imputed integer codes can map back through levels()
      if(is.character(v)) v <- as.factor(v)
      ## Below was names(i) instead of match(...)
      if(length(namvar)) {
        ## Map the stored observation identifiers to positions in v,
        ## silently dropping imputations for observations not present
        sub <- match(obsImputed, namvar, nomatch=0)
        i <- i[sub > 0]
        sub <- sub[sub > 0]
      }
      else {
        ## No names anywhere: identifiers must be integer subscripts
        if(! all.is.numeric(obsImputed))
          stop(paste('names attribute of ',nam,
                     ' is not all numeric\n',
                     'and original observations did not have names',sep=''))
        sub <- as.integer(obsImputed)
      }
      if(check)
        if((missing(imputation) || imputation == 1) &&
           ! all(is.na(v[sub])))
          stop(paste('variable',nam,
                     'does not have same missing values as were present when transcan was run'))
      ## Fill in, translating codes for factors and 0/1 for logicals
      v[sub] <- if(is.factor(v)) levels(v)[as.integer(i)]
                else
                  if(is.logical(v)) i == 1
                  else
                    i
      ## Note: if v was empty before, new v would have arbitrary length
      ## Avoid problem by requiring all variables to be in data
      attr(v,'imputed') <- sub
      attr(v,'class') <- c('impute', attr(v,'class'))
      nimp[nam] <- length(i)
      if(list.out)
        outlist[[nam]] <- v
    }
    if(pr) {
      cat('\n\nImputed missing values with the following frequencies\n',
          'and stored them in variables with their original names:\n\n')
      print(nimp[nimp > 0])
    }
    if(list.out) {
      ## Guard against identifier mismatches producing ragged results
      z <- sapply(outlist, length)
      if(diff(range(z)) > 0) {
        cat('\n\nLengths of variable vectors:\n\n')
        print(z)
        stop('inconsistant naming of observations led to differing length vectors')
      }
      return(outlist)
    }
    return(invisible(nimp))
  }

  ## ---- Single-variable mode ----
  impval <- imp[[name]]
  if(name %nin% names(imp))
    warning(paste('Variable',name,
                  'was not specified to transcan or had no NAs'))
  if(!length(impval)) return(var)
  d <- dim(impval)
  if(!missing(imputation)) {
    if(!length(d))
      stop('imputation can only be given when transcan used n.impute')
    if(imputation < 1 || imputation > d[2])
      stop(paste('imputation must be between 1 and',d[2]))
    impval <- impval[,imputation]
  }
  else if(length(d))
    stop('imputation must be specified when transcan used n.impute')
  namvar <- names(var)
  if(!length(namvar)) {
    ## NOTE(review): this stop() message reads like a warning ("Assuming
    ## identifiers ... are integer subscripts") yet execution halts —
    ## confirm whether a warning was intended here
    if(missing(data))
      stop(paste('variable',name,
                 'does not have a names() attribute\nand data= was not given.\nAssuming identifiers stored by transcan are integer subscripts'))
    else
      namvar <- row.names(data)
    if(!length(namvar))
      stop(paste('variable',name,
                 'does not have a names() attribute\nand data has no row.names'))
  }
  if(length(namvar)) {
    ## Keep only imputations whose identifiers match observations in var
    sub <- match(names(impval), namvar, nomatch=0)
    impval <- impval[sub > 0]
    sub <- sub[sub > 0]
  }
  else {
    if(!all.is.numeric(names(impval)))
      stop(paste('names attribute of ',name,
                 ' is not all numeric\n',
                 'and original observations did not have names',sep=''))
    sub <- as.integer(names(impval))
  }
  ##Now take into account fact that transcan may have been
  ##run on a superset of current data frame
  m <- length(sub)
  if(check)
    if(missing(imputation) || imputation==1)
      if(m!=sum(is.na(var)))
        warning("number of NAs in var != number of imputed values from transcan.")
  if(m == 0)
    return(var)
  var[sub] <- if(is.factor(var)) levels(var)[as.integer(impval)]
              else
                if(is.logical(var)) impval == 1
                else
                  impval
  attr(var, 'imputed') <- sub
  attr(var, 'class') <- c("impute", attr(var,'class'))
  var
}
"[.transcan" <- function(x, rows=1:d[1], cols=1:d[2], ..., drop=TRUE)
{
## Check for old style object
if(is.list(x))
{
if(length(x$imputed) && sum(sapply(x$imputed,length)))
{
d <- dim(x$transformed)
original.rownames <- dimnames(x$transformed)[[1]]
subset.rownames <- original.rownames[rows]
for(v in names(x$imputed))
{
z <- x$imputed[[v]]
if(length(z))
{
use <- names(z) %in% subset.rownames
x$imputed[[v]] <- z[use]
}
}
}
x$transformed <- x$transformed[rows,cols, drop=drop]
return(x)
}
ats <- attributes(x)
ats$dimnames <- ats$dim <- ats$names <- NULL
attr(x, 'class') <- NULL
y <- x[..., drop = drop]
attributes(y) <- c(attributes(y), ats)
if(!length(dim(y)))
{
aty <- attributes(y)
aty$call <- aty$iter <- aty$rsq <- aty$parms <- aty$coef <-
aty$xcoef <- aty$rsq.adj <- aty$shrink <-
aty$fillin <- aty$imputed <- aty$class <- aty$ranges <-
aty$imp.con <- aty$scale <- aty$categorical <- aty$asis <-
aty$trantab <- NULL
attributes(y) <- aty
if(is.character(z <- list(...)[[1]]))
attr(y,"label") <- paste("Transformed",z)
##May someday have to use label(y) <- for this ?
}
y
}
## Predict method for 'transcan' fits: computes transformed values (and,
## with type="original", back-transformed/imputed original-scale values)
## for new observations, iterating the same alternating scheme transcan
## used until the transformations stabilize.
##
## object     : transcan fit (new list style or old attribute style)
## newdata    : matrix/data values to transform; optional when the fit
##              kept a formula
## iter.max   : maximum number of re-estimation iterations
## eps        : convergence tolerance on scaled maximum change
## curtail    : clip transformed values to the ranges seen during fitting
## type       : "transformed" scores or back-solved "original" values
## inverse, tolInverse : how to invert the tabulated transformations
##              (see invertTabulated); defaults come from the fit
## check      : warn when newdata column names differ from the fit's
predict.transcan <- function(object, newdata=NULL, iter.max=50, eps=.01,
                             curtail=TRUE, type=c("transformed","original"),
                             inverse, tolInverse, check=FALSE, ...)
{
  type <- match.arg(type)
  if(!is.list(object)) object <- attributes(object)
  parms <- object$parms
  coef <- object$coef
  xcoef <- object$xcoef
  fillin <- object$fillin
  ranges <- object$ranges
  scale <- object$scale
  imp.con<- object$imp.con
  rhsImp <- object$rhsImp
  trantab<- object$trantab
  categorical <- object$categorical
  formula <- object$formula
  ## NOTE(review): when the caller supplies inverse=, this if-without-else
  ## assigns NULL, discarding the supplied value and falling through to
  ## 'linearInterp' below; same for tolInverse — confirm intended
  inverse <- if(missing(inverse)) object$inverse
  if(!length(inverse)) inverse <- 'linearInterp'
  tolInverse <- if(missing(tolInverse)) object$tolInverse
  if(!length(tolInverse)) tolInverse <- 0.05
  ## NOTE(review): message says type="trantab" but the tested value is
  ## type="original" — message likely stale
  if(type=="original" & !length(trantab))
    stop('type="trantab" and trantab=TRUE not specified to transcan')
  if(length(formula))
  {
    ## Build the newdata matrix from the stored formula, keeping NAs
    oldop <- options(na.action="na.retain")
    y <- model.frame(formula, data=newdata)
    options(oldop)
    d <- dim(y)
    p <- d[2]
    newdata <- matrix(NA, nrow=d[1], ncol=p,
                      dimnames=list(attr(y,"row.names"), names(y)))
    for(i in 1:p)
    {
      w <- y[[i]]
      if(is.character(w))
      {
        warning("character predictor present. Depending on levels being same as in original fit,\nthat all levels are present in the data, and that levels were in alphabetical order")
        w <- factor(w)
      }
      newdata[,i] <- unclass(w)
    }
  }
  else
  {
    if(!length(newdata))
      stop("newdata must be given (unless formula was given to transcan)")
    p <- ncol(newdata)
  }
  if(!is.matrix(newdata))
  {
    ## A single observation given as a vector: promote to 1-row matrix
    if(!length(names(newdata)))
      names(newdata) <- dimnames(object)[[2]]
    newdata <- t(as.matrix(newdata))
  }
  if(imp.con || !any(is.na(newdata)))
    iter.max <- 1
  ##only 1 iteration needed if no NAs (imp.con)
  xt <- newdata
  nam <- dimnames(ranges)[[2]]
  if(ncol(ranges)!=p)
    stop("wrong number of columns in newdata")
  if(!length(dimnames(xt)[[2]]))
    dimnames(xt) <- list(dimnames(xt)[[1]],nam)
  else if(check && any(dimnames(newdata)[[2]]!=nam))
    warning("column names in newdata do not match column names in object")
  if(length(dimnames(xt)[[1]])==0)
    dimnames(xt) <- list(as.character(1:nrow(xt)),
                         dimnames(xt)[[2]])
  for(iter in 1:iter.max)
  {
    dmax <- 0
    for(i in 1:p)
    {
      lab <- nam[i]
      j <- is.na(newdata[,i])
      prm <- parms[[lab]]
      if(length(prm)==0)
      {
        ## Variable was kept as-is; impute NAs from the other columns'
        ## current transformed values via the stored x-coefficients
        newy <- newdata[,i]
        if(any(j))newy[j] <- if(iter==1) fillin[i] else
          drop(cbind(1,xt[j,-i,drop=FALSE]) %*% xcoef[i,-i-1])
      }
      else
      {
        if(is.matrix(prm))
        {
          ## Categorical: rows of prm give per-level basis values
          lev <- attr(prm, "codes")
          consec.lev <- match(newdata[,i], lev) #may give NAs - OK for next line
          R <- prm[consec.lev,, drop=FALSE]
          if(iter==1 && any(match(newdata[!j,i], lev, 0)==0))
            stop("codes for categorical variable not in original list")
        } else R <- rcspline.eval(newdata[,i], prm, inclx=TRUE)
        newy <- drop(cbind(1,R) %*% coef[[i]])
        if(any(j))
          newy[j] <-
            if(iter==1) 0
            else
              drop(cbind(1, xt[j,-i,drop=FALSE]) %*%xcoef[i, -i-1])
      }
      if(curtail) newy <- pmax(pmin(newy,ranges[2,i]),ranges[1,i])
      ## Track the largest scaled change, allowing for a sign flip of the
      ## whole transformation between iterations
      if(iter>1) dmax <- max(dmax, min(max(abs(xt[,i]-newy),na.rm=TRUE),
                                       max(abs(-xt[,i]-newy),na.rm=TRUE))/
                                   scale[i])
      xt[,i] <- newy
    } #end i
    niter <- iter
    ## NOTE(review): 'dmax4' appears garbled (text lost between '<' and
    ## '>' during extraction); the convergence test was presumably
    ## 'dmax < eps' — confirm against the original source before editing
    if(niter>1 && dmax4)
      break
  } #end iter
  if(rhsImp=='mean')
  {
    if(iter.max>3 & niter==iter.max)
      stop(paste("no convergence in",iter.max,"iterations"))
    cat("Convergence in",niter,"iterations\n")
  }
  if(type=="transformed") return(xt)
  ## type="original": invert each tabulated transformation to recover
  ## original-scale values for observations that were missing
  for(i in 1:p)
  {
    ft <- trantab[[i]]
    j <- is.na(newdata[,i])
    if(any(j))
    {
      newdata[j,i] <- if(imp.con) fillin[i]
      else
      {
        ww <- invertTabulated(ft, aty=xt[j,i], name=nam[i],
                              inverse=inverse, tolInverse=tolInverse)
        ## Categorical codes must remain integers
        if(nam[i] %in% categorical)
          ww <- round(ww)
        ww
      }
    }
  }
  newdata
}
## Generic: create executable R functions representing fitted
## transformations; methods dispatch on the fit class.
Function <- function(object, ...) UseMethod("Function")

## Function method for 'transcan': for each variable, constructs an R
## function computing its fitted transformation (identity for asis
## variables, a cell-means indicator expression for categorical ones, a
## restated restricted cubic spline otherwise) and assigns it under the
## name prefix + variable + suffix via assign(pos=pos).  Returns the
## created function names invisibly.
Function.transcan <- function(object, prefix=".", suffix="", pos=-1, ...)
{
  at <- if(is.list(object)) object
        else attributes(object)
  Nam <- names(at$coef)
  p <- length(Nam)
  categorical <- at$categorical
  asis <- at$asis
  coef <- at$coef
  parms <- at$parms
  fnames <- character(p)
  for(i in 1:p)
  {
    nam <- Nam[i]
    cof <- coef[[nam]]
    if(nam %in% asis)
      f <- function(x) x
    else if(nam %in% categorical)
    {
      ## Build the body text "{x <- unclass(x); c1*(x==l1)+c2*(x==l2)...}"
      codes <- attr(parms[[nam]], "codes")
      g <- "{x <- unclass(x);"
      cof[-1] <- cof[-1] + cof[1] #convert from ref cell to cell means model
      for(j in 1:length(codes))
      {
        ## Insert an explicit "+" only for positive later terms; negative
        ## coefficients carry their own sign via format()
        if(j>1 && cof[j]>0) g <- paste(g,"+")
        g <- paste(g, format(cof[j]),
                   "*(x==",format(codes[j]),")",sep="")
      }
      g <- paste(g, "}", sep="")
      ## Replace the placeholder body (element 2 of the function) with the
      ## parsed expression
      f <- function(x) NULL
      f[[2]] <- parse(text=g)[[1]]
    }
    else f <- attr(rcspline.restate(parms[[nam]], cof), "function")
    fun.name <- paste(prefix,nam,suffix,sep="")
    cat("Function for transforming",nam,"stored as",fun.name,"\n")
    assign(fun.name, f, pos=pos)
    fnames[i] <- fun.name
  }
  invisible(fnames)
}
na.retain <- function(mf) mf
## Plot method for 'transcan': one base-graphics panel per variable
## showing the tabulated transformation, annotated with R^2 and, when
## imputations exist, the count of missing values plus rug plots of the
## imputed values on both the original (bottom) and transformed (left)
## scales.  Requires the fit to have been run with trantab=TRUE.
plot.transcan <- function(x, ...)
{
  ## check for old style object
  if(!is.list(x)) x <- attributes(x)
  trantab <- x$trantab
  imputed <- x$imputed
  n.impute <- x$n.impute
  if(length(trantab)==0)
    stop('you did not specify trantab=TRUE to transcan()')
  p <- length(trantab)
  nam <- names(trantab)
  for(w in nam) {
    ## z is a list(x=original values, y=transformed values)
    z <- trantab[[w]]
    plot(z, xlab=w, ylab=paste('Transformed',w))
    title(sub=paste('R2=',format(round(x$rsq[w],2)),sep=''),cex=.4,adj=0)
    if(length(imputed))
    {
      m <- imputed[[w]]
      if(L <- length(m))
      {
        ## L counts all stored imputations; dividing by n.impute yields
        ## the number of missing observations.  NOTE(review): unlike
        ## ggplot.transcan this does not guard n.impute with max(1, .) —
        ## confirm behavior when n.impute is absent (single imputation)
        title(sub=paste(L / n.impute, 'missing'),cex=.4,adj=1)
        m.trans <- approx(z, xout=m, rule=2)$y
        scat1d(m, 3, ...)
        scat1d(m.trans, 4, ...)
      }
    }
  }
}
## ggplot method for 'transcan': faceted scatterplot of each variable's
## tabulated transformation, overlaying imputed values in red and placing
## an R^2 / "k missing" annotation in the largest empty region of each
## panel.  Requires trantab=TRUE in the fit.
##
## data        : the transcan fit (first argument name mandated by the
##               ggplot generic)
## mapping, environment : unused, present for generic compatibility
## scale       : if TRUE, rescale each transformation to [0,1] so panels
##               share a y axis
## ...         : passed to facet_wrap()
ggplot.transcan <- function(data, mapping, scale=FALSE, ..., environment)
{
  x <- data
  trantab <- x$trantab
  imputed <- x$imputed
  ## max(1, .) guards against a missing/zero n.impute (single imputation)
  n.impute <- max(1, x$n.impute)
  rsq <- x$rsq
  if(length(trantab) == 0)
    stop('you did not specify trantab=TRUE to transcan()')
  p <- length(trantab)
  nam <- names(trantab)
  ## data: points to plot; adata: per-panel annotation positions/labels
  data <- adata <- NULL
  for(w in nam) {
    z <- trantab[[w]]
    x <- z[[1]]
    y <- z[[2]]
    if(scale) {
      r <- range(y)
      y <- (y - r[1]) / (r[2] - r[1])
      z <- list(x=x, y=y)
    }
    data <- rbind(data, data.frame(type='transform', X=w, x=x, y=y))
    loc <- largest.empty(x, y, xlim=range(x), ylim=range(y))
    ## Label is a plotmath expression, parsed by geom_text(parse=TRUE)
    lab <- paste('R^2==', round(rsq[w], 2), sep='')
    if(length(imputed)) {
      m <- as.vector(imputed[[w]])
      if(L <- length(m)) {
        lab <- paste('paste(', lab, '," ',
                     L / n.impute, ' missing")', sep='')
        m.trans <- approx(z, xout=m, rule=2)$y
        data <- rbind(data,
                      data.frame(type='imputed', X=w, x=m, y=m.trans))
      }
      adata <- rbind(adata, data.frame(X=w, x=loc$x, y=loc$y,
                                       lab=lab, type='transform'))
    }
  }
  # scale*manual calls are from David Norris
  # The implicit (alphabetical) order of 'imputed' and 'transform'
  # reverses the intended symbol & color assignments. But naming
  # the vectors corrects this.
  g <- ggplot(data, aes(x=x, y=y, color=type, shape=type, size=type)) + geom_point() +
    facet_wrap(~ X, scales=if(scale) 'free_x' else 'free', ...) +
    xlab(NULL) + ylab('Transformed') +
    scale_color_manual(values = c(transform="#00000059", imputed="#FF000059")) +
    scale_shape_manual(values = c(transform=1, imputed=3)) +
    scale_size_manual(values = c(transform=1.3, imputed=2.25)) +
    theme(legend.position='none')
  if(length(adata))
    g <- g + geom_text(data=adata, aes(label=lab), parse=TRUE, size=1.65, col='black')
  g
}
## Needed when the Design/rms package is not attached, so anova() and
## related functions can still extract a variance-covariance matrix from
## model fits that store it in $var (optionally with a $Varcov extractor
## function on legacy fits).
##
## object       : a fitted model carrying $var (and possibly $Varcov)
## regcoef.only : if TRUE, return only the block corresponding to the
##                regression coefficients
vcov.default <- function(object, regcoef.only=FALSE, ...)
{
  ## Legacy fits may carry a Varcov extractor function; delegate to it
  ## unless only the regression-coefficient block is wanted.
  Varcov <- object$Varcov
  if(length(Varcov)) {
    if(regcoef.only)
      return(object$var)
    return(Varcov(object, which='var'))
  }
  vmat <- object$var
  if(!length(vmat))
    stop("object does not have variance-covariance matrix")
  if(!regcoef.only)
    return(vmat)
  ## Restrict to the leading block for the regression coefficients
  k <- length(object$coef)
  vmat[1:k, 1:k, drop=FALSE]
}
## Disabled (if(FALSE)) legacy method retained for reference: would have
## computed the coefficient covariance matrix for an lm fit from its QR
## decomposition, scaled by the residual variance.
if(FALSE) Varcov.lm <- function(object, ...)
{
  cof <- object$coefficients
  Qr <- object$qr
  ## (X'X)^{-1} from the R factor of the QR decomposition
  cov <- chol2inv(Qr$qr)
  cov <- sum(object$residuals^2)*cov/object$df.residual
  nm <- names(cof)
  dimnames(cov) <- list(nm, nm)
  cov
}
## Disabled (if(FALSE)) legacy method retained for reference: would have
## returned a glm fit's coefficient covariance, preferring a stored $var
## component and otherwise scaling the unscaled covariance by the
## dispersion from summary.glm().
if(FALSE) Varcov.glm <- function(object, ...)
{
  if(length(object$var))
    return(object$var) ## for glmD
  s <- summary.glm(object)
  s$cov.unscaled * s$dispersion
}
#Varcov.multinom <- function(object, ...) vcov(object)
## Invert a tabulated transformation y = f(x): for each target value in
## aty, find an x whose tabulated y is close.
##
## x, y       : the tabulation; x may instead be a list(x, y, freq)
## freq       : frequencies used as sampling weights (inverse='sample')
## aty        : y values to invert
## name       : variable name used in warnings
## inverse    : 'linearInterp' for approx()-based interpolation, or
##              'sample' to draw an x from tabulated points with y near
##              the target, weighted by closeness and frequency
## tolInverse : "near" means within tolInverse * range(y)
## rule       : extrapolation rule passed to approx()
invertTabulated <- function(x, y, freq=rep(1,length(x)),
                            aty, name='value',
                            inverse=c('linearInterp','sample'),
                            tolInverse=0.05, rule=2)
{
  inverse <- match.arg(inverse)
  if(is.list(x))
  {
    freq <- x[[3]]
    y <- x[[2]]
    x <- x[[1]]
  }
  if(inverse=='linearInterp')
    return(approx(y, x, xout=aty, rule=rule, ties=mean)$y)
  ## inverse='sample': draw from tabulated points close to each target
  del <- diff(range(y, na.rm=TRUE))
  m <- length(aty)
  yinv <- double(m)
  cant <- double(0)   # targets with no tabulated y nearby (warn later)
  for(i in 1:m)
  {
    a <- aty[i]
    s <- abs(y-a) < (tolInverse * del)
    nclose <- sum(s)
    if(nclose < 2)
    {
      if(nclose==0)
        cant <- c(cant, a)
      xest <- approx(y, x, xout=a, rule=rule)$y
      ## If a outside range of y, approx(rule=2) will return min or max
      ## x. There may be many x's with y values near this extreme x.
      ## Take a random draw from them.
      a <- approx(x, y, xout=xest, rule=rule)$y
      s <- abs(y - a) < (tolInverse * del)
      nclose <- sum(s)
      if(nclose > 1)
      {
        ## Weight candidates by (1 - squared distance / max) * frequency
        maxdist <- max((y[s] - a)^2)
        wt <- if(maxdist==0) freq[s]
              else (1 - ((y[s] - a)^2) / maxdist) * freq[s]
        if(all(wt==0)) wt <- freq[s] # y[s] all the same
        if(any(wt==0)) wt[wt==0] <- min(wt[wt>0])/2
        xest <- x[s][sample(nclose, 1, replace=FALSE, prob=wt/sum(wt))]
      }
    }
    else
    {
      ## Two or more candidates: weighted random draw as above
      maxdist <- max((y[s] - a)^2)
      wt <- if(maxdist==0) freq[s]
            else (1 - ((y[s] - a)^2) / maxdist) * freq[s]
      if(all(wt==0))
        wt <- freq[s] # y[s] all the same
      if(any(wt==0))
        wt[wt==0] <- min(wt[wt>0])/2
      xest <- x[s][sample(nclose, 1, replace=FALSE, prob=wt/sum(wt))]
      ## sample(x[s],...) fails if x[s] is scalar; thanks: Bill Dunlap
    }
    yinv[i] <- xest
  }
  if(length(cant))
    warning(paste('No actual ',name, ' has y value within ',
                  format(tolInverse),
                  '* range(y) (',format(del),
                  ') of the following y values:',
                  paste(format(sort(unique(cant))),collapse=' '),
                  '.\nConsider increasing tolInverse. ',
                  'Used linear interpolation instead.',sep=''))
  yinv
}
## Draw m multinomial category labels for each row of a probability
## matrix (technique borrowed from MICE's impute.polyreg).
##
## probs : n x k matrix; each row is a probability distribution over the
##         k categories (column names, if present, supply the labels)
## m     : number of independent draws per row
## Returns an n x m matrix of sampled category labels.
rMultinom <- function(probs, m)
{
  dims <- dim(probs)
  nobs <- dims[1]
  ncat <- dims[2]
  lev  <- dimnames(probs)[[2]]
  if(!length(lev))
    lev <- 1:ncat
  rowsums <- apply(probs, 1, sum)
  if(any(abs(rowsums - 1) > .00001))
    stop('error in multinom: probabilities do not sum to 1')
  draws <- matrix(lev[1], nrow=nobs, ncol=m)
  ## Column i of cumprob is the cumulative distribution for row i
  cumprob <- apply(probs, 1, cumsum)
  for(j in 1:m) {
    ## One uniform per row, replicated across the k thresholds; counting
    ## how many thresholds the uniform exceeds gives the category index.
    u <- rep(runif(nobs), rep(ncat, nobs))
    draws[, j] <- lev[1 + apply(u > cumprob, 2, sum)]
  }
  draws
}
utils::globalVariables('type')
## Front end to tables::tabular() that substitutes LaTeX variable labels
## (via labelLatex) for variable names appearing in the formula, and
## recognizes the shorthands trio, freq, and N for table_trio,
## table_freq, and Heading()*table_N respectively.
## (Fixed: a tar-archive member header had been fused onto the first line
## of this definition, making the file unparseable; removed, along with
## superseded commented-out gsubfn experiments.)
##
## formula : a tables::tabular-style formula
## data    : data frame / list / environment in which to find variables
##           (defaults to the formula's environment)
## nolabel : formula naming variables whose labels should NOT be used
## nofill  : formula naming variables whose labels should not be
##           \hfill-padded
## ...     : passed on to tables::tabular()
tabulr <- function(formula, data=NULL, nolabel=NULL, nofill=NULL, ...) {
  if (!requireNamespace("tables", quietly = TRUE))
    stop("This function requires the 'tables' package.")
  if(!length(data)) data <- environment(formula)
  else if(is.list(data)) data <- list2env(data, parent=environment(formula))
  ## Map one variable name to a Heading('label')*name expression when a
  ## label is available; special shorthand names are expanded, and names
  ## without usable labels pass through unchanged.
  lab <- function(x) {
    x <- deparse(x)
    if(x == 'trio') return('table_trio')
    if(x == 'freq') return('table_freq')
    if(x == 'N') return('Heading()*table_N')
    if(! (exists(x, envir=data, mode='numeric') |
          exists(x, envir=data, mode='character'))) return(x)
    if(length(nolabel) && x %in% all.vars(nolabel)) return(x)
    xval <- get(x, envir=data)
    if(label(xval) == '') return(x)
    l <- labelLatex(xval, double=FALSE,
                    hfill=!length(nofill) || x %nin% all.vars(nofill))
    paste("Heading('", l, "')*", x, sep='')
  }
  ## Recursively rewrite every variable name in the formula, descending
  ## only through the operators tabular() formulas use (from Bill Dunlap)
  ff <- function(expr, convertName) {
    if (is.call(expr) && is.name(expr[[1]]) &&
        is.element(as.character(expr[[1]]),
                   c("~","+","-","*","/","%in%","%nin%","(", ":"))) {
      for(i in seq_along(expr)[-1])
        expr[[i]] <- Recall(expr[[i]], convertName = convertName)
    } else if (is.name(expr)) expr <- as.name(convertName(expr))
    expr
  }
  f <- ff(formula, lab)
  ## Re-parse to strip backticks introduced around rewritten names
  f <- as.formula(gsub("`", "", as.character(deparse(f))))
  result <- tables::tabular(f, data=data, ...)
  attr(result, 'originalformula') <- formula
  result
}
## Cell renderer for tables::tabular(): formats the three quartiles of x
## as a LaTeX fragment "{\smaller Q1} Q2 {\smaller Q3}", optionally
## followed by "(mean ± sd)" and/or the non-missing count, driven by
## settings in tables::table_options() (left, right, prmsd, pn, pnformat,
## pnwhen, bold).  Returns '' when x has no non-missing values.
table_trio <- function(x) {
  if (!requireNamespace("tables", quietly = TRUE))
    stop("This function requires the 'tables' package.")
  o <- tables::table_options()
  ## Fall back to a default when an option is unset
  s <- function(x, default) if(length(x)) x else default
  left <- s(o$left, 3)
  right <- s(o$right, 1)
  prmsd <- s(o$prmsd, FALSE)
  pn <- s(o$pn, FALSE)
  pnformat <- s(o$pnformat, "n")
  pnwhen <- s(o$pnwhen, "all")
  bold <- s(o$bold, FALSE)
  isna <- is.na(x)
  x <- x[!isna]
  if(!length(x)) return('')
  qu <- quantile(x, (1:3)/4)
  ## Median optionally bolded; outer quartiles set smaller
  w <- paste('{\\smaller ', nFm(qu[1], left, right), '} ',
             if(bold) '\\textbf{', nFm(qu[2], left, right), if(bold) '}',
             ' {\\smaller ', nFm(qu[3], left, right), '}', sep='')
  ## pnwhen='ifna' shows n only when some values were missing
  if(pnwhen == 'ifna' && !any(isna)) pn <- FALSE
  if(prmsd || pn) {
    w <- paste(w, '~{\\smaller (', sep='')
    if(prmsd) w <- paste(w, nFm(mean(x), left, right), '$\\pm$',
                         nFm(sd(x), left, right), sep='')
    if(pn) w <- paste(w, if(prmsd)' ', '$',
                      if(pnformat == 'n') 'n=', length(x), '$', sep='')
    w <- paste(w, ')}', sep='')
  }
  w
}
table_N <- function(x) paste('{\\smaller $n=', length(x), '$}', sep='')
## Fixed-width numeric formatter: 'left' digits before and 'right' digits
## after the decimal point.  neg reserves one extra column for a minus
## sign.  pad replaces leading blanks with '~' (LaTeX spacing) or, when
## html=TRUE, removes them.  Vectorized over x; returns character.
nFm <- function(x, left, right, neg=FALSE, pad=FALSE, html=FALSE) {
  ## Field width = digits left of point [+ sign] [+ point + right digits]
  width <- left + neg + (if(right == 0) 0 else right + 1)
  out <- sprintf(paste0('%', width, '.', right, 'f'), x)
  if(pad)
    out <- gsub(' ', if(html) '' else '~', out)
  out
}
## Cell renderer: frequency distribution of a categorical variable as a
## LaTeX fragment.  table_options()$showfreq selects 'all' (stacked
## "level \hfill pct" lines inside a \parbox), 'high', or 'low' (just the
## modal / least-frequent level(s) with their percentage).  Returns ''
## for empty or all-NA input.
table_freq <- function(x) {
  if(!length(x) || all(is.na(x))) return('')
  if (!requireNamespace("tables", quietly = TRUE))
    stop("This function requires the 'tables' package.")
  w <- table(x)
  den <- sum(w)
  to <- tables::table_options()
  showfreq <- to$showfreq
  if(!length(showfreq)) showfreq <- 'all'
  pctdec <- to$pctdec
  if(!length(pctdec)) pctdec <- 0
  ## Which cells of the frequency table to display
  i <- switch(showfreq,
              all = 1:length(w),
              high = which(w == max(w)),
              low = which(w == min(w)))
  m <- w[i]
  fpct <- table_formatpct(m, den)
  if(showfreq == 'all') {
    ## One "level \hfill pct" line per level, joined with \\ inside a
    ## \parbox sized from the longest level name plus the pct field
    z <- paste(names(m), '\\hfill', fpct, sep='')
    z <- paste(z, collapse='\\\\', sep='')
    len <- max(nchar(names(m))) + 9 + pctdec + 1 * (pctdec > 0)
    z <- paste('\\parbox{', len, 'ex}{\\smaller ', z, '}', sep='')
    return(z)
  }
  ## 'high'/'low': list the tied level names, report one percentage
  lab <- paste(names(m), collapse=', ')
  num <- m[1]
  paste(lab, ':', table_formatpct(num, den), sep='')
}
## Cell renderer: proportion of "positive" x values (TRUE, or 'yes'/'Yes')
## out of the non-missing y values, formatted by table_formatpct().
## All-NA x or y falls back to using the vector length.
table_pc <- function(x, y) {
  ## maxdig computed but unused here — possibly vestigial
  maxn <- max(length(x), length(y))
  maxdig <- 1L + floor(log10(maxn))
  num <- if(all(is.na(x))) length(x) else
         if(is.logical(x)) sum(x) else sum(x %in% c('yes','Yes'))
  den <- if(all(is.na(y))) length(y) else sum(!is.na(y))
  ## NOTE(review): the prn() call below prints debugging output every
  ## time a cell is rendered — looks like a leftover trace; confirm
  ## whether it should be removed
  prn(c(num,den)); prn(table(x, exclude=NULL)); prn(table(y, exclude=NULL))
  table_formatpct(num, den)
}
## Format num/den as a LaTeX percentage per table_options()$npct:
## 'both' appends a small \frac{num}{den}, 'numerator'/'denominator'
## append that count in parentheses, 'none' gives the bare percentage.
## Decimal places come from table_options()$pctdec (default 0).
## Returns '' when den is zero or inputs are all NA.
table_formatpct <- function(num, den) {
  if(den == 0 | all(is.na(num + den))) return('')
  if (!requireNamespace("tables", quietly = TRUE))
    stop("This function requires the 'tables' package.")
  to <- tables::table_options()
  npct <- to$npct
  pctdec <- to$pctdec
  if(!length(pctdec)) pctdec <- 0
  if(!length(npct)) npct <- 'both'
  ## Allow unambiguous abbreviations of the npct option
  poss <- c('numerator', 'denominator', 'both', 'none')
  i <- charmatch(npct, poss)
  if(is.na(i)) stop('in table_options(npct=) npct must be "numerator", "denominator", "both", or "none"')
  npct <- poss[i]
  z <- paste(nFm(100 * num / den, 3, pctdec), '\\%', sep='')
  if(npct == 'none') return(z)
  if(npct == 'both')
    return(paste(z, '{\\smaller[2] $\\frac{', num, '}{', den, '}$}', sep=''))
  paste(z, '{\\smaller (', if(npct == 'numerator') num else den, ')}', sep='')
}
## Write LaTeX preamble definitions used by tabulr output: \blfootnote
## (a footnote without a marker) and the \keytrio / \keytriomsd legends
## explaining the quartile-trio cell format.  Writes to 'file' (default:
## standard output); returns invisibly.
table_latexdefs <- function(file='') {
  ## Shorthand so every cat() goes to the same destination
  ct <- function(...) cat(..., file=file)
  ct('\\makeatletter\n',
     '\\def\\blfootnote{\\xdef\\@thefnmark{}\\@footnotetext}\n',
     '\\makeatother\n')
  ct('\\def\\keytrio{\\blfootnote{Numbers in parentheses are the number of non-missing values. {\\smaller $a$} \\textbf{$b$}{\\smaller $c$} represents the first quartile $a$, the median $b$, and the third quartile $c$.}}\n')
  ct('\\def\\keytriomsd{\\blfootnote{Numbers in parentheses are the number of non-missing values. {\\smaller $a$} \\textbf{$b$}{\\smaller $c$} represents the first quartile $a$, the median $b$, and the third quartile $c$. $x \\pm s$ represents the mean and standard deviation.}}\n')
  invisible()
}
##' Generalized R^2 Measures
##'
##' Computes various generalized R^2 measures related to the Maddala-Cox-Snell (MCS) R^2 for regression models fitted with maximum likelihood. The original MCS R^2 is labeled `R2` in the result. This measure uses the raw sample size `n` and does not penalize for the number of free parameters, so it can be rewarded for overfitting. A measure adjusted for the number of fitted regression coefficients `p` uses the analogy to R^2 in linear models by computing `1 - exp(- lr / n) * (n-1)/(n-p-1)` if `padj=2`, which is approximately `1 - exp(- (lr - p) / n)`, the version used if `padj=1` (the default). The latter measure is appealing because the expected value of the likelihood ratio chi-square statistic `lr` is `p` under the global null hypothesis of no predictors being associated with the response variable. See for more details.
##'
##' It is well known that in logistic regression the MCS R^2 cannot achieve a value of 1.0 even with a perfect model, which prompted Nagelkerke to divide the R^2 measure by its maximum attainable value. This is not necessarily the best recalibration of R^2 throughout its range. An alternative is to use the formulas above but to replace the raw sample size `n` with the effective sample size, which for data with many ties can be significantly lower than the number of observations. As used in the `popower()` and `describe()` functions, in the context of a Wilcoxon test or the proportional odds model, the effective sample size is `n * (1 - f)` where `f` is the sums of cubes of the proportion of observations at each distict value of the response variable. Whitehead derived this from an approximation to the variance of a log odds ratio in a proportional odds model. To obtain R^2 measures using the effective sample size, either provide `ess` as a single number specifying the effective sample size, or specify a vector of frequencies of distinct Y values from which the effective sample size will be computed. In the context of survival analysis, the single number effective sample size you may wish to specify is the number of uncensored observations. This is exactly correct when estimating the hazard rate from a simple exponential distribution or when using the Cox PH/log-rank test. For failure time distributions with a very high early hazard, censored observations contain enough information that the effective sample size is greater than the number of events. See Benedetti et al, 1982.
##'
##' If the effective sample size equals the raw sample size, measures involving the effective sample size are set to \code{NA}.
##' @title R2Measures
##' @param lr likelihoood ratio chi-square statistic
##' @param p number of non-intercepts in the model that achieved `lr`
##' @param n raw number of observations
##' @param ess if a single number, is the effective sample size. If a vector of numbers is assumed to be the frequency tabulation of all distinct values of the outcome variable, from which the effective sample size is computed.
##' @param padj set to 2 to use the classical adjusted R^2 penalty, 1 (the default) to subtract `p` from `lr`
##' @return named vector of R2 measures. The notation for results is `R^2(p, n)` where the `p` component is empty for unadjusted estimates and `n` is the sample size used (actual sample size for first measures, effective sample size for remaining ones). For indexes that are not adjusted, only `n` appears.
##' @author Frank Harrell
##' @md
##' @export
##' @references
##' Smith TJ and McKenna CM (2013): A comparison of logistic regression pseudo R^2 indices. Multiple Linear Regression Viewpoints 39:17-26.
##'
##' Benedetti JK, et al (1982): Effective sample size for tests of censored survival data. Biometrika 69:343--349.
##'
##' Mittlbock M, Schemper M (1996): Explained variation for logistic regression. Stat in Med 15:1987-1997.
##'
##' Date, S: R-squared, adjusted R-squared and pseudo R-squared.
##'
##' UCLA: What are pseudo R-squareds?
##'
##' Allison P (2013): What's the best R-squared for logistic regression?
##'
##' Menard S (2000): Coefficients of determination for multiple logistic regression analysis. The Am Statistician 54:17-24.
##'
##' Whitehead J (1993): Sample size calculations for ordered categorical data. Stat in Med 12:2257-2271. See errata (1994) 13:871 and letter to the editor by Julious SA, Campbell MJ (1996) 15:1065-1066 showing that for 2-category Y the Whitehead sample size formula agrees closely with the usual formula for comparing two proportions.
##' @examples
##' x <- c(rep(0, 50), rep(1, 50))
##' y <- x
##' # f <- lrm(y ~ x)
##' # f # Nagelkerke R^2=1.0
##' # lr <- f$stats['Model L.R.']
##' # 1 - exp(- lr / 100) # Maddala-Cox-Snell (MCS) 0.75
##' lr <- 138.6267 # manually so don't need rms package
##'
##' R2Measures(lr, 1, 100, c(50, 50)) # 0.84 Effective n=75
##' R2Measures(lr, 1, 100, 50) # 0.94
##' # MCS requires unreasonable effective sample size = minimum outcome
##' # frequency to get close to the 1.0 that Nagelkerke R^2 achieves
## Generalized Maddala-Cox-Snell R^2 measures from a likelihood ratio
## chi-square statistic (see the roxygen documentation above).
##
## lr   : likelihood ratio chi-square statistic
## p    : number of non-intercept parameters achieving lr
## n    : raw sample size
## ess  : effective sample size, or a frequency table of distinct Y
##        values from which it is computed; adds two more measures
## padj : 1 (default) subtracts p from lr; 2 uses the classical
##        adjusted-R^2 penalty
## Returns a named vector: unadjusted and adjusted R^2, repeated with the
## effective sample size when ess is supplied (NA when ess equals n).
R2Measures <- function(lr, p, n, ess=NULL, padj=1) {
  ## Adjusted R^2 under whatever n is currently in scope
  adjR2 <- function() {
    if(padj == 1) 1. - exp(- max(lr - p, 0) / n)
    else          1. - exp(- lr / n) * (n - 1.) / (n - p - 1.)
  }
  ## Label maker: R2(n) for unadjusted, R2(p,n) for adjusted measures
  makeLab <- function(p, n) {
    n <- as.character(round(n, 1))
    paste0('R2(', ifelse(p == 0, n, paste0(p, ',', n)), ')')
  }
  r2    <- 1. - exp(- lr / n)
  r2adj <- adjR2()
  R     <- c(R2=r2, R2adj=r2adj)
  name  <- makeLab(c(0, p), c(n, n))
  if(length(ess)) {
    nraw <- n
    ## Single number: the effective sample size itself; a vector is a
    ## frequency tabulation from which it is computed via sums of cubes
    n <- if(length(ess) == 1) ess
         else {
           P <- ess / sum(ess)
           n * (1. - sum(P ^ 3))
         }
    r2    <- 1. - exp(- lr / n)
    r2adj <- adjR2()
    ## Effective-n measures are redundant when ess equals the raw n
    if(n == nraw) r2 <- r2adj <- NA
    name <- c(name, makeLab(c(0, p), c(n, n)))
    R <- c(R, r2, r2adj)
  }
  names(R) <- name
  R
}
#' printL
#'
#' Print an object or a named list of objects. When multiple objects are given, their names are printed before their contents. When an object is a vector that is not longer than `maxoneline` and its elements are not named, all the elements will be printed on one line separated by commas. When `dec` is given, numeric vectors or numeric columns of data frames or data tables are rounded to the nearest `dec` before printing. This function is especially helpful when printing objects in a Quarto or RMarkdown document and the code is not currently being shown to place the output in context.
#'
#' @param ... any number of objects to `print()`
#' @param dec optional decimal places to the right of the decimal point for rounding
#' @param maxoneline controls how many elements may be printed on a single line for `vector` objects
#'
#' @return nothing
#' @author Frank Harrell
#' @export
#' @md
#' @seealso [prn()]
#'
#' @examples
#' w <- pi + 1 : 2
#' printL(w=w)
#' printL(w, dec=3)
#' printL('this is it'=c(pi, pi, 1, 2),
#' yyy=pi,
#' z=data.frame(x=pi+1:2, y=3:4, z=c('a', 'b')),
#' qq=1:10,
#' dec=4)
#'
printL <- function(..., dec=NULL, maxoneline=5) {
  ## Print each (optionally named) object in ..., labeling output with the
  ## object's name.  Short unnamed vectors (length <= maxoneline) are
  ## collapsed onto one comma-separated line; when dec is given, numeric
  ## vectors and numeric list/data frame columns are rounded to dec
  ## decimal places before printing.  Returns nothing (invisibly).
  z  <- list(...)
  ns <- names(z)
  if(! length(ns)) {
    if(length(z) > 1) stop('must name arguments')
    names(z) <- ' '   # single unnamed object: print with no label
  }
  for(n in names(z)) {
    x <- z[[n]]
    l <- length(x)
    if(is.numeric(x) && length(dec)) x <- round(x, dec)
    if(is.vector(x) &&
       (l == 1 || (l <= maxoneline && ! length(names(x))))) {
      ## Compact one-line form: "name: e1, e2, ..."
      x <- paste(x, collapse=', ')
      cat(if(n != ' ') c(n, ': '), x, '\n\n', sep='')
    } else {
      ## Round numeric components of lists/data frames element-wise.
      ## seq_along is safe for zero-length lists; 1:length(x) would
      ## yield c(1, 0) and fail with a subscript error.
      if(is.list(x) && length(dec))
        for(j in seq_along(x))
          if(is.numeric(x[[j]])) x[[j]] <- round(x[[j]], dec)
      if(n != ' ') cat(n, ':\n', sep='') else cat('\n')
      print(x)
      cat('\n')
    }
  }
  invisible()
}
Hmisc/R/ggplotlyr.r 0000644 0001762 0000144 00000002364 14112727067 013740 0 ustar ligges users ##' Render `plotly` Graphic from a `ggplot2` Object
##'
##' Uses `plotly::ggplotly()` to render a `plotly` graphic with a specified tooltip attribute, removing extraneous text that `ggplotly` puts in hover text when `tooltip='label'`
##' @title ggplotlyr
##' @param ggobject an object produced by `ggplot`
##' @param tooltip attribute specified to `ggplot` to hold hover text
##' @param remove extraneous text to remove from hover text. The default assumes `tooltip='label'` and that you specified `aes(..., label=txt)`. If you instead specified `aes(..., label=myvar)` use `remove='myvar: '`.
##' @param ... other arguments passed to `ggplotly`
##' @return a `plotly` object
##' @author Frank Harrell
##' @export
##' @md
ggplotlyr <- function(ggobject, tooltip='label', remove='txt: ', ...) {
  ## Render a ggplot2 object as a plotly graphic with the given tooltip
  ## attribute, then strip the extraneous prefix (remove, a regular
  ## expression) that ggplotly inserts into the hover text of every trace.
  ## Returns the modified plotly object.
  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")
  # Get around a bug in tooltip construction with ggplotly
  # See https://stackoverflow.com/questions/66316337
  g <- plotly::ggplotly(ggobject, tooltip=tooltip, ...)
  if(! length(remove) || remove == '') return(g)
  d <- g$x$data
  ## seq_along is safe when the plot has no traces; 1:length(d) would
  ## iterate over c(1, 0) and fail on the empty list
  for(i in seq_along(d)) {
    w <- d[[i]]$text
    if(length(w)) d[[i]]$text <- gsub(remove, '', w)
  }
  g$x$data <- d
  g
}
Hmisc/R/summaryRc.s 0000644 0001762 0000144 00000011616 14400371147 013672 0 ustar ligges users summaryRc <-
  ## Graphical summary of a continuous response against continuous
  ## predictors given in a formula, one base-graphics plot per predictor,
  ## using plsmo() smooths.  Optional stratification (stratify()),
  ## quantile reference arrows or box-percentile panels, data density,
  ## and automatic sample-size annotation placement.
  function(formula, data=NULL, subset=NULL, na.action=NULL,
           fun=function(x) x, na.rm=TRUE,
           ylab=NULL, ylim = NULL, xlim=NULL, nloc=NULL, datadensity=NULL,
           quant = c(0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95),
           quantloc = c('top', 'bottom'), cex.quant=.6, srt.quant=0,
           bpplot = c('none', 'top', 'top outside', 'top inside', 'bottom'),
           height.bpplot = 0.08,
           trim=NULL, test=FALSE, vnames=c('labels','names'),
           ...)
{
  call <- match.call()
  quantloc <- match.arg(quantloc)
  vnames <- match.arg(vnames)
  bpplot <- match.arg(bpplot)
  if(bpplot == 'top') bpplot <- 'top inside'
  ## Standard non-standard-evaluation idiom: rebuild the call as a
  ## model.frame() call, dropping arguments model.frame does not know
  X <- match.call(expand.dots=FALSE)
  X$fun <- X$na.rm <- X$ylim <- X$xlim <- X$ylab <- X$nloc <- X$datadensity <-
    X$quant <- X$quantloc <- X$cex.quant <- X$srt.quant <- X$trim <- X$test <-
      X$vnames <- X$bpplot <- X$height.bpplot <- X$... <- NULL
  if(missing(na.action)) X$na.action <- na.retain
  Terms <- if(missing(data)) terms(formula, 'stratify')
  else terms(formula, 'stratify', data=data)
  X$formula <- Terms
  X[[1]] <- as.name("model.frame")
  X <- eval(X, sys.parent())
  Terms <- attr(X, "terms")
  resp <- attr(Terms, "response")
  nact <- attr(X, "na.action")
  nvar <- ncol(X) - 1
  strat <- attr(Terms, 'specials')$stratify
  ## Variable label lookup honoring vnames='names'
  getlab <- function(x, default) {
    if(vnames == 'names') return(default)
    lab <- attr(x, 'label')
    if(!length(lab) || lab=='') default else lab
  }
  if(length(strat)) {
    sRequire('survival')
    temp <- survival::untangle.specials(Terms,'stratify')
    strat.name <- var.inner(Terms)[temp$terms]
    strat <- if(length(temp$vars) == 1) as.factor(X[[temp$vars]])
    else stratify(X[,temp$vars])
    strat.label <- getlab(X[, temp$vars[1]], strat.name)
    X[[temp$vars]] <- NULL # remove strata factors
  }
  Y <- X[[resp]]
  yname <- as.character(attr(Terms, 'variables'))[2]
  ylabel <- if(length(ylab)) ylab else getlab(Y, yname)
  X[[resp]] <- NULL # remove response var
  Y <- as.matrix(Y)
  ## Drop observations whose response is entirely missing
  s <- rowSums(is.na(Y)) == ncol(Y)
  nmissy <- sum(s)
  if(nmissy) {
    X <- X[!s,, drop=FALSE]
    Y <- Y[!s,, drop=FALSE]
    strat <- strat[!s]
  }
  ## One plot for a single predictor: plsmo smooth plus optional n
  ## annotation, box-percentile plot or quantile reference arrows.
  ## Reads trim, nloc, datadensity, height.bpplot, quantloc, cex.quant,
  ## srt.quant from the enclosing environment.
  pl <- function(x, y, strat=NULL, quant, bpplot, width.bpplot,
                 xlab='', ylab='',
                 ylim=NULL, xlim=NULL, fun=function(x) x, ...) {
    n <- sum(!is.na(x))
    group <- if(length(strat)) strat else rep(1, length(x))
    if(!length(trim)) trim <- if(n > 200) 10 / n else 0
    if(!length(xlim)) {
      xlim <- if(trim == 0) range(x, na.rm=TRUE)
      else quantile(x, c(trim, 1 - trim), na.rm=TRUE)
    }
    a <- list(x=x, y=y, xlab=xlab, ylab=ylab, xlim=xlim, trim=0, group=group,
              datadensity=if(length(datadensity)) datadensity
              else length(strat) > 0, ...)
    if(length(fun)) a$fun <- fun
    if(length(ylim)) a$ylim <- ylim
    z <- do.call('plsmo', a)
    usr <- par('usr')
    xl <- usr[1:2]
    yl <- usr[3:4]
    ## Sample-size label: suppressed by nloc=FALSE, placed at nloc
    ## (fractions of the plot region) when given, otherwise in the
    ## largest empty region of the plot
    if(! (length(nloc) && is.logical(nloc) && !nloc)) {
      if(length(nloc)) {
        xx <- nloc[[1]]
        yy <- nloc[[2]]
        xx <- xl[1] + xx * diff(xl)
        yy <- yl[1] + yy * diff(yl)
        w <- list(x=xx, y=yy)
      }
      else {
        xs <- unlist(lapply(z, function(x)x$x))
        ys <- unlist(lapply(z, function(x)x$y))
        w <- largest.empty(xs, ys, method='area')
      }
      text(w, paste('n=', n, sep=''), cex=.75, font=3, adj=.5)
    }
    # Compute user y-units per inch
    u <- diff(yl) / par('fin')[2]
    if(bpplot != 'none') {
      h <- u * height.bpplot
      yy <- switch(bpplot,
                   'top outside' = yl[2] + h/2 + u*.11,
                   'top inside' = yl[2] - h/2 - u*.11,
                   bottom = yl[1] + h/2 + u*.11)
      panel.bpplot(x, yy, nogrid=TRUE, pch=19, cex.means=.6, height=h)
    }
    else
      if(length(quant)) {
        h <- u * .15
        qu <- quantile(x, quant, na.rm=TRUE)
        names(qu) <- as.character(quant)
        qu <- pooleq(qu)   # merge labels of tied quantiles
        yq <- if(quantloc == 'top') yl[2] else yl[1]
        arrows(qu, yq + h, qu, yq, col='blue', length=.05, xpd=NA)
        if(cex.quant > 0)
          text(qu, yq + 1.4 * h, names(qu), adj=if(srt.quant == 0) .5 else 0,
               cex=cex.quant, srt=srt.quant, xpd=NA)
      }
    ## text(xl[2], yl[2] + h/4, paste('n=', n, sep=''),
    ##      cex=.75, font=3, adj=c(1,0), xpd=NA)
  }
  ## Find all ties in quantiles and build combined labels
  pooleq <- function(x) {
    w <- tapply(names(x), x, paste, collapse=', ')
    x <- as.numeric(names(w))
    names(x) <- w
    x
  }
  ## Loop over predictors, one plot each
  i <- 0
  nams <- names(X)
  for(v in nams) {
    i <- i + 1
    x <- X[[v]]
    xlab <- getlab(x, nams[i])
    units <- if(length(l <- attr(x,'units'))) l else ''
    xlab <- labelPlotmath(xlab, units)
    pl(x, Y, strat=strat, quant=quant, bpplot=bpplot,
       height.bpplot=height.bpplot,
       xlab=xlab, ylab=ylabel, ylim=ylim, xlim=xlim[[v]], ...)
  }
}
Hmisc/R/ggMisc.s 0000644 0001762 0000144 00000004334 12662361764 013134 0 ustar ligges users # From Sandy Muspratt: http://stackoverflow.com/questions/28652284
# See better approach there, test is in ~/r/ggplot2/panel-border.r
## Draw a ggplot2 faceted plot with the gaps between facet panels filled
## with a (translucent) color.  Works on the gtable layout of the plot;
## draws the result and returns nothing.
colorFacet <- function(g, col=adjustcolor('blue', alpha.f=0.3)) {
  ## Get the plot grob
  gt <- ggplotGrob(g)
  ## Check the layout
  ##gtable_show_layout(gt) # Vertical gaps are in columns 5 and 7
  # and span rows 3 to 6
  # Horizontal gap is in row 5
  # and spans columns 4 to 9
  ## To automate the selection of the relevant rows and columns:
  ## Find out which items in the layout correspond to the panels.
  ## "r" and "b" (below) refer to the right and bottom indices for the panels in the layout
  ## The gaps' indices are one to the right of the panels' r index (except the right most panel)
  ## and one below the panels' b index (except the bottom most panel)
  ## Rmin and Rmax give the span of the horizontal gap;
  ## Bmin and Bmax give the span of the vertical gap
  panelsR <- unique(gt$layout$r[grepl("panel", gt$layout$name)])
  Rmin = panelsR[1] ## 4
  Rmax = panelsR[length(panelsR)] + 1
  panelsR = panelsR[-length(panelsR)] +1   # gap columns: right of each panel but the last
  panelsB <- unique(gt$layout$b[grepl("panel", gt$layout$name)])
  Bmin = panelsB[1] - 1 ## 3
  Bmax = panelsB[length(panelsB)]
  panelsB = panelsB[-length(panelsB)] + 1  # gap rows: below each panel but the last
  ## Add colored rectangles into the vertical and horizontal gaps
  for(i in panelsR)
    gt <- gtable::gtable_add_grob(gt,
                                  list(grid::rectGrob(gp = grid::gpar(col = NA, fill = col))),
                                  Bmin, i, Bmax, i)
  for(j in panelsB)
    gt <- gtable::gtable_add_grob(gt,
                                  list(grid::rectGrob(gp = grid::gpar(col = NA, fill = col))),
                                  j, Rmin, j, Rmax)
  grid::grid.draw(gt)
  invisible()
}
## The following exists to nullify invisible() used in arrangeGrob's
## returned value. Regarding class and print see
## http://stackoverflow.com/questions/29062766/store-output-from-gridextragrid-arrange-into-an-object
arrGrob <- function(...) {
  ## Wrapper for gridExtra::arrangeGrob whose result auto-prints
  ## (arrangeGrob returns its grob invisibly); the added "arrGrob"
  ## class dispatches to print.arrGrob, which draws the grob.
  if(! requireNamespace('gridExtra', quietly=TRUE))
    stop('gridExtra package not installed')
  grob <- gridExtra::arrangeGrob(...)
  structure(grob, class = c("arrGrob", class(grob)))
}
## Print method for arrGrob objects: open a fresh grid page and draw the
## stored grob, so that results of arrGrob() display when auto-printed.
print.arrGrob <- function(x, ...) {
  grid::grid.newpage()
  grid::grid.draw(x)
}
Hmisc/R/completer.r 0000644 0001762 0000144 00000006042 14430537777 013716 0 ustar ligges users ##' Create imputed dataset(s) using \code{transcan} and \code{aregImpute} objects
##'
##' Similar in function to `mice::complete`, this function uses `transcan` and `aregImpute` objects to impute missing data
##' and returns the completed dataset(s) as a dataframe or a list.
##' It assumes that `transcan` is used for single regression imputation.
##' @title completer
##' @param a An object of class `transcan` or `aregImpute`
##' @param nimpute A numeric vector between 1 and `a$n.impute`. For `transcan` object, this is set to 1. For `aregImpute` object, returns a list of `nimpute` datasets when `oneimpute` is set to `FALSE` (default).
##' @param oneimpute A logical vector. When set to `TRUE`, returns a single completed dataset for the imputation number specified by `nimpute`
##' @param mydata A data frame in which its missing values will be imputed.
##' @return A single or a list of completed dataset(s).
##' @author Yong-Hao Pua, Singapore General Hospital
##' @md
##' @examples
##' \dontrun{
##' mtcars$hp[1:5] <- NA
##' mtcars$wt[1:10] <- NA
##' myrform <- ~ wt + hp + I(carb)
##' mytranscan <- transcan( myrform, data = mtcars, imputed = TRUE,
##' pl = FALSE, pr = FALSE, trantab = TRUE, long = TRUE)
##' myareg <- aregImpute(myrform, data = mtcars, x=TRUE, n.impute = 5)
##' completer(mytranscan) # single completed dataset
##' completer(myareg, 3, oneimpute = TRUE)
##' # single completed dataset based on the `n.impute`th set of multiple imputation
##' completer(myareg, 3)
##' # list of completed datasets based on first `nimpute` sets of multiple imputation
##' completer(myareg)
##' # list of completed datasets based on all available sets of multiple imputation
##' # To get a stacked data frame of all completed datasets use
##' # do.call(rbind, completer(myareg, data=mydata))
##' # or use rbindlist in data.table
##' }
completer <- function(a, nimpute, oneimpute = FALSE, mydata) {
  ## Build completed (imputed) dataset(s) from a transcan or aregImpute
  ## fit, similar to mice::complete.
  ##   a         : object of class 'transcan' or 'aregImpute'
  ##   nimpute   : for aregImpute, number of imputations to use (defaults
  ##               to all, a$n.impute); forced to 1 for transcan, which
  ##               performs single imputation
  ##   oneimpute : if TRUE, return only the single completed dataset for
  ##               imputation number nimpute
  ##   mydata    : data frame whose NAs are filled in; defaults to the
  ##               data used in the fit (re-evaluated from a$call$data)
  ## Returns one data frame, or a list of nimpute data frames.
  trans <- inherits(a, 'transcan')
  areg  <- inherits(a, 'aregImpute')
  ni    <- a$n.impute

  if(trans) nimpute <- 1
  if(missing(nimpute)) nimpute <- ni
  if(areg && nimpute > ni)
    stop("'nimpute' cannot exceed ", ni, ".")
  if(missing(mydata)) mydata <- eval(a$call$data)

  baseargs <- list(x = a, imputation = nimpute, data = mydata,
                   list.out = TRUE, pr = FALSE)
  ## transcan imputation has no imputation-number argument
  if(trans) baseargs$imputation <- NULL

  ## Fill mydata's missing values from one impute.transcan call
  complete1 <- function(args) {
    imputed <- do.call("impute.transcan", args)
    mydata[, names(imputed)] <- as.data.frame(imputed)
    mydata
  }

  ## Scalar conditions: use short-circuit &&/|| rather than vectorized &/|
  if(trans || (areg && oneimpute)) return(complete1(baseargs))

  ## One completed dataset per imputation 1..nimpute.
  ## Bug fix: the imputation number must vary per element; previously
  ## every iteration reused imputation = nimpute, so the returned list
  ## contained nimpute copies of the same completed dataset instead of
  ## the documented "first nimpute sets of multiple imputation".
  lapply(seq_len(nimpute), function(i) {
    args <- baseargs
    args$imputation <- i
    complete1(args)
  })
}
Hmisc/R/plotCorrM.r 0000644 0001762 0000144 00000007134 14252346124 013632 0 ustar ligges users ##' Plot Correlation Matrix and Correlation vs. Time Gap
##'
##' Constructs two `ggplot2` graphics. The first is a half matrix of rectangles where the height of the rectangle is proportional to the absolute value of the correlation coefficient, with positive and negative coefficients shown in different colors. The second graphic is a variogram-like graph of correlation coefficients on the y-axis and absolute time gap on the x-axis, with a `loess` smoother added. The times are obtained from the correlation matrix's row and column names if these are numeric. If any names are not numeric, the times are taken as the integers 1, 2, 3, ... The two graphics are `ggplotly`-ready if you use `plotly::ggplotly(..., tooltip='label')`.
##' @title plotCorrM
##' @param r correlation matrix
##' @param what specifies whether to return plots or the data frame used in making the plots
##' @param type specifies whether to use bottom-aligned rectangles (the default) or centered circles
##' @param xlab x-axis label for correlation matrix
##' @param ylab y-axis label for correlation matrix
##' @param maxsize maximum circle size if `type='circle'`
##' @param xangle angle for placing x-axis labels, defaulting to 0. Consider using `xangle=45` when labels are long.
##' @return a list containing two `ggplot2` objects if `what='plots'`, or a data frame if `what='data'`
##' @author Frank Harrell
##' @md
##' @export
##' @examples
##' set.seed(1)
##' r <- cor(matrix(rnorm(100), ncol=10))
##' g <- plotCorrM(r)
##' g[[1]] # plot matrix
##' g[[2]] # plot correlation vs gap time
##' # ggplotlyr(g[[2]])
##' # ggplotlyr uses ggplotly with tooltip='label' then removes
##' # txt: from hover text
plotCorrM <- function(r, what=c('plots', 'data'),
                      type=c('rectangle', 'circle'),
                      xlab='', ylab='', maxsize=12, xangle=0) {
  ## Build a long-format data frame of the upper triangle of correlation
  ## matrix r and either return it (what='data') or return a list of two
  ## ggplot2 objects: (1) a half-matrix of rectangles/circles sized by
  ## |r|, colored by sign, and (2) correlation vs. absolute time gap with
  ## a loess smooth.  Hover text is put in the 'label' aesthetic for use
  ## with ggplotly(..., tooltip='label').
  what <- match.arg(what)
  type <- match.arg(type)
  p <- dim(r)[1]
  v <- dimnames(r)[[1]]
  if(! length(v)) v <- as.character(1 : p)
  ## Times come from the dimnames when all-numeric, otherwise 1..p
  vn <- as.numeric(v)
  if(any(is.na(vn))) vn <- 1 : p
  mn <- min(abs(r))
  ## Flatten the matrix; keep only the strict upper triangle (x < y)
  R <- as.vector(r)
  x <- as.vector(col(r))
  y <- as.vector(row(r))
  txt <- paste0(v[x], ' vs. ', v[y], ' r=', round(R, 3))
  d <- subset(data.frame(x, y, delta=abs(vn[x] - vn[y]), r=R, txt), x < y)
  if(what == 'data') return(d)
  mx <- max(abs(d$r))
  ## Graphic 1: half matrix; rectangle height (or circle size) ~ |r|
  g1 <-
    switch(type,
           rectangle = ggplot(d,
                              aes(x=x, y=y,
                                  color=ifelse(r > 0, '+', '-'), label=txt)) +
             geom_segment(aes(x=x, y=y, xend=x, yend=y + 0.9 * abs(r) / mx),
                          size=3),
           circle = ggplot(d,
                           aes(x=x, y=y,
                               color=ifelse(r > 0, '+', '-'), label=txt,
                               size=abs(r))) +
             geom_point() + scale_size(range = c(0, maxsize)) )
  g1 <- g1 + scale_x_continuous(breaks = 1 : p, labels=v) +
    scale_y_continuous(breaks = 1 : p, labels=v) +
    guides(color = guide_legend(title=''),
           size = guide_legend(title='r')) +
    xlab(xlab) + ylab(ylab) +
    theme(axis.text.x=element_text(angle=xangle,
                                   hjust=if(xangle != 0) 1)) +
    labs(subtitle=paste0('max |r|:', round(mx, 3),
                         ' min |r|:', round(mn, 3)))
  # Would not run loess if use text=txt
  # Need to run ggplotly with tooltip='label'
  ## Graphic 2: variogram-like plot of r vs. absolute time gap
  g2 <- ggplot(d, aes(x=delta, y=r, label=txt)) +
    geom_point() + geom_smooth(method=loess) +
    xlab('Absolute Time Difference') + ylab('Correlation')
  list(g1, g2)
}
## Silence R CMD check NOTE about the non-standard-evaluation use of
## 'delta' inside aes()
utils::globalVariables('delta')
Hmisc/R/ecdf.s 0000644 0001762 0000144 00000032544 14561235550 012621 0 ustar ligges users Ecdf <- function(x, ...) UseMethod('Ecdf')
## Base-graphics empirical (cumulative) distribution plot for a numeric
## vector, optionally weighted, grouped (one step curve per group),
## complemented/scaled according to 'what', with optional quantile
## reference lines (q) and data-density display.  Invisibly returns the
## curve coordinates with attribute N = list(n, m).
Ecdf.default <- function(x, what=c('F','1-F','f','1-f'),
                         weights=rep(1, length(x)), normwt=FALSE,
                         xlab, ylab, q, pl=TRUE, add=FALSE, lty=1,
                         col=1, group=rep(1, length(x)),
                         label.curves=TRUE, xlim, subtitles=TRUE,
                         datadensity=c('none','rug','hist','density'),
                         side=1,
                         frac=switch(datadensity,
                                     none=NA,rug=.03,hist=.1,density=.1),
                         dens.opts=NULL, lwd=1, log='', ...)
{
  datadensity <- match.arg(datadensity)
  what <- match.arg(what)
  colspec <- FALSE
  if(datadensity != 'none') {
    if(side %in% c(2,4))
      stop('side must be 1 or 3 when datadensity is specified')
    if('frac' %nin% names(dens.opts))
      dens.opts$frac <- frac
    if('side' %nin% names(dens.opts))
      dens.opts$side <- side
    if('col' %in% names(dens.opts))
      colspec <- TRUE
  }
  if(missing(xlab))
    xlab <- label(x, units=TRUE, plot=TRUE, default=deparse(substitute(x)))
  what <- match.arg(what)
  if(missing(ylab)) ylab <- switch(what,
                                   'F'='Proportion <= x',
                                   '1-F'='Proportion > x',
                                   'f'='Frequency <= x')
  group <- as.factor(group)
  group <- group[drop=TRUE]
  if(length(x) != length(group))
    stop('length of x != length of group')
  ## Keep only observations non-missing in x, group, and weights
  nna <- !(is.na(x) | is.na(group) | is.na(weights))
  X <- x[nna]
  group <- group[nna]
  lev <- levels(group)
  nlev <- length(lev)
  curves <- vector('list',nlev)
  names(curves) <- lev
  ## Recycle graphical parameters to one value per group
  lty <- rep(lty, length.out=nlev)
  col <- rep(col, length.out=nlev)
  lwd <- rep(lwd, length.out=nlev)
  if(missing(xlim)) xlim <- range(X)
  ## n = used (weighted) sample size, m = amount discarded as missing
  n <- if(normwt) length(X) else sum(weights[nna])
  m <- (if(normwt) length(nna) else sum(weights, na.rm=TRUE)) - n
  weights <- weights[nna]
  for(i in 1:nlev) {
    s <- group == lev[i]
    x <- X[s]
    wt <- weights[s]
    xorig <- x
    z <- wtd.Ecdf(x, wt, type='i/n', normwt=normwt, na.rm=FALSE)
    x <- z$x; y <- z$ecdf
    ## Transform the ECDF according to the requested display
    switch(what,
           '1-F' = {y <- 1 - y},
           'f' = {y <- y * sum(wt)},
           '1-f' = {x <- x[-1]
             y <- as.vector((1 - y[- length(y)]) * sum(wt)) } )
    if(pl) {
      if(i==1 && !add)
        plot(x, y, xlab=xlab, ylab=ylab, xlim=xlim, type='n', log=log, ...)
      lines(x, y, type="s", lty=lty[i], col=col[i], lwd=lwd[i])
      if(subtitles && i == 1) {
        pm <- paste("n:", n, " m:", m, sep="")
        title(sub=pm, adj=0, cex=.5)
      }
      if(!missing(q)) {
        if(what == '1-f') stop('what="1-f" not yet implemented with q')
        ## NOTE(review): q appears to be transformed twice for what='f'
        ## (multiplied by y[length(y)] and then again by sum(wt)) and
        ## round-tripped for what='1-F' (1-q then 1-(1-q)); the switch
        ## below looks like it was meant to replace the if/else above.
        ## Confirm intended behavior before changing.
        if(what=='f') q <- q * y[length(y)]
        else
          if(what == '1-F') q <- 1 - q
        q <- switch(what,
                    'f' = q * sum(wt),
                    '1-F' = 1 - q,
                    'F' = q)
        a <- par("usr")
        ## Dashed reference lines from the y-axis to the curve, then
        ## down to the x-axis, at each requested quantile
        for(w in q) {
          quant <-
            if(what=='1-F') min(x[y <= w]) else min(x[y >= w])
          lines(c(a[1], quant), c(w, w), lty=2, col=1)
          lines(c(quant, quant), c(w, a[3]), lty=2, col=col[i])
        }
      }
    }
    curves[[i]] <- list(x=x, y=y)
    if(datadensity!='none') {
      if(!colspec)
        dens.opts$col <- col[i]
      do.call(switch(datadensity,
                     rug = 'scat1d',
                     hist = 'histSpike',
                     density= 'histSpike'),
              c(list(x=xorig, add=TRUE),
                if(datadensity=='density') list(type='density'),
                dens.opts))
    }
  }
  if(nlev > 1 && (is.list(label.curves) || label.curves))
    labcurve(curves, type='s', lty=lty, col.=col, opts=label.curves)
  invisible(structure(if(nlev==1) list(x = x, y = y) else curves,
                      N=list(n=n, m=m)))
}
## Ecdf method for data frames: draw one ECDF per qualifying numeric
## column (at least n.unique distinct non-missing values) in an
## automatically chosen mfrow layout.  Invisibly returns the number of
## pages drawn.
Ecdf.data.frame <- function(x, group=rep(1, nrows),
                            weights=rep(1,nrows), normwt=FALSE,
                            label.curves=TRUE, n.unique=10, na.big=FALSE,
                            subtitles=TRUE, vnames=c("labels","names"),
                            ...)
{
  vnames <- match.arg(vnames)
  mf <- par('mfrow')
  if(length(mf) == 0)
    mf <- c(1, 1)
  ## A column qualifies if non-character/factor with >= n.unique
  ## distinct non-missing values
  g <- function(v, n.unique) {
    if(is.character(v) || is.factor(v))
      return(FALSE)
    length(unique(v[!is.na(v)])) >= n.unique
  }
  use <- sapply(x, g, n.unique=n.unique)
  automf <- FALSE
  ## Choose a grid layout only if the user has not already set mfrow
  if((la <- sum(use)) > 1 & max(mf) == 1) {
    mf <-
      if(la<=4) c(2,2)
      else if(la<=6) c(2,3)
      else if(la<=9) c(3,3)
      else if(la<=12)c(3,4)
      else if(la<=16)c(4,4)
      else c(4,5)
    automf <- TRUE
  }
  oldmf <- par('mfrow')
  par(mfrow=mf)
  on.exit(par(oldmf))   # restore the user's layout on exit
  nam <- names(x)
  nrows <- nrow(x)      # referenced by the group/weights defaults
  i <- 0
  j <- 0
  group <- as.factor(group)
  for(j in (1 : length(x))[use]) {
    v <- x[[j]]
    i <- i + 1
    lab <- if(vnames == 'names') nam[j] else
      label(v, units=TRUE, plot=TRUE, default=nam[j])
    z <- Ecdf(v, group=group, weights=weights, normwt=normwt,
              xlab=lab, label.curves=label.curves,
              subtitles=subtitles, ...)
    if(na.big) {
      m <- attr(z, 'N')$m
      if(m > 0)
        mtext(paste(m,"NAs"), line=-2, cex=1)
    }
    ## Pause between full pages on interactive screen devices
    if(automf && interactive() &&
       all(names(dev.list()) %nin% c('postscript','win.printer')) &&
       (i %% prod(mf)==0)) {
      cat("click left mouse button to proceed\n")
      locator(1)
    }
  }
  invisible(ceiling(sum(use) / prod(mf)))
}
prepanel.Ecdf <- function(x, y, fun, what, ...) {
  ## Prepanel function for lattice Ecdf displays: x-limits come from the
  ## data; y-limits depend on the display type ('F'/'1-F' are proportions
  ## in [0,1], 'f'/'1-f' are frequencies in [0, # non-missing]), passed
  ## through the axis-transforming function fun.  y is ignored.
  xrange  <- range(x, na.rm=TRUE)
  nonmiss <- sum(! is.na(x))
  yrange  <- switch(what,
                    'F'   = ,
                    '1-F' = c(0, 1),
                    'f'   = ,
                    '1-f' = c(0, nonmiss))
  yrange <- fun(yrange)
  ## A transforming fun (e.g. qnorm) can map 0 or 1 to +/-Inf; fall back
  ## to slightly interior probabilities
  if(any(is.infinite(yrange))) yrange <- fun(c(.001, .999))
  list(xlim=xrange, ylim=yrange, dx=diff(xrange), dy=diff(yrange))
}
## Lattice panel function for Ecdf.formula: draws weighted ECDF step
## curves (one per group when 'groups' is given), optional quantile
## reference lines, optional data-density display, curve labeling, and
## registers a .Key function for later legend drawing.
panel.Ecdf <- function(x, y, subscripts, groups=NULL,
                       q=NULL, type='s',
                       method=c('i/n','(i-1)/(n-1)','i/(n+1)'), fun,
                       what = c('F', '1-F', 'f', '1-f'),
                       label.curves=TRUE,
                       lwd = plot.line$lwd,
                       lty = plot.line$lty,
                       pch = plot.symbol$pch,
                       cex = plot.symbol$cex,
                       font= plot.symbol$font,
                       col = NULL, ...)
{
  sRequire('lattice')
  method <- match.arg(method)
  what <- match.arg(what)
  if(length(groups)) groups <- as.factor(groups)
  type <- 's'   # lattice histogram sets to 'percent'
  g <- unclass(groups)[subscripts]
  ng <- if(length(groups)) max(g, na.rm=TRUE) else 1
  ## Use superpose settings when there is more than one group
  plot.symbol <- lattice::trellis.par.get(
    if(ng > 1) "superpose.symbol" else "plot.symbol")
  plot.line <- lattice::trellis.par.get(
    if(ng > 1) "superpose.line" else "plot.line")
  ## Dashed reference lines from the y-axis to each quantile of x, then
  ## down to the x-axis
  qrefs <- function(x, q, col, fun, llines, grid) {
    quant <- quantile(x, probs=q, na.rm=TRUE)
    a <- parGrid(grid)$usr
    for(i in 1 : length(q)) {
      llines(c(a[1], quant[i]), fun(c(q[i], q[i])), lty=2, col=1)
      llines(c(quant[i], quant[i]), fun(c(q[i], a[3])), lty=2, col=col)
    }
  }
  ## Panel worker for the ungrouped case: one ECDF curve for all of x
  ppanel <- function(x, y, type, cex, pch, font, lwd, lty, col, q,
                     qrefs, ecdf.type, fun=fun, what,
                     datadensity=c('none','rug','hist','density'),
                     side=1,
                     frac=switch(datadensity,
                                 none=NA,
                                 rug=.03,
                                 hist=.1,
                                 density=.1),
                     dens.opts=NULL, llines, ...) {
    ## y ignored
    z <- wtd.Ecdf(x, type=ecdf.type, na.rm=FALSE)
    zx <- z$x
    y <- z$ecdf
    ## Transform the ECDF according to the requested display
    switch(what,
           '1-F' = {y <- 1 - y},
           'f' = {y <- y * length(x)},
           '1-f' = {zx <- zx[-1]
             y <- as.vector((1 - y[- length(y)]) * length(x)) } )
    ## For some reason S-Plus will not plot anything the following way
    ## when lwd is a variable
    ##llines(z$x, fun(z$ecdf), lwd = lwd, lty = lty, col = col,
    ##       type = type, ...)
    do.call(llines,
            list(zx, fun(y), lwd = lwd, lty = lty, col = col,
                 type = type, ...))
    if(length(q))
      qrefs(x, q, col, fun=fun, llines=llines, grid=TRUE)
    datadensity <- match.arg(datadensity)
    if(datadensity != 'none') {
      if(side %in% c(2,4))
        stop('side must be 1 or 3 when datadensity is specified')
      ## Fill in data-density defaults without clobbering user settings
      if('frac' %nin% names(dens.opts))
        dens.opts$frac <- frac
      if('side' %nin% names(dens.opts))
        dens.opts$side <- side
      if('col' %nin% names(dens.opts))
        dens.opts$col <- col
      if('lwd' %nin% names(dens.opts))
        dens.opts$lwd <- lwd
      do.call(switch(datadensity,
                     rug ='scat1d',
                     hist='histSpike',
                     density='histSpike'),
              c(list(x=x, add=TRUE, grid=TRUE),
                if(datadensity == 'density')
                  list(type='density'),
                dens.opts))
    }
  } # end ppanel
  ## Panel worker for the grouped case: one curve per group level;
  ## returns the curve coordinates for labcurve()
  pspanel <- function(x, subscripts, groups, type, lwd, lty,
                      pch, cex, font, col, q, qrefs,
                      ecdf.type, fun, what, llines, ...) {
    ## y ignored
    lev <- levels(groups)
    groups <- as.numeric(groups)[subscripts]
    N <- seq(along = groups)
    curves <- list()
    for(i in 1:length(lev)) {
      which <- N[groups == i]
      ## sort in x
      j <- which # no sorting
      if(any(j)) {
        z <- wtd.Ecdf(x[j], type=ecdf.type, na.rm=FALSE)
        zx <- z$x
        y <- z$ecdf
        switch(what,
               '1-F' = {y <- 1 - y},
               'f' = {y <- y * length(x[j])},
               '1-f' = {zx <- zx[-1]
                 y <- as.vector((1 - y[- length(y)]) * length(x[j])) } )
        do.call(llines,
                list(zx, fun(y),
                     col = col[i], lwd = lwd[i], lty = lty[i],
                     type = type, ...))
        if(length(q)) qrefs(x[j], q, col[i], fun=fun, llines=llines,
                            grid=TRUE)
        curves[[lev[i]]] <- list(x=zx, y=fun(y))
      }
    }
    curves
  } # end pspanel
  ## Recycle graphical parameters to one value per group
  lty <- rep(lty, length = ng)
  lwd <- rep(lwd, length = ng)
  pch <- rep(pch, length = ng)
  cex <- rep(cex, length = ng)
  font <- rep(font,length = ng)
  if(!length(col)) col <- plot.line$col
  col <- rep(col, length = ng)
  if(ng > 1) {
    levnum <- sort(unique(g))
    curves <- pspanel(x, subscripts, groups,
                      lwd=lwd, lty=lty, pch=pch, cex=cex,
                      font=font, col=col, type=type, q=q, qrefs=qrefs,
                      ecdf.type=method, fun=fun, what=what,
                      llines=lattice::llines)
    if(!(is.logical(label.curves) && !label.curves)) {
      lc <-
        if(is.logical(label.curves))
          list(lwd=lwd, cex=cex[1])
        else
          c(list(lwd=lwd, cex=cex[1]), label.curves)
      labcurve(curves, lty=lty[levnum], lwd=lwd[levnum], col.=col[levnum],
               opts=lc, grid=TRUE, ...)
    }
  }
  else ppanel(x, lwd=lwd, lty=lty, pch=pch, cex=cex,
              font=font, col=col, type=type, q=q, qrefs=qrefs,
              ecdf.type=method, fun=fun, what=what, llines=lattice::llines, ...)
  if(ng > 1) { ##set up for key() if points plotted
    ## .Key draws a legend on top of the finished lattice plot; the
    ## current settings are baked into its formals below
    .Key <- function(x=0, y=1, lev, col, lty, lwd, ...)
    {
      oldpar <- par('usr', 'xpd')
      par(usr=c(0,1,0,1),xpd=NA)
      ## Even though par('usr') shows 0,1,0,1 after lattice draws
      ## its plot, it still needs resetting
      on.exit(par(oldpar))
      if(is.list(x))
      {
        y <- x[[2]]; x <- x[[1]]
      }
      if(!length(x)) x <- 0
      if(!length(y)) y <- 1 ## because of formals()
      rlegend(x, y, legend=lev, lty=lty, lwd=lwd, col=col)
      invisible()
    }
    formals(.Key) <- list(x=NULL, y=NULL, lev=levels(groups), col=col,
                          lty=lty, lwd=lwd,...=NULL)
    .setKey(.Key)
  }
}
## Formula method for Ecdf: trellis (lattice) display of ECDFs, possibly
## stratified by conditioning variables in the formula and/or 'groups'.
## Delegates to lattice::histogram with the Ecdf prepanel/panel functions.
Ecdf.formula <- function(x, data = sys.frame(sys.parent()),
                         groups = NULL,
                         prepanel=prepanel.Ecdf, panel=panel.Ecdf, ...,
                         xlab, ylab, fun=function(x)x,
                         what=c('F', '1-F', 'f', '1-f'),
                         subset=TRUE)
{
  sRequire('lattice')
  what <- match.arg(what)
  vars <- all.vars(x)
  xname <- vars[1]
  ## Default x label from the analysis variable's Hmisc label/units
  if(missing(xlab))
    xlab <- label(eval(parse(text=xname), data),
                  units=TRUE, plot=TRUE, default=xname, grid=TRUE)
  if(missing(ylab))
    ylab <-
      if(missing(fun))
        paste(switch(what,
                     F = 'Proportion <=',
                     '1-F' = 'Proportion >=',
                     'f' = 'Number <=',
                     '1-f' = 'Number >='), xname)
      else ''
  ## Evaluate groups/subset lazily in 'data' (non-standard evaluation)
  subset <- eval(substitute(subset), data)
  lh <- lattice::histogram
  do.call(lh,
          c(list(x, data=data, prepanel=prepanel, panel=panel,
                 ylab=ylab, xlab=xlab, fun=fun, what=what),
            if(!missing(groups))
              list(groups=eval(substitute(groups), data)),
            if(!missing(subset))
              list(subset=subset),
            list(...)))
}
Hmisc/R/summaryP.s 0000644 0001762 0000144 00000037546 14400366340 013536 0 ustar ligges users summaryP <- function(formula, data=NULL,
                     ## Tabulate proportions for the categorical
                     ## left-hand-side variables of a Formula, within
                     ## every combination of right-hand-side variables.
                     ## Returns a long data frame of class 'summaryP'
                     ## with columns var, val, freq, denom plus one
                     ## column per stratification variable, and
                     ## attributes used by plot/ggplot/latex methods.
                     subset=NULL, na.action=na.retain,
                     sort=TRUE,
                     asna=c('unknown', 'unspecified'), ...) {
  formula <- Formula::Formula(formula)
  Y <- if(length(subset))
    model.frame(formula, data=data, subset=subset, na.action=na.action)
  else
    model.frame(formula, data=data, na.action=na.action)
  X <- Formula::model.part(formula, data=Y, rhs=1)
  Y <- Formula::model.part(formula, data=Y, lhs=1)
  nY <- NCOL(Y)
  nX <- NCOL(X)
  namY <- names(Y)
  ## With no stratifiers use a single constant stratum
  if(nX == 0) X <- data.frame(x=rep(1, NROW(Y)))
  else {
    ## Remove observations with any values of X NA
    i <- apply(is.na(X), 1, any)
    if(any(i)) {
      X <- X[! i,, drop=FALSE]
      Y <- Y[! i,, drop=FALSE]
    }
  }
  ux <- unique(X)
  Z <- NULL
  n <- nrow(X)
  Lev <- character(0)
  if(sort) {
    ## Compute marginal frequencies of all regular variables so can sort
    mfreq <- list()
    for(ny in namY) {
      y <- Y[[ny]]
      if(!inherits(y, 'ynbind') && !inherits(y, 'pBlock')) {
        if(length(asna) && (is.factor(y) || is.character(y)))
          y[y %in% asna] <- NA
        freq <- table(y)
        counts <- as.numeric(freq)
        names(counts) <- names(freq)
        mfreq[[ny]] <- - sort(- counts)
      }
    }
  }
  ## Save combinations of var and val to exclude if exclude1 is used in
  ## a function that operates on summaryP result
  ylevels.to.exclude1 <- NULL
  for(ny in namY) {
    y <- Y[[ny]]
    la <- label(y)
    if(la == '') la <- ny
    tab <- table(y)
    tab <- structure(as.numeric(tab), names=names(tab))
    ## For binary variables, mark the more frequent level as redundant
    if(length(tab) == 2)
      ylevels.to.exclude1 <-
        rbind(ylevels.to.exclude1,
              data.frame(var=la, val=names(tab)[which.max(tab)]))
  }
  ## Tabulate each Y variable within each unique X combination
  for(i in 1 : nrow(ux)) {
    j <- rep(TRUE, n)
    if(nX > 0) for(k in 1 : nX) j <- j & (X[[k]] == ux[i, k])
    for(k in 1 : nY) {
      ## y <- yx[[k]] doesn't work as attributes lost by [.data.frame
      y <- Y[[k]]
      y <- if(is.matrix(y)) y[j,, drop=FALSE] else y[j]
      # y <- (Y[[k]])[j,, drop=FALSE]
      if(inherits(y, 'ynbind') || inherits(y, 'pBlock')) {
        ## Multi-column blocks: one row of output per block column
        overlab <- attr(y, 'label')
        labs <- attr(y, 'labels')
        z <- NULL
        for(iy in 1 : ncol(y)) {
          tab <- table(y[, iy])
          no <- as.numeric(sum(tab))
          if(inherits(y, 'ynbind')) {
            d <- data.frame(var=overlab,
                            val=labs[iy],
                            freq=as.numeric(tab['TRUE']),
                            denom=no)
            Lev <- c(Lev, as.character(labs[iy]))
          } else {
            d <- data.frame(var=overlab,
                            val=names(tab), # paste(labs[iy], names(tab)),
                            freq=as.numeric(tab),
                            denom=no)
            Lev <- c(Lev, names(tab))
          }
          z <- rbind(z, d)
        }
      }
      else { # regular single column
        if(length(asna) && (is.factor(y) || is.character(y)))
          y[y %in% asna] <- NA
        tab <- table(y)
        ny <- namY[k]
        la <- label(y)
        if(la == '') la <- ny
        lev <- names(tab)
        ## mf <- mfreq[[ny]]
        no <- as.numeric(sum(tab))
        if(sort) lev <- reorder(lev, (mfreq[[ny]])[lev])
        Lev <- c(Lev, as.character(lev))
        z <- data.frame(var = unname(la),
                        val = lev,
                        freq = as.numeric(tab),
                        denom = no,
                        stringsAsFactors=TRUE)
      }
      ## Add current X subset settings
      ## (inner loop reuses index k; safe in R because for() resets the
      ## outer k from its precomputed sequence each iteration)
      if(nX > 0)
        for(k in 1: nX)
          z[[names(ux)[k]]] <- if(is.character(ux[i,k]))
            factor(ux[i, k]) else ux[i,k]
      Z <- rbind(Z, z)
    }
  }
  Z$val <- factor(Z$val, levels=unique(Lev))
  ## Translate the (var, val) exclusion pairs into row numbers of Z
  yl <- ylevels.to.exclude1
  iex <- integer(0)
  if(length(yl)) {
    for(i in 1 : nrow(Z)) {
      exi <- FALSE
      for(j in 1 : nrow(yl))
        if(as.character(Z$var[i]) == as.character(yl$var[j]) &&
           as.character(Z$val[i]) == as.character(yl$val[j])) exi <- TRUE
      if(exi) iex <- c(iex, i)
    }
  }
  structure(Z, class=c('summaryP', 'data.frame'), formula=formula,
            nX=nX, nY=nY, rows.to.exclude1=iex)
}
## Plot method for summaryP objects: multi-panel dot chart of
## proportions.  Uses dotchartpl (plotly) when options(grType='plotly'),
## otherwise lattice::dotplot with optional printed num/denom fractions.
plot.summaryP <-
  function(x, formula=NULL, groups=NULL,
           marginVal=NULL, marginLabel=marginVal, refgroup=NULL,
           exclude1=TRUE, xlim=c(-.05, 1.05), text.at=NULL,
           cex.values=0.5,
           key=list(columns=length(groupslevels),
                    x=.75, y=-.04, cex=.9,
                    col=lattice::trellis.par.get('superpose.symbol')$col,
                    corner=c(0,1)),
           outerlabels=TRUE, autoarrange=TRUE, col=colorspace::rainbow_hcl,
           ...)
{
  sRequire('lattice')
  sRequire('latticeExtra')
  ## marginval: category name indicating addMarginal summaries (usually 'All')
  ## marginLabel: a fuller label for this, e.g. 'All Regions'
  X <- x
  at <- attributes(x)
  Form <- at$formula
  nX <- at$nX
  nY <- at$nY
  groupslevels <- if(length(groups)) levels(x[[groups]])
  condvar <- setdiff(names(X), c('val', 'freq', 'denom', groups))
  ## Reorder condvar in descending order of number of levels
  numu <- function(x) if(is.factor(x)) length(levels(x))
  else length(unique(x[! is.na(x)]))
  ## Drop redundant binary levels marked by summaryP
  if(exclude1 && length(at$rows.to.exclude1))
    X <- X[- at$rows.to.exclude1, , drop=FALSE]
  if(autoarrange && length(condvar) > 1) {
    nlev <- sapply(X[condvar], numu)
    condvar <- condvar[order(nlev)]
  }
  ## plotly branch: delegate to dotchartpl
  if(grType() == 'plotly') {
    condvar <- setdiff(condvar, 'var')
    if(length(condvar) > 1)
      stop('with options(grType="plotly") does not handle > 1 stratification variable')
    if(length(condvar) == 1 && length(marginVal)) {
      X$Big <- X[[condvar]] == marginVal
      if(marginLabel != marginVal)
        X[[condvar]] <- ifelse(X$Big, marginLabel, as.character(X[[condvar]]))
    }
    ## Trace label: group name alone for marginal rows, otherwise
    ## "group stratified by condvar"
    X$.gg. <-
      if(length(groups)) {
        if(length(condvar) == 1 && length(marginVal))
          ifelse(X$Big, as.character(X[[groups]]),
                 paste0(X[[groups]], ' stratified by ', condvar[1]))
        else
          X[[groups]]
      }
    p <-
      with(X,
           dotchartpl(freq / denom,
                      major = var,
                      minor = val,
                      group = if(length(groups)) .gg.,
                      mult = if(length(condvar) > 0) X[[condvar]],
                      big = if(length(condvar) == 1 && length(marginVal)) Big,
                      num = freq,
                      denom = denom,
                      refgroup = refgroup,
                      xlim = xlim,
                      col = col,
                      nonbigtracename=if(length(condvar))
                        paste0('Stratified by\n', condvar[1])
                      else
                        'Stratified Estimates',
                      ...)
           )
    return(p)
  }
  ## lattice branch: build val ~ freq | condvar1 * condvar2 ... formula
  form <- if(length(formula)) formula
  else as.formula(
    paste('val ~ freq',
          paste(condvar, collapse=' * '), sep=' | '))
  ## Panel: dotplot of proportions plus optional freq/denom text at the
  ## right edge of each panel
  pan <- function(x, y, subscripts, groups=NULL, ...) {
    y <- as.numeric(y)
    denom <- X$denom[subscripts]
    lattice::panel.dotplot(x/denom, y, subscripts=subscripts,
                           groups=groups, ...)
    if(length(cex.values) && cex.values > 0) {
      col <- if(length(groups)) lattice::trellis.par.get('superpose.symbol')$col
      else lattice::trellis.par.get('dot.symbol')$col
      longest.string <- paste(max(x), max(denom), sep='/ ')
      length.longest <- unit(1, 'strwidth', longest.string)
      xpos <- unit(1, 'npc') - unit(1, 'mm')
      txt <- if(length(groups)) {
        groups <- groups[subscripts]
        tx <- ''
        ig <- 0
        ## One column of fractions per group level, right-aligned
        xpos <- xpos - length(levels(groups)) * length.longest
        for(g in levels(groups)) {
          ig <- ig + 1
          i <- groups == g
          fr <- paste(x[i], denom[i], sep='/')
          xpos <- xpos + length.longest
          grid.text(fr, xpos, unit(y, 'native') - unit(1, 'mm'),
                    just=c('right','top'), gp=gpar(cex=cex.values, col=col[ig]))
        }
      }
      else {
        fr <- paste(x, denom, sep='/')
        grid.text(fr, xpos, unit(y, 'native') - unit(1, 'mm'),
                  gp=gpar(cex=cex.values, col=col[1]), just=c('right','top'))
      }
    }
  }
  scal <- list(y='free', rot=0)
  scal$x <- if(length(text.at)) {
    at <- pretty(xlim)
    list(limits=range(c(xlim, text.at)), at=at[at >= -0.0001 & at <= 1.0001])
  } else list(limits=xlim)
  ## With groups, the group variable name must appear literally in the
  ## dotplot call, hence the eval(parse(sprintf(...))) construction
  d <- if(!length(groups))
    lattice::dotplot(form, data=X, scales=scal, panel=pan,
                     xlab='Proportion', ...)
  else eval(parse(text=
                    sprintf("lattice::dotplot(form, groups=%s, data=X, scales=scal, panel=pan, auto.key=key, xlab='Proportion', ...)", groups) ))
  # if(outerlabels && ((nX - length(groups) + 1 == 2) ||
  #                    length(dim(d)) == 2)) d <- useOuterStrips(d)
  if(length(dim(d)) == 2) d <- latticeExtra::useOuterStrips(d)
  ## Avoid wasting space for vertical variables with few levels
  if(condvar[length(condvar)] == 'var') {
    vars <- levels(X$var)
    nv <- length(vars)
    h <- integer(nv)
    for(i in 1 : nv) h[i] <- length(unique((X$val[X$var == vars[i]])))
    d <- latticeExtra::resizePanels(d, h = h + 1)
  }
  d
}
## ggplot method for summaryP objects: faceted dot plot of proportions,
## optionally sizing points by denominator and coloring/shaping by a
## group variable.  Builds the ggplot call as a string because the group
## variable name must appear literally inside aes().
ggplot.summaryP <-
  function(data, mapping, groups=NULL, exclude1=TRUE, xlim=c(0, 1),
           col=NULL, shape=NULL, size=function(n) n ^ (1/4),
           sizerange=NULL, abblen=5,
           autoarrange=TRUE, addlayer=NULL, ..., environment)
{
  X <- data
  class(X) <- setdiff(class(X), 'summaryP')
  at <- attributes(X)
  Form <- at$formula
  nX <- at$nX
  nY <- at$nY
  groupslevels <- if(length(groups)) levels(X[[groups]])
  condvar <- setdiff(names(X), c('val', 'freq', 'denom', groups))
  ## Reorder condvar in descending order of number of levels
  numu <- function(x) if(is.factor(x)) length(levels(x))
  else length(unique(x[! is.na(x)]))
  ## Drop redundant binary levels marked by summaryP
  if(exclude1 && length(at$rows.to.exclude1))
    X <- X[- at$rows.to.exclude1, , drop=FALSE]
  if(autoarrange && length(condvar) > 1) {
    nlev <- sapply(X[condvar], numu)
    condvar <- condvar[order(nlev)]
  }
  ## Find list of variables that contain only one level but have a
  ## variable name longer than abblen characters.
  ## The space devoted
  ## to one-level variables is not tall enough to print the variable name.
  ## Replace the name with (1) (2) ... and put the variable names possibly
  ## in a footnote
  fnvar <- ''
  lvar <- levels(X$var)
  i <- 0
  for(v in lvar) {
    maxlen <- nchar(v) # max(nchar(strsplit(v, split=' ')[[1]]))
    if(maxlen > abblen) {
      nlev <- length(unique(X$val[X$var == v]))
      if(nlev == 1) {
        i <- i + 1
        w <- paste('(', i, ')', sep='')
        if(i > 1) fnvar <- paste(fnvar, '; ', sep='')
        fnvar <- paste(fnvar, w, ' ', v, sep='')
        levels(X$var)[levels(X$var) == v] <- w
      }
    }
  }
  ## Wrap long factor levels onto multiple lines (width 10)
  spl <- function(x) {
    u <- levels(x)
    n <- length(u)
    utrans <- character(n); names(utrans) <- u
    for(w in u)
      utrans[w] <- paste(strwrap(w, 10), collapse='\n')
    factor(x, u, utrans)
  }
  X$var <- spl(X$var)
  if(length(condvar) == 2) {
    othvar <- setdiff(condvar, 'var')
    X[[othvar]] <- spl(X[[othvar]])
  }
  N <- X$denom
  rN <- range(N)
  ratioN <- rN[2] / rN[1]
  ## Denominators nearly constant: no point sizing by N
  if(diff(rN) < 10 | (ratioN < 1.2)) size <- NULL
  ## plotly hover label
  X$hov <- paste0(round(X$freq / X$denom, 3), ' ',
                  markupSpecs$html$frac(X$freq, X$denom, size=90))
  if(length(groups)) X$hov <- paste0(X[[groups]], ' ', X$hov)
  ## Assemble the ggplot call text, then eval(parse()) it below
  k <- 'ggplot(X, aes(x=freq / denom, y=val, text=hov'
  if(length(groups)) k <- paste(k, sprintf(', color=%s, shape=%s',
                                           groups, groups))
  k <- paste(k, '))')
  if(length(size)) {
    k <- paste(k,
               if(length(size)) 'geom_point(aes(size = N))' else
               'geom_point()',
               sep=' + ')
    Ns <- X$denom
    if(! length(sizerange)) {
      fn <- if(is.function(size)) size else sqrt
      sizerange <- c(max(0.7, 2.7 / fn(ratioN)), 3.25)
    }
    if(is.function(size)) {
      ## Continuous size scale on transformed N, labeled in raw N
      X$N <- size(Ns)
      Ns0 <- Ns[Ns > 0]
      uN <- unique(sort(Ns0))
      Nbreaks <- if(length(uN) < 8) uN else
        unique(round(quantile(Ns0, (0 : 6) / 6, type=1)))
      Nbreakst <- size(Nbreaks)
      k <- paste(k,
                 'scale_size_continuous(breaks=Nbreakst, labels=format(Nbreaks), range=sizerange)', sep=' + ')
    }
    else {
      ## size given as a number of cut2 groups: discrete size scale
      k <- paste(k, 'scale_size_discrete(range = sizerange)', sep=' + ')
      X$N <- cut2(Ns, g=size)
    }
  }
  else k <- paste(k, 'geom_point()', sep=' + ')
  p <- eval(parse(text=k))
  if(length(addlayer)) p <- p + addlayer
  if('var' %nin% condvar) stop('program logic error')
  if(length(condvar) == 1)
    p <- p + facet_grid(var ~ . , scales='free_y', space='free_y')
  else {
    p <- p + facet_grid(as.formula(sprintf('var ~ %s', othvar)),
                        scales='free_y', space='free_y')
  }
  p <- p + xlim(xlim) + xlab('Proportion') + ylab('')
  if(length(col)) p <- p + scale_color_manual(values=col)
  if(length(shape)) p <- p + scale_shape_manual(values=shape)
  ## Footnote text mapping (1), (2), ... back to abbreviated names
  if(fnvar != '') attr(p, 'fnvar') <- fnvar
  p
}
## LaTeX method for summaryP objects: writes a table of proportions, each
## cell rendered as the rounded proportion followed by a small
## \frac{numerator}{denominator}.  One table (wrapped in a titled minipage)
## is produced per stratum when stratification variables are present.
##
## object   : summaryP result; long data frame with columns var, val, freq,
##            denom, an optional grouping variable, and any strata variables
## groups   : name of the superpositioning (grouping) variable, if any
## exclude1 : if TRUE, drop redundant rows for binary variables using the
##            'rows.to.exclude1' attribute stored by summaryP
## file     : destination passed to latex() and cat(); '' means the console
## round    : number of digits for the displayed proportion
## size     : LaTeX font size passed to latex()
## append   : if FALSE the output file is truncated before writing
## ...      : unused here (kept for generic compatibility)
## Returns the last latex() result with attributes 'ngrouplevels' and
## 'nstrata' added.
latex.summaryP <- function(object, groups=NULL, exclude1=TRUE, file='', round=3,
                           size=NULL, append=TRUE, ...) {
  class(object) <- 'data.frame'
  rte <- attr(object, 'rows.to.exclude1')
  if(exclude1 && length(rte))
    object <- object[- rte, , drop=FALSE]

  if(! append) cat('', file=file)   # truncate the output file

  ## Blank cell when the denominator is zero, otherwise rounded proportion
  p <- ifelse(object$denom == 0, '',
              format(round(object$freq / object$denom, round)))
  ## Proportion followed by a small LaTeX fraction freq/denom
  object$y <- paste(p, ' {\\scriptsize$\\frac{',
                    format(object$freq), '}{', format(object$denom),
                    '}$}', sep='')
  object$freq <- object$denom <- NULL

  ## Any remaining variables are strata; collapse them into a single factor
  stratvar <- setdiff(names(object), c('var', 'val', 'y', groups))
  svar <- if(! length(stratvar)) as.factor(rep('', nrow(object)))
          else {
            if(length(stratvar) == 1) object[[stratvar]]
            else do.call('interaction', list(object[stratvar], sep=' '))
          }

  object$stratvar <- svar
  object <- object[, c('var', 'val', 'y', groups, 'stratvar')]
  nl <- 0

  slev  <- levels(svar)
  nslev <- length(slev)
  for(i in 1 : nslev) {
    ## With >1 stratum, wrap each stratum's table in a titled minipage
    if(nslev > 1) cat('\n\\vspace{1ex}\n\\begin{minipage}{\\linewidth}\n\\textbf{', slev[i],
                      '}\n\\vspace{1ex}\n\n', sep='', file=file, append=TRUE)
    x <- object[svar == slev[i], colnames(object) != 'stratvar']
    if(length(groups)) {
      ## Map var/val to first-appearance order so the original row order
      ## can be restored after reshape() (which may reorder rows)
      ord <- function(v) {
        v <- as.character(v)
        un <- unique(v)
        vn <- 1 : length(un)
        names(vn) <- un
        vn[v]
      }
      varn <- ord(x$var)
      valn <- ord(x$val)

      ## Wide layout: one y column per level of the grouping variable
      r <- reshape(x, timevar=groups, direction='wide',
                   idvar=c('var', 'val'))

      ## reorder rows to be in original order
      ir <- order(varn[as.character(r$var)],
                  valn[as.character(r$val)])
      r <- r[ir, ]

      ## reshape does not respect order of levels of factors; reorder columns
      lev <- levels(x[[groups]])
      r <- r[c('var', 'val', paste('y', lev, sep='.'))]
      nl <- length(lev)

      w <- latex(r[colnames(r) != 'var'],
                 table.env=FALSE, file=file, append=TRUE,
                 rowlabel='', rowname=rep('', nrow(r)),
                 rgroup=levels(r$var), n.rgroup=as.vector(table(r$var)),
                 size=size,
                 colheads=c(' ', lev),
                 center='none')
    }
    else {
      ## No grouping variable: a single column of proportions
      w <- latex(x[colnames(x) != 'var'],
                 table.env=FALSE, file=file, append=TRUE,
                 rowlabel='', rowname=rep('', nrow(x)),
                 rgroup=levels(x$var), n.rgroup=as.vector(table(x$var)),
                 size=size, colheads=c(' ', ' '), center='none')
    }
    if(nslev > 1) cat('\\end{minipage}\n', file=file, append=TRUE)
  }
  attr(w, 'ngrouplevels') <- nl
  attr(w, 'nstrata') <- nslev
  w
}
Hmisc/R/princmp.r 0000644 0001762 0000144 00000023222 14764627163 013372 0 ustar ligges users ##' Enhanced Output for Principal and Sparse Principal Components
##'
##' Expands any categorical predictors into indicator variables, and calls `princomp` (if `method='regular'` (the default)) or `sPCAgrid` in the `pcaPP` package (`method='sparse'`) to compute lasso-penalized sparse principal components. By default all variables are first scaled by their standard deviation after observations with any `NA`s on any variables in `formula` are removed. Loadings of standardized variables, and if `orig=TRUE` loadings on the original data scale are printed. If `pl=TRUE` a scree plot is drawn with text added to indicate cumulative proportions of variance explained. If `sw=TRUE`, the `leaps` package `regsubsets` function is used to approximate the PCs using forward stepwise regression with the original variables as individual predictors.
##'
##' A `print` method prints the results and a `plot` method plots the scree plot of variance explained.
##' @title princmp
##' @param formula a formula with no left hand side, or a numeric matrix
##' @param data a data frame or table. By default variables come from the calling environment.
##' @param method specifies whether regular or sparse principal components are computed
##' @param k the number of components to plot, display, and return
##' @param kapprox the number of components to approximate with stepwise regression when `sw=TRUE`
##' @param cor set to `FALSE` to compute PCs on the original data scale, which is useful if all variables have the same units of measurement
##' @param sw set to `TRUE` to run stepwise regression PC prediction/approximation
##' @param nvmax maximum number of predictors to allow in stepwise regression PC approximations
##' @return a list of class `princmp` with elements `scores`, a k-column matrix with principal component scores, with `NA`s when the input data had an `NA`, and other components useful for printing and plotting. If `k=1` `scores` is a vector. Other components include `vars` (vector of variances explained), `method`, `k`.
##' @author Frank Harrell
##' @export
##' @md
princmp <- function(formula, data=environment(formula),
                    method=c('regular', 'sparse'),
                    k=min(5, p - 1), kapprox=min(5, k),
                    cor=TRUE, sw=FALSE, nvmax=5) {
  method <- match.arg(method)
  if(method == 'sparse')
    if(! requireNamespace('pcaPP', quietly=TRUE))
      stop('You must install the pcaPP package to use method="sparse"')

  ## Build the numeric matrix: expand a formula through the model matrix
  ## (categorical variables become indicator columns) or coerce directly
  isf <- inherits(formula, 'formula')
  X <- if(isf) model.matrix.lm(formula, data) else as.matrix(formula)
  if(isf) X <- X[, -1, drop=FALSE] # remove intercept
  p <- ncol(X)   # must be set before res is built: the default k = min(5, p - 1)
                 # is a promise first forced when res is constructed below
  res <- list(n=nrow(X), ndata=if(! missing(data)) nrow(data),
              method=method, k=k, kapprox=kapprox, cor=cor, nvmax=nvmax)
  g <- switch(method,
              regular = stats::princomp(X, cor=cor),
              sparse  = pcaPP::sPCAgrid(X, k=ncol(X) - 1, method='sd',
                                        center=mean,
                                        scale=if(cor) sd else function(x) 1.,
                                        scores=TRUE, maxiter=10) )
  co <- unclass(g$loadings)
  ## (Removed a dead local that computed a 'Sparse ' label; the print and
  ## plot methods derive that label themselves from res$method.)
  res$scoef <- co          # loadings of standardized variables
  res$scale <- g$scale
  p   <- ncol(co)
  sds <- g$scale
  ## Convert to loadings on the original (unstandardized) variable scale
  co <- co / matrix(rep(sds, p), nrow=length(sds))
  res$coef <- co
  vars     <- g$sdev^2     # variances explained by each component
  res$vars <- vars

  if(sw) {
    ## Approximate the first kapprox PCs with forward stepwise regression
    ## on the original variables, recording cumulative R^2 per added variable
    if(! requireNamespace('leaps', quietly=TRUE))
      stop('You must install the leaps package when sw=TRUE')
    swa <- list()
    for(j in 1 : kapprox) {
      fchar <- capture.output(  # don't allow regular output
        f <- leaps::regsubsets(X, g$scores[, j], method='forward',
                               nbest=1, nvmax=min(p - 1, nvmax)))
      debug <- getOption('princmpdebug', FALSE)
      if(debug) cat('Debugging turned on\n')
      if(j == 1 && debug)
        saveRDS(list(X=X, g=g, f=f), file='/tmp/princmp.rds')
      s <- summary(f)
      w <- s$which[, -1, drop=FALSE] # omit intercept
      xnm <- colnames(w)
      ## Recover the order in which variables entered the stepwise model
      xadded <- character(0)
      for(l in 1 : nrow(w)) {
        varnow <- xnm[w[l,]]
        varnew <- setdiff(varnow, xadded)
        xadded <- c(xadded, varnew)
      }
      rsq <- structure(s$rsq, names=xadded)
      ## Truncate once R^2 is essentially 1
      l <- which(rsq >= 0.999)
      if(length(l)) rsq <- rsq[1 : min(l)]
      swa[[j]] <- rsq
    }
    res$sw <- swa
  }

  ## Use predict method with newdata to ensure that PCs NA when
  ## original variables NA
  ## See https://stackoverflow.com/questions/5616210
  if(isf) X <- model.matrix.lm(formula, data, na.action=na.pass)
  pcs <- predict(g, newdata=X)
  pcs <- pcs[, 1 : min(k, ncol(pcs)), drop=FALSE]
  res$scores <- pcs
  class(res) <- 'princmp'
  res
}
##' Print Results of princmp
##'
##' Simple print method for [princmp()]
##' @title print.princmp
##' @param x results of `princmp`
##' @param which specifies which loadings to print, the default being `'none'` and other values being `'standardized'`, `'original'`, or `'both'`
##' @param k number of components to show, defaults to `k` specified to `princmp`
##' @param ... unused
##' @return nothing
##' @author Frank Harrell
##' @export
##' @md
## Print method for princmp results: heading, optional loading matrices
## (standardized and/or original scale), and any stepwise PC approximations.
print.princmp <- function(x,
                          which=c('none', 'standardized', 'original', 'both'),
                          k=x$k, ...) {
  which <- match.arg(which)

  ## Print a rounded matrix without quotes, suppressing exact zeros
  show_rounded <- function(m, digits) {
    m <- format(round(m, digits), zero.print=FALSE)
    print(m, quote=FALSE)
  }

  hdr <- if(identical(x$method, 'sparse')) 'Sparse ' else ''
  cat(hdr, 'Principal Components Analysis\n', sep='')
  if(length(x$ndata) && x$ndata != x$n)
    cat('Used', x$n, 'observations with no NAs out of', x$ndata, '\n')

  std <- x$scoef
  if(which %in% c('standardized', 'both')) {
    cat('\n', hdr, 'PC Coefficients of Standardized Variables\n', sep='')
    show_rounded(std[, 1 : k], 3)
  }

  p <- ncol(x$coef)
  if(which %in% c('original', 'both')) {
    ## Rescale standardized loadings back to the original variable units
    cat('\n', hdr, 'PC Coefficients of Original Variables\n', sep='')
    orig <- std / matrix(rep(x$scale, p), nrow=length(x$scale))
    show_rounded(orig[, 1 : k], 5)
  }

  approxs <- x$sw
  if(length(approxs)) {
    cat('\nStepwise Approximations to PCs With Cumulative R^2\n')
    for(j in seq_along(approxs)) {
      cat('\nPC', j, '\n')
      rsq <- approxs[[j]]
      vn  <- names(rsq)
      ## Build "v1 (r1) + v2 (r2) + ..." then wrap to the console width
      line <- character(0)
      for(l in seq_along(rsq))
        line <- paste0(line, if(l > 1) ' + ', vn[l],
                       ' (', round(rsq[l], 3), ')')
      cat(strwrap(line), sep='\n')
    }
  }
  invisible()
}
##' Plot Method for princmp
##'
##' Uses base graphics to by default plot the scree plot from a [princmp()] result, showing cumulative proportion of variance explained. Alternatively the standardized PC loadings are shown in a `ggplot2` bar chart.
##' @title plot.princmp
##' @param x results of `princmp`
##' @param which '`scree`' or '`loadings`'
##' @param k number of components to show, default is `k` specified to `princmp`
##' @param offset controls positioning of text labels for cumulative fraction of variance explained
##' @param col color of plotted text in scree plot
##' @param adj angle for plotting text in scree plot
##' @param ylim y-axis scree plotting limits, a 2-vector
##' @param add set to `TRUE` to add a line to an existing scree plot without drawing axes
##' @param abbrev an integer specifying the variable name length above which names are passed through [abbreviate(..., minlength=abbrev)]
##' @param nrow number of rows to use in plotting loadings. Defaults to the `ggplot2` `facet_wrap` default.
##' @param ... unused
##' @return `ggplot2` object if `which='loadings'`
##' @export
##' @author Frank Harrell
## Plot method for princmp results.
## which='scree'   : base-graphics scree plot of component variances with the
##                   cumulative proportion of variance explained written above
##                   each point
## which='loadings': ggplot2 chart of absolute standardized loadings, one
##                   facet per component, segments colored by loading sign
## x      : princmp object
## k      : number of components to display (defaults to k from the fit)
## offset : vertical offset (in character heights) for the cumulative-variance
##          text labels on the scree plot
## col    : color for scree plot points and text
## adj    : text justification for the rotated scree labels
## ylim   : y-axis limits for the scree plot
## add    : if TRUE, add the scree line to an existing plot (no axes drawn)
## abbrev : abbreviate variable names longer than this many characters
## nrow   : number of rows for facet_wrap in the loadings display
## ...    : unused
## Returns a ggplot object for which='loadings'; invisible() otherwise.
plot.princmp <- function(x, which=c('scree', 'loadings'),
                         k=x$k, offset=0.8, col=1, adj=0,
                         ylim=NULL, add=FALSE, abbrev=25, nrow=NULL,
                         ...) {
  which <- match.arg(which)
  if(which == 'scree') {
    vars <- x$vars
    cumv <- cumsum(vars) / sum(vars)
    if(add) lines(1:k, vars[1:k], type='b', col=col)
    else {
      plot(1:k, vars[1:k], type='b', ylab='Variance',
           xlab=if(k <= 10) '' else 'Component',
           axes=FALSE, col=col, ylim=ylim)
      ## Few components: label ticks as PC subscripts via plotmath
      axis(1, at=1:k,
           labels = if(k > 10) 1 : k else
                    as.expression(sapply(1 : k, function(x) bquote(PC[.(x)]))))
      axis(2)
    }
    ## Cumulative fraction of variance explained, written above each point
    text(1:k, vars[1:k] + offset * par('cxy')[2],
         as.character(round(cumv[1:k], 2)),
         srt=45, adj=adj, cex=.65, xpd=NA, col=col)
    return(invisible())
  }

  ## Loadings display: one horizontal segment per (variable, component),
  ## length proportional to |loading| scaled by the largest |loading|
  co <- x$scoef[, 1 : k, drop=FALSE]
  xname <- abbreviate(rownames(co), minlength=abbrev)
  p <- length(xname)
  b <- as.vector(co)
  r <- range(abs(b))
  sub <- paste0('Range of |loading|:', round(r[1], 3),
                ' - ', round(r[2], 3))
  d <- data.frame(b = abs(b),
                  y = as.vector(p + 1 - row(co)),   # first variable on top
                  comp = factor(as.vector(col(co)),
                                levels=1 : k,
                                labels=paste0('PC[', 1 : k, ']')),
                  x = 1,
                  sign = ifelse(b >= 0., '+', '-'))
  g <- ggplot(d, aes(x=.data$x, y=.data$y, color=.data$sign)) +
    geom_segment(aes(x=.data$x, y=.data$y, xend=.data$x + 0.9 * .data$b / r[2], yend=.data$y),
                 size=3) +
    facet_wrap(~ .data$comp, labeller='label_parsed', nrow=nrow) +
    scale_x_continuous(breaks = 1 : k,
                       labels=as.expression(sapply(1 : k,
                                                   function(x) bquote(PC[.(x)])))) +
    scale_y_continuous(breaks = 1 : p, labels=rev(xname)) +
    guides(x='none', color = guide_legend(title='')) +
    xlab('Standardized Loading (Absolute)') + ylab('') + labs(caption=sub) +
    theme(legend.position='bottom')
  g
}
Hmisc/R/bootkm.s 0000644 0001762 0000144 00000002042 14400367343 013177 0 ustar ligges users bootkm <- function(S, q=.5, B=500, times, pr=TRUE)
{
  ## Bootstrap Kaplan-Meier estimates.
  ## S     : survival::Surv object
  ## q     : quantile of survival time to estimate when times is omitted
  ## B     : number of bootstrap repetitions
  ## times : if given (single value), estimate S(times) instead of a quantile
  ## pr    : print progress every 10 repetitions
  ## Returns a vector of B bootstrap estimates.
  sRequire('survival')
  tthere <- !missing(times)
  if(tthere && length(times)>1)
    stop('presently bootkm only works for a single time')

  S <- S[!is.na(S),]
  n <- nrow(S)
  stratvar <- factor(rep(1,nrow(S)))  # single stratum for survfitKM
  f <- survival::survfitKM(stratvar, S)

  ## Prepend (t=0, S=1) so the curve starts at the origin
  tt <- c(0, f$time)
  ss <- c(1, f$surv)
  ## Verify the overall KM curve supports the requested estimate before
  ## doing any resampling
  if(!tthere) {
    if(ss[length(ss)] > q)
      stop(paste('overall Kaplan-Meier estimate does not fall below',q))
  } else {
    if(tt[length(tt)] < times)
      stop(paste('overall Kaplan-Meier estimate not defined to time',times))
  }

  ests <- double(B)
  for(i in 1:B) {
    if(pr && (i %% 10)==0)
      cat(i,'\r')

    ## Refit the KM curve on a with-replacement sample of the rows of S
    f <- survival::survfitKM(stratvar, S[sample(n,n,replace=TRUE),],
                             se.fit=FALSE, conf.type='none')
    tt <- c(0, f$time)
    ss <- c(1, f$surv)
    ## Step-function interpolation for S(times); otherwise first time the
    ## bootstrap curve drops to or below q
    ests[i] <- if(tthere)
                 approx(tt, ss, xout=times, method='constant', f=0)$y
               else
                 min(tt[ss <= q]) #is NA if none
  }
  if(pr)
    cat('\n')

  ests
}
Hmisc/R/AFirst.lib.s 0000644 0001762 0000144 00000000602 12260042135 013630 0 ustar ligges users ## $Id$
## .noGenerics = TRUE tells library() to skip the search/cache of S4
## generics when the package is attached, speeding loading.  The original
## code spelled this '.noGenenerics', a name R ignores, so the intended
## optimization described by the comment never took effect.
.noGenerics <- TRUE # faster loading as new methods not used
## Attach hook: announce the package only when the user has set
## options(Hverbose=TRUE); otherwise stay silent.
.onAttach <- function(libname, pkgname, ...) {
  announce <- .Options$Hverbose
  if(length(announce) && announce)
    packageStartupMessage(
      "Hmisc library by Frank E Harrell Jr\n\n",
      "Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview')\n",
      "to see overall documentation.\n")
  invisible()
}
Hmisc/R/meltData.r 0000644 0001762 0000144 00000004727 14434136224 013451 0 ustar ligges users ##' Melt a Dataset To Examine All Xs vs Y
##'
##' Uses a formula with one or more left hand side variables (Y) and one or more right hand side variables (X). Uses [data.table::melt()] to melt `data` so that each X is played against the same Y if `tall='right'` (the default) or each Y is played against the same X combination if `tall='left'`. The resulting data table has variables Y with their original names (if `tall='right'`) or variables X with their original names (if `tall='left'`), `variable`, and `value`. By default `variable` is taken as `label()`s of the `tall` variables.
##' @title meltData
##' @param formula a formula
##' @param data data frame or table
##' @param tall see above
##' @param vnames set to `names` to always use variable names instead of labels for X
##' @param sepunits set to `TRUE` to create a separate variable `Units` to hold units of measurement. The variable is not created if no original variables have a non-blank `units` attribute.
##' @param ... passed to `label()`
##' @return data table
##' @author Frank Harrell
##' @md
##' @seealso [label()]
##' @examples
##' d <- data.frame(y1=(1:10)/10, y2=(1:10)/100, x1=1:10, x2=101:110)
##' label(d$x1) <- 'X1'
##' units(d$x1) <- 'mmHg'
##' m=meltData(y1 + y2 ~ x1 + x2, data=d, units=TRUE) # consider also html=TRUE
##' print(m)
##' m=meltData(y1 + y2 ~ x1 + x2, data=d, tall='left')
##' print(m)
## Melt a dataset so every X is played against the same Y (tall='right')
## or every Y against the same X combination (tall='left').
## formula  : one- or two-sided formula, e.g. y1 + y2 ~ x1 + x2
## data     : data frame or data table (copied; never modified by reference)
## tall     : which side of the formula becomes the melted 'variable' column
## vnames   : 'labels' (default) uses label() values in 'variable';
##            'names' keeps the original variable names
## sepunits : if TRUE, add a 'Units' column (only when some variable has a
##            non-blank units attribute)
## ...      : passed to label()
## Returns a data.table with the id variables, 'variable', and 'value'.
meltData <- function(formula, data, tall=c('right', 'left'),
                     vnames=c('labels', 'names'), sepunits=FALSE, ...) {
  tall   <- match.arg(tall)
  vnames <- match.arg(vnames)

  ## Copy so setDT/set/:= below cannot modify the caller's data by reference
  s <- data.table::copy(data)
  if(! is.data.table(s)) data.table::setDT(s)

  v <- all.vars(formula)
  k <- as.character(formula)
  ## Variables on the left (k[2]) and right (k[3]) sides of the formula
  y <- all.vars(as.formula(paste('~', k[2])))
  x <- all.vars(as.formula(paste('~', k[3])))
  s <- s[, ..v]   # keep only variables appearing in the formula

  labs <- sapply(s, label, ...)
  labs <- ifelse(labs == '', names(labs), labs)   # fall back to names
  Units <- sapply(s, Hmisc::units)

  ## id variables stay as columns; everything else is melted
  id <- switch(tall,
               right = y,
               left  = x)

  ## data.table wants all variables to be melted to have the same type
  for(n in setdiff(v, id)) {
    z <- s[[n]]
    if(! is.factor(z)) set(s, j=n, value=as.double(z))
  }
  m <- data.table::melt(s, id.var=id)

  ## Optional separate Units column, looked up by original variable name
  if(sepunits && any(Units != ''))
    m[, Units := Units[as.character(variable)]]

  ## Replace variable names with labels, preserving the level order
  if(vnames == 'labels')
    m[, variable := factor(labs[as.character(variable)],
                           levels=labs[levels(variable)]) ]
  m
}
## Silence R CMD check notes about data.table non-standard evaluation
utils::globalVariables(c('..v','.SD','variable'))
Hmisc/R/symbol.freq.s 0000644 0001762 0000144 00000006300 12243661443 014147 0 ustar ligges users ## marginals applies only to symbol="therm", orig.scale to symbol="circle"
## Frequency chart of the cross-classification of x and y, using thermometer
## or circle symbols whose size reflects the cell frequency.
## marginals applies only to symbol="therm", orig.scale to symbol="circle".
##
## x, y       : discrete variables to cross-classify; optional 'label'
##              attributes are used for the axis labels
## symbol     : "thermometer" (default) or "circle"
## marginals  : if TRUE, also show the marginal distributions (at half scale)
##              plus a reference symbol of size n
## orig.scale : for circles, plot at the numeric values of the levels rather
##              than at integer grid positions
## inches     : maximum symbol size in inches
## width      : thermometer width
## subset     : logical vector, or vector of observation indices, to use
## srtx       : rotation angle for x-axis labels
## ...        : passed to symbols()
symbol.freq <- function(x, y, symbol=c("thermometer","circle"),
                        marginals=FALSE, orig.scale=FALSE,
                        inches=.25, width=.15, subset, srtx=0, ...)
{
  symbol <- match.arg(symbol)
  if(missing(subset))
    subset <- rep(TRUE, length(x))

  if(!is.logical(subset)) {
    ## Convert an index-style subset to a logical mask.
    ## (Bug fix: this previously assigned FALSE, so a numeric subset
    ## selected no observations at all.)
    s <- rep(FALSE,length(x))
    s[subset] <- TRUE
    subset <- s
  }

  xlab <- attr(x,'label')
  if(!length(xlab))
    xlab <- as.character(substitute(x))

  ylab <- attr(y,'label')
  if(!length(ylab))
    ylab <- as.character(substitute(y))

  ## Keep only complete, subsetted observations
  s <- !(is.na(x) | is.na(y)) & subset
  x <- x[s]
  y <- y[s]
  f <- table(x, y)
  dx <- dimnames(f)[[1]]
  dy <- dimnames(f)[[2]]
  if(orig.scale)
    xp <- as.numeric(dimnames(f)[[1]])
  else
    xp <- 1:length(dimnames(f)[[1]])

  xp1 <- length(xp)+1
  if(orig.scale)
    yp <- as.numeric(dimnames(f)[[2]])
  else
    yp <- 1:length(dimnames(f)[[2]])

  yp1 <- length(yp)+1
  m <- nrow(f) * ncol(f)
  ## Flatten the table into coordinate/frequency vectors; zero cells get NA
  ## so no symbol is drawn for them
  xx <- single(m)
  yy <- single(m)
  zz <- single(m)
  k <- 0
  for(i in 1:nrow(f)) {
    for(j in 1:ncol(f)) {
      k <- k + 1
      xx[k] <- xp[i]
      yy[k] <- yp[j]
      if(f[i, j] > 0)
        zz[k] <- f[i, j]
      else zz[k] <- NA
    }
  }

  maxn <- max(f)
  n <- 10^round(log10(maxn))   # round reference size for the legend symbol
  if(marginals) {
    ## Append column margins, row margins (both halved), and a reference cell
    xx <- c(xx, rep(xp1, length(yp)))
    yy <- c(yy, yp)
    zz <- c(zz, table(y)/2)
    xx <- c(xx, xp)
    yy <- c(yy, rep(yp1, length(xp)))
    zz <- c(zz, table(x)/2)
    xx <- c(xx, xp1)
    yy <- c(yy, yp1)
    zz <- c(zz, n)
  }

  if(symbol=="circle") {
    ## zz <- inches*sqrt(zz/maxn)
    ## Area proportional to frequency: radius ~ sqrt(freq)
    zz <- sqrt(zz)
    if(orig.scale)
      symbols(xx,yy,circles=zz,inches=inches,
              smo=.02,xlab=xlab,ylab=ylab,...)
    else
      symbols(xx,yy,circles=zz,inches=inches,smo=.02,
              xlab=xlab,ylab=ylab,axes=FALSE,...)

    title(sub=paste("n=",sum(s),sep=""),adj=0)
    if(marginals) {
      axis(1, at = 1:xp1,
           labels = c(dx, "All/2"), srt=srtx,
           adj=if(srtx>0)1
               else .5)
      axis(2, at = 1:yp1,
           labels = c(dy, "All/2"),adj=1)
    } else { # if(!orig.scale) {
      axis(1, at=xp, labels=dx, srt=srtx,
           adj=if(srtx>0)1
               else .5)
      axis(2, at=yp, labels=dy)
    }
    return(invisible())
  }

  ## Thermometers: height proportional to frequency, fill fraction 0
  zz <- cbind(rep(width,length(zz)), inches*zz/maxn, rep(0,length(zz)))
  symbols(xx,yy,thermometers=zz,inches=FALSE,
          axes=FALSE,xlab=xlab,ylab=ylab,...)
  title(sub=paste("n=",sum(s),sep=""),adj=0)
  if(marginals) {
    text(xp1-width, yp1, n, adj=1, cex=.5)
    axis(1, at = 1:xp1,
         labels = c(dx, "All/2"), srt=srtx,
         adj=if(srtx>0)1
             else .5)
    axis(2, at = 1:yp1,
         labels = c(dy, "All/2"),adj=1)
    abline(h=yp1-.5, lty=2)
    abline(v=xp1-.5, lty=2)
  } else {
    axis(1, at=xp, labels=dx, srt=srtx,
         adj=if(srtx>0)1
             else .5)
    axis(2, at=yp, labels=dy)
    ## Interactive legend placement for the reference thermometer
    cat("click left mouse button to position legend\n")
    xy <- locator(1)
    symbols(xy$x, xy$y, thermometers=cbind(width,inches*n/maxn,0),
            inches=FALSE,add=TRUE,xlab=xlab,ylab=ylab)
    text(xy$x-width, xy$y, n,adj=1,cex=.5)
  }

  box()
  invisible()
}
Hmisc/R/reformM.r 0000644 0001762 0000144 00000003004 14400717433 013310 0 ustar ligges users # Function to automatically reorder variables for use with aregImpute
# Modification of function written by Yong Hao Pua
# Specify nperm=integer to obtain that many formulae with random
# permutation of order of variables; omit nperm to obtain formula
# with variables sorted by decreasing number of NAs
## Automatically reorder variables in a formula for use with aregImpute.
## Modification of a function written by Yong Hao Pua.
## Without nperm: returns one formula with variables sorted by decreasing
## number of NAs (hardest-to-impute variables first).
## With nperm=integer: returns that many formulae, each with a random
## permutation of the variable order (unique permutations attempted).
## Also prints a recommended number of multiple imputations: the larger of
## 5 and the percent of observations having any NA.
reformM <- function(formula, data, nperm) {
  tlabels <- attr(terms(formula), "term.labels") ## minor tweak!
  cs <- all.vars(formula)
  ## drop=FALSE keeps a data frame even when the formula names one variable
  ## (previously the single-variable case collapsed to a vector and the
  ## missingness bookkeeping below failed)
  data <- data[, cs, drop=FALSE]
  ismiss <- function(x) if(is.character(x)) is.na(x) | x=='' else is.na(x)
  m <- sapply(data, ismiss)      # observations x variables logical matrix
  miss.per.obs <- rowSums(m)
  miss.per.var <- colSums(m)
  # Percent of observations with any missings:
  pm <- ceiling(100 * sum(miss.per.obs > 0) / length(miss.per.obs))
  nimpute <- max(5, pm)
  cat("Recommended number of imputations:", nimpute, "\n")
  if(missing(nperm)) {
    j <- order(miss.per.var, decreasing=TRUE) # var with highest NA freq first
    formula <- as.formula(paste('~', paste(tlabels[j], collapse=' + ')))
  }
  else {
    formula <- list()
    prev <- character(0)
    # Could add logic to sample() until permutation is different from
    # all previous permutations
    for(i in 1 : nperm) {
      ## Try up to 10 times to get a unique permutation
      for(j in 1 : 10) {
        f <- paste('~', paste(sample(tlabels), collapse=' + '))
        if(f %nin% prev) {
          prev <- c(prev, f)
          formula[[i]] <- as.formula(f)
          break
        }
      }
    }
    if(nperm == 1) formula <- formula[[1]]
  }
  formula
}
Hmisc/R/bystats.s 0000644 0001762 0000144 00000016275 12250442003 013376 0 ustar ligges users bystats <- function(y, ..., fun, nmiss, subset)
{
  ## Summary statistics of y (vector or matrix) computed within cells of the
  ## cross-classification of the variables passed in ... .
  ## fun    : summary function applied to the non-NA rows of y within each
  ##          cell (default: column means, labeled Fraction for 0/1 y)
  ## nmiss  : if FALSE, suppress the Missing column even when counts > 0
  ## subset : observations to use
  ## Returns a matrix of class 'bystats' with a row per cell plus 'ALL',
  ## and attributes 'heading' and 'byvarnames' built from the call.
  ##
  ## Fri, 16 Sep 2005 - Shawn@ori.org removed left argument to
  ## interaction
  x <- interaction(..., drop=TRUE, sep=" ")
  l <- levels(x)
  ## Represent NAs in the classification as an explicit "NA" level
  if(any(is.na(x))) {
    l <- c(l, "NA")
    attr(x,"class") <- NULL
    x[is.na(x)] <- length(l)
    levels(x) <- l
    attr(x,'class') <- "factor"
  }

  y <- as.matrix(y)
  if(!missing(subset)) {
    x <- x[subset]
    y <- y[subset,,drop=FALSE]
  }

  if(missing(fun)) {
    ## Default: column means; label as Fraction when y is binary 0/1
    fun <- function(y) apply(y, 2, mean)

    r <- range(y, na.rm=TRUE)
    uy <- unique(y[!is.na(y)])  #fixed 1Jun95, 16Mar96
    funlab <- if(length(uy)==2 && r[1]==0 & r[2]==1)
                "Fraction"
              else
                "Mean"
  } else {
    funlab <- as.character(substitute(fun))
    funlab <- funlab[length(funlab)] #handles fun=function(x)mean(x)
  }

  ## Reconstruct variable names from the call for the heading; drop the
  ## trailing arguments (fun/nmiss/subset) that were explicitly given
  lab <- as.character(sys.call())[-1]
  m <- (!missing(fun)) + (!missing(nmiss)) + (!missing(subset))
  lab <- lab[1:(length(lab)-m)]

  if(length(lab)>2)
    lab2 <- paste(lab[-1],collapse=", ")
  else
    lab2 <- lab[-1]

  heading <- if(funlab=="")
               paste(lab[1],"by",lab2)
             else
               paste(funlab,"of",lab[1],"by",lab2)

  ## Overall (ALL) statistics over rows of y with no NAs in any column
  nna <- !is.na(y %*% rep(1,ncol(y)))
  N <- sum(nna)
  stats <- fun(y[nna,,drop=FALSE])
  nstats <- length(stats)
  ## When fun returns a matrix, build labels from its dimnames
  name.stats <- if(length(dn <- dimnames(stats)))
                  as.vector(outer(dn[[1]],dn[[2]],
                                  FUN=function(a,b)paste(b, a)))
                else
                  names(stats)

  if(length(name.stats))
    funlab <- name.stats

  if(nstats>1 && length(name.stats)==0)
    funlab <- rep(" ", nstats)

  s <- matrix(NA, nrow=length(l) + 1, ncol=2 + nstats,
              dimnames=list(c(l, "ALL"),c("N", "Missing", funlab)))
  j <- 0
  for(i in l) {
    j <- j+1
    w <- y[x==i,,drop=FALSE]
    nna <- !is.na(w %*% rep(1,ncol(w)))
    n <- sum(nna)
    s[j,] <- c(n, nrow(w)-n,
               if(n) fun(w[nna,,drop=FALSE])
               else rep(NA,nstats))
  }

  s[j+1,] <- c(N, nrow(y)-N, stats)
  ## Drop the Missing column if requested, or if there are no missings
  if((!missing(nmiss) && !nmiss) || (missing(nmiss) && all(s[,"Missing"]==0)))
    s <- s[,-2]

  attr(s, "heading")    <- heading
  attr(s, "byvarnames") <- lab2
  attr(s,'class')       <- "bystats"
  s
}
## Print method for bystats: show the stored heading, then print the bare
## matrix with the bystats attributes removed so they do not clutter output.
print.bystats <- function(x, ...)
{
  cat("\n", attr(x, "heading"), "\n\n")
  bare <- x
  attr(bare, "heading")    <- NULL
  attr(bare, "byvarnames") <- NULL
  attr(bare, "class")      <- NULL
  invisible(print(bare, ...))
}
## LaTeX method for bystats objects: typeset the summary matrix with the
## last row ('ALL') separated as its own row group.
## object   : bystats matrix
## title    : LaTeX title, defaulting to the object's name
## caption  : table caption, defaulting to the stored heading
## rowlabel : label for the row-name column (the by-variable names)
## ...      : passed to latex()
latex.bystats <- function(object,
                          title=first.word(expr=substitute(object)),
                          caption=attr(object,"heading"),
                          rowlabel=attr(object,"byvarnames"), ...)
{
  ## Earlier versions translated special characters in the dimnames to
  ## LaTeX; that translation has long been disabled (kept below for
  ## reference), and the dead no-op round-trip through dimnames() and the
  ## unused inn/out vectors has been removed.
  ##inn <- c("%","<=","<",">=",">","\\[")
  ##out <- c("\\\\%","$\\\\leq$","$<$","$\\\\geq$","$>$","\\\\verb|[|")
  ##dm[[1]] <- translate(dm[[1]],inn,out)
  ##dm[[2]] <- translate(dm[[2]],inn,out)

  ## Headings built from sys.call() may contain a literal "cbind"; drop it
  caption <- sedit(caption, "cbind", "")
  latex(unclass(object), title=title, caption=caption, rowlabel=rowlabel,
        n.rgroup=c(nrow(object)-1,1), ...)
}
bystats2 <- function(y, v, h, fun, nmiss, subset)
{
  ## Two-way analog of bystats: summary statistics of y within the cells of
  ## the cross-classification of a vertical (v) and horizontal (h) variable,
  ## including 'ALL' margins in both directions.
  ## Returns a 3-dimensional array of class 'bystats2' with dimensions
  ## (levels of v + ALL) x (levels of h + ALL) x (N, Missing, statistics).
  y <- as.matrix(y)
  if(!missing(subset)) {
    y <- y[subset,,drop=FALSE];
    v <- v[subset];
    h <- h[subset]
  }

  ## exclude=NULL keeps NA as an explicit factor level
  v <- factor(v, exclude=NULL)
  h <- factor(h, exclude=NULL)
  lv <- levels(v)
  lh <- levels(h)
  nv <- length(lv)
  nh <- length(lh)
  if(missing(fun)) {
    ## Default: column means; label as Fraction when y is binary 0/1
    fun <- function(y) apply(y, 2, mean)

    r <- range(y, na.rm=TRUE)
    funlab <- if(length(r)==2 && r[1]==0 & r[2]==1) "Fraction"
              else "Mean"
  } else {
    funlab <- as.character(substitute(fun))
    funlab <- funlab[length(funlab)] #handles fun=function(x)mean(x)
  }

  ## Reconstruct variable names from the call for the heading; drop the
  ## trailing arguments (fun/nmiss/subset) that were explicitly given
  lab <- as.character(sys.call())[-1]
  m <- (!missing(fun)) + (!missing(nmiss)) + (!missing(subset))
  lab <- lab[1:(length(lab)-m)]

  if(length(lab)>2)
    lab2 <- paste(lab[-1],collapse=", ")
  else
    lab2 <- lab[-1]

  heading <- if(funlab=="")
               paste(lab[1],"by",lab2)
             else
               paste(funlab,"of",lab[1],"by",lab2)

  ## Overall statistics over rows of y with no NAs in any column
  nna <- !is.na(y %*% rep(1,ncol(y)))
  N <- sum(nna)
  stats <- fun(y[nna,,drop=FALSE])
  nstats <- length(stats)
  ## When fun returns a matrix, build labels from its dimnames
  name.stats <- if(length(dn <- dimnames(stats)))
                  as.vector(outer(dn[[1]],dn[[2]],FUN=function(a,b)paste(b,a)))
                else
                  names(stats)

  if(length(name.stats))
    funlab <- name.stats

  if(nstats>1 && length(name.stats)==0)
    funlab <- rep(" ", nstats)

  s <- array(NA,dim=c(nv+1,nh+1,2+nstats),
             dimnames=list(c(lv,"ALL"), c(lh,"ALL"), c("N","Missing",funlab)))
  ## Fill every cell including both margins; "ALL"/"ALL" is the grand total
  for(xv in c(lv,"ALL")) {
    for(xh in c(lh,"ALL")) {
      if(xv=="ALL" && xh=="ALL")
        st <- c(N, nrow(y)-N, stats)
      else {
        if(xv=="ALL")
          u <- h==xh
        else if(xh=="ALL")
          u <- v==xv
        else
          u <- h==xh & v==xv

        if(any(u)) {
          w <- y[u,,drop=FALSE]
          nna <- !is.na(w %*% rep(1,ncol(w)))
          n <- sum(nna)
          st <- c(n, nrow(w)-n, fun(w[nna,,drop=FALSE]))
        } else st <- c(0, n, rep(NA, length(stats)))
      }

      s[xv,xh,] <- st
    }
  }

  ## Drop the Missing slice if requested, or if there are no missings
  if((!missing(nmiss) && !nmiss) ||
     (missing(nmiss) && all(s[,,"Missing"]==0)))
    s <- s[,,-2,drop=FALSE]

  attr(s, "heading")    <- heading
  attr(s, "byvarnames") <- lab[-1]
  attr(s,'class')       <- "bystats2"
  s
}
## Print method for bystats2 arrays: each printed cell stacks the k
## statistics for one (row, column) combination, separated by newlines,
## rendered with print.char.matrix when available.
print.bystats2 <- function(x, abbreviate.dimnames=FALSE,
                           prefix.width=max(nchar(dimnames(x)[[1]])),...)
{
  cat("\n",attr(x,"heading"),"\n\n")
  ## Fallback for environments lacking print.char.matrix (vanilla S):
  ## strip the attributes and print the raw array
  if(!exists("print.char.matrix")) {
    attr(x, "heading") <- attr(x, "byvarnames") <- attr(x, "class") <-
      NULL
    return(invisible(print(x)))
  }

  d <- dim(x)
  ## Banner listing the statistic names, one per line
  print.char.matrix(matrix(paste(dimnames(x)[[3]], collapse="\n"), 1, 1))

  ## Format each statistic slice separately so columns align within a slice
  fmt <- array("", dim=d[1:3])
  for(k in seq_len(d[3]))
    fmt[, , k] <- format(x[, , k])

  dimn <- dimnames(x)[1:2]
  names(dimn) <- attr(x, "byvarnames")
  cells <- matrix("", nrow=d[1], ncol=d[2], dimnames=dimn)
  for(i in seq_len(d[1]))
    for(j in seq_len(d[2]))
      cells[i, j] <- paste(fmt[i, j, ], collapse="\n")

  invisible(print.char.matrix(cells, ...))
}
## LaTeX method for bystats2 arrays: flatten the 3-D array into a matrix
## with one block of statistic columns per horizontal-variable level, and
## typeset it with the 'ALL' row separated as its own row group.
## object   : bystats2 array (v levels x h levels x statistics)
## title    : LaTeX title, defaulting to the object's name
## caption  : caption, defaulting to the stored heading
## rowlabel : label for the row-name column
## ...      : passed to latex()
latex.bystats2 <- function(object,
                           title=first.word(expr=substitute(object)),
                           caption=attr(object, "heading"),
                           rowlabel="", ...)
{
  ## Translate special characters in all three dimnames to LaTeX
  dm <- dimnames(object)
  inn <- c("%", "<=", "<", ">=", ">", "[")
  out <- c("\\%", "$\\leq$","$<$", "$\\geq$","$>$", "\\verb|[|")
  dm[[1]] <- sedit(dm[[1]], inn, out)
  dm[[2]] <- sedit(dm[[2]],inn,out)
  dm[[3]] <- sedit(dm[[3]],inn,out)
  dimnames(object) <- dm
  ## Headings built from sys.call() may contain a literal "cbind"; drop it
  caption <- sedit(caption, "cbind", "")
  d <- dim(object)
  ## Column labels repeat the statistic names within each h-level group
  dn <- rep(dimnames(object)[[3]], d[2])
  st <- matrix(NA, nrow=d[1], ncol=d[2]*d[3],
               dimnames=list(dimnames(object)[[1]], dn))
  ## Flatten: for row i the columns run over (h level j, statistic k),
  ## with k varying fastest
  for(i in 1:d[1]) {
    l <- 0
    for(j in 1:d[2]) {
      for(k in 1:d[3]) {
        l <- l+1
        st[i,l] <- object[i,j,k]
      }
    }
  }

  latex(st, title=title, caption=caption, rowlabel=rowlabel,
        n.rgroup=c(nrow(st)-1,1),
        cgroup=dimnames(object)[[2]], n.cgroup=rep(d[3],d[2]),...)
}
Hmisc/R/hist.data.frame.s 0000644 0001762 0000144 00000005060 14561235550 014661 0 ustar ligges users hist.data.frame <- function(x, n.unique=3, nclass="compute", na.big=FALSE,
                            rugs=FALSE, freq=TRUE, mtitl=FALSE, ...)
{
  ## Matrix of plots for all variables in data frame x: histograms for
  ## numeric/date variables with at least n.unique distinct values, sorted
  ## dot charts of frequencies for character/factor variables.
  ## n.unique : minimum number of distinct values for a histogram to be drawn
  ## nclass   : number of bins; "compute" uses an n-based rule,
  ##            "default" lets hist() choose
  ## na.big   : if TRUE, write the number of NAs inside the plot region
  ## rugs     : if TRUE, add a scat1d rug of data values (... passed to it)
  ## mtitl    : optional character string for an overall page title
  ## Pages interactively (mouse click between pages) on screen devices.
  ## Returns (invisibly) the number of pages produced.

  ## Save and restore graphics state; resetting mfrow changes mar too
  oldmf  <- par('mfrow')
  oldoma <- par('oma')
  oldmar <- par('mar') # resetting mfrow causes a new mar
  on.exit(par(mfrow=oldmf, oma=oldoma, mar=oldmar))
  mf <- oldmf
  if(length(mf)==0) mf <- c(1,1)

  ## Choose an array layout automatically from the number of variables
  automf <- FALSE
  if((la <- length(x))>1 & max(mf)==1) {
    mf <-
      if(la<=4) c(2,2)
      else if(la<=6) c(2,3)
      else if(la<=9) c(3,3)
      else if(la<=12) c(3,4)
      else if(la<=16) c(4,4)
      else c(4,5)

    automf <- TRUE
    par(mfrow=mf)
  }

  if(is.character(mtitl))
    par(oma=c(0,0,3,0))   # reserve outer margin for the page title

  nam <- names(x)
  i <- 0   # count of histogram panels drawn (drives page breaks)
  j <- 0   # index of the current variable
  for(v in x) {
    j <- j+1
    type <-
      if(is.character(v) || is.factor(v))
        'cat'
      else if(inherits(v,'Date'))
        'Date'
      else
        'none'

    ## Use the label attribute unless it is too long to fit
    lab <- attr(v,"label")
    lab <-
      if(length(lab) && nchar(lab) > 35)
        nam[j]
      else
        label(v, units=TRUE, plot=type!='cat', default=nam[j])

    if(type=='cat') {
      ## Categorical: sorted dot chart of frequencies
      tab <- -sort(-table(v))
      dotchart3(tab, xlab=paste('Frequencies for', lab))
    } else {
      type <- if(inherits(v,'Date')) 'Date' else 'none'
      if(type %nin% c('none','Date'))
        v <- unclass(v)

      w <- v[!is.na(v)]
      n <- length(w)
      if(length(unique(w)) >= n.unique) {
        i <- i+1
        if(is.numeric(nclass))
          nc <- nclass else
        if(nclass=="compute")
          nc <- max(2,trunc(min(n/10,25*logb(n,10))/2))

        if(nclass == 'default') {
          if(type == 'Date')
            hist(v, nc, xlab=lab, freq=freq, main='')
          else hist(v, xlab=lab, main='', freq=freq)
        } else {
          if(type == 'Date')
            hist(v, nc, xlab=lab, freq=freq, main='')
          else
            hist(v, nclass=nc, xlab=lab, freq=freq, main='')
        }

        ## Subtitle with n used and number missing
        m <- sum(is.na(v))
        pm <- paste("n:",n," m:",m,sep="")
        title(sub=pm,adj=0,cex=.5)
        if(na.big && m>0)
          mtext(paste(m,"NAs"),line=-2,cex=1)

        if(rugs)
          scat1d(v, ...)

        ## On interactive screen devices, pause between full pages
        if(automf && interactive() &&
           all(names(dev.list()) %nin% c('postscript')) && # add win.printer?
           (i %% prod(mf)==0)) {
          if(is.character(mtitl))
            mtitle(mtitl)

          cat("click left mouse button to proceed\n")
          locator(1)
        } else if(is.character(mtitl) && i %% prod(mf)==1)
          mtitle(mtitl)
      }
    }
  }

  invisible(ceiling(i / prod(mf)))
}
## Mean and median absolute prediction errors for a fitted model, plus the
## absolute spread of predictions and responses about their medians, and
## the ratios of these quantities (analogs of R^2 on the absolute scale).
## fit : fitted model supplying y / fitted.values / linear.predictors when
##       lp and y are not given explicitly
## lp  : linear predictor / predicted values
## y   : observed responses
## Returns a list of class 'abs.error.pred' with 'differences' and 'ratios'.
abs.error.pred <- function(fit, lp=NULL, y=NULL)
{
  if(!length(y))  y  <- fit$y
  if(!length(lp)) lp <- fit$fitted.values
  if(!length(lp)) lp <- fit$linear.predictors
  if(!(length(y) && length(lp)))
    stop('must specify lp and y or specify y=T in the fit')

  ## Drop observations with NA in either y or lp
  bad <- is.na(y + lp)
  if(any(bad)) {
    y  <- y[!bad]
    lp <- lp[!bad]
  }

  dev.pred  <- abs(lp - median(lp))  # spread of predictions about their median
  dev.total <- abs(y - median(y))    # spread of responses about their median
  dev.err   <- abs(lp - y)           # absolute prediction errors

  differences <- cbind(c(mean(dev.pred),   mean(dev.err),   mean(dev.total)),
                       c(median(dev.pred), median(dev.err), median(dev.total)))
  dimnames(differences) <- list(c('|Yi hat - median(Y hat)|',
                                  '|Yi hat - Yi|',
                                  '|Yi - median(Y)|'),
                                c('Mean','Median'))

  ratios <- cbind(c(mean(dev.pred),   mean(dev.err))   / mean(dev.total),
                  c(median(dev.pred), median(dev.err)) / median(dev.total))
  dimnames(ratios) <- list(c('|Yi hat - median(Y hat)|/|Yi - median(Y)|',
                             '|Yi hat - Yi|/|Yi - median(Y)|'),
                           c('Mean','Median'))

  structure(list(differences=differences, ratios=ratios),
            class='abs.error.pred')
}
## Print method for abs.error.pred: show the differences table followed by
## the ratios table, each under its header.
print.abs.error.pred <- function(x, ...)
{
  headers <- c(differences='\nMean/Median |Differences|\n\n',
               ratios='\n\nRatios of Mean/Median |Differences|\n\n')
  for(nm in names(headers)) {
    cat(headers[[nm]])
    print(x[[nm]])
  }
  invisible()
}
Hmisc/R/drawPlot.s 0000644 0001762 0000144 00000025557 12623343364 013510 0 ustar ligges users utils::globalVariables("pch.to.use")
## Interactive plot construction: the user clicks points/curves with the
## mouse via the Points(), Curve(), and Abline() helpers passed in ... .
## xlim/ylim/xlab/ylab set up the empty plotting region; ticks controls
## which axes get real tick marks; key=TRUE places a key via putKeyEmpty
## instead of labeling curves with labcurve; opts is passed to labcurve.
## Returns (invisibly) an object of class 'drawPlot' that plot.drawPlot
## can redraw non-interactively.
drawPlot <- function(..., xlim=c(0,1), ylim=c(0,1), xlab='', ylab='',
                     ticks=c('none','x','y','xy'),
                     key=FALSE, opts=NULL)
{
  ## Collect mouse-clicked points; type='p' draws points (consuming one
  ## plotting symbol from the shared pch.to.use pool), type='r' draws a rug.
  ## ymean recenters the clicked y values to a specified mean.
  Points <- function(label=' ', type=c('p','r'), n, pch=pch.to.use[1],
                     cex=par('cex'), col=par('col'),
                     rug=c('none','x','y','xy'),
                     ymean=NULL) {
    type <- match.arg(type)
    rug <- match.arg(rug)
    cat('\nClick mouse for each point',
        if(label!='')
          paste(' for group ',label), '.',
        if(missing(n))
          ' Right click when finished (Esc with RStudio).',
        '\n',sep='')

    pts <-
      if(missing(n))
        locator(type='p', pch=pch, cex=cex, col=col)
      else
        locator(n, type='p', pch=pch, cex=cex, col=col)

    if(length(ymean))
      pts$y <- pts$y - mean(pts$y) + ymean

    ## Consume the symbol just used so the next Points() call gets a new one
    if(type=='p')
      assign("pch.to.use", pch.to.use[pch.to.use != pch],
             envir=environment(Points))
    else {
      scat1d(pts$x, side=1)
      pch <- NA
    }

    switch(rug,
           x = scat1d(pts$x, side=1),
           y = scat1d(pts$y, side=2),
           xy = {scat1d(pts$x, side=1); scat1d(pts$y, side=2)},
           none = )

    structure(list(points=pts, label=label, type=type,
                   pch=pch, cex=cex, col=col, rug=rug), class='Points')
  }

  ## Collect clicked points and fit/draw a curve through them; type may also
  ## be a function of x, in which case it is evaluated over xlim directly.
  Curve <- function(label=' ',
                    type=c('bezier','polygon','linear','pol','loess','step',
                           'gauss'),
                    n=NULL, lty=1, lwd=par('lwd'), col=par('col'),
                    degree=2, evaluation=100, ask=FALSE) {
    isfun <- is.function(type)
    if(! isfun) type <- match.arg(type)
    if(! isfun && ! length(n) && type == 'linear') n <- 2
    if(! isfun && type=='gauss') n <- 3
    xlim <- par('usr')[1 : 2]
    redraw <- TRUE
    if(isfun) {
      ## Direct evaluation of a supplied function over the x range
      x <- seq(xlim[1], xlim[2], length=evaluation)
      pts <- list(x=as.numeric(x), y=as.numeric(type(x)))
      lines(pts, lty=lty, lwd=lwd, col=col)
    }
    else repeat {   # loop so the user can redraw until satisfied (ask=TRUE)
      cat('\nClick mouse for each point',
          if(label!='')
            paste(' for group ',label),
          '.',
          if(!length(n))
            ' Right click when finished (Esc with RStudio).',
          '\n', sep='')

      pts <-
        if(!length(n))
          locator(type='l', lty=lty, lwd=lwd, col=col)
        else
          locator(n, type='l', lty=lty, lwd=lwd, col=col)

      n <- length(pts$x)
      if(n < 2) stop('must click at least 2 points')
      if(n==2) type <- 'linear'
      if(type=='pol') {
        ## Polynomial fit of the requested degree to the clicked points
        x <- matrix(NA, nrow=n, ncol=degree)
        for(i in 1:degree) x[,i] <- pts$x^i
        f <- lm.fit.qr.bare(x, pts$y)
        x <- matrix(NA, nrow=evaluation, ncol=degree)
        x[,1] <- seq(min(pts$x),max(pts$x), length=evaluation)
        if(degree > 1)
          for(i in 1:degree) x[,i] <- x[,1]^i

        cof <- f$coefficients
        y <- cof[1] + x %*% cof[-1]
        pts <- list(x=as.numeric(x[,1]), y=as.numeric(y))
        if(redraw) lines(pts, lty=lty, lwd=lwd, col=col)
      }

      if(type == 'loess') {
        ## Lowess smooth through the clicked points
        w <- lowess(pts$x, pts$y, f=.25)
        pts <- approx(w, xout=seq(min(pts$x), max(pts$x), length=evaluation))
        if(redraw) lines(pts, lty=lty, lwd=lwd, col=col)
      }

      if(type=='bezier') {
        pts <- bezier(pts, xlim=range(pts$x), evaluation=evaluation)
        if(redraw)
          lines(pts, lty=lty, lwd=lwd, col=col)
      }

      if(type=='gauss') {
        ## Gaussian shape through 3 clicks: left/right at average height,
        ## middle at the peak; width solved from the height ratio
        mu <- pts$x[2]
        delta <- diff(pts$x[-2])/2
        htavg  <- sum(pts$y[-2])/2
        htmax  <- pts$y[2]
        x <- seq(xlim[1], xlim[2], length=evaluation)
        b2 <- delta^2 / log(htmax/htavg)
        y <- htmax * exp(-(x-mu)^2/b2)
        i <- y > 1e-4   # omit the negligible tails
        pts <- list(x=as.single(x[i]), y=as.single(y[i]))
        lines(pts, lty=lty, lwd=lwd, col=col)
      }

      if(type=='step' && redraw)
        lines(pts, type='s', lty=lty, lwd=lwd, col=col)

      if(!ask) break

      if(readline('\nType y to accept, n to re-draw:') == 'y')
        break
    }

    structure(list(points=pts, label=label, type=type, lty=lty,
                   lwd=lwd, col=col), class='Curve')
  }

  ## Draw a reference line and record its arguments for later redraws
  Abline <- function(...) {
    abline(...)
    structure(list(...), class='Abline')
  }

  ## Shared pool of plotting symbols consumed by successive Points() calls
  environment(Points)$pch.to.use <- c(1,2,3,4,16,17,5,6,15,18,19)

  ticks <- match.arg(ticks)
  if(missing(ticks)) {
    if(!missing(xlim)) ticks <- 'x'
    if(!missing(ylim)) ticks <- 'y'
    if(!missing(xlim) && !missing(ylim)) ticks <- 'xy'
  }

  ## Empty plotting region; unticked axes just get end-point marks
  plot(xlim, ylim, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab,
       type='n', axes=ticks=='xy')
  switch(ticks,
         none={
           axis(1, at=xlim, labels=FALSE)
           axis(2, at=ylim, labels=FALSE)
         },
         x={
           axis(1)
           axis(2, at=ylim, labels=FALSE)
         },
         y={
           axis(1, at=xlim, labels=FALSE)
           axis(2)
         },
         xy = )

  ## Evaluate the Points()/Curve()/Abline() calls now, after the helpers
  ## above and the plot region exist
  cc <- as.list(substitute(list(...))[-1])
  W <- list()
  for(i in seq_along(cc)) W[[i]] <- eval(cc[[i]])
  ## See http://stackoverflow.com/questions/33737102/elegant-way-to-define-a-function-inside-another-function

  m <- length(W)
  type <- label <- rep('', m)
  lty <- lwd <- pch <- cex <- col <- rep(NA, m)
  curves <- vector('list', m)
  i <- 0
  ## Extract plotting metadata for each element, skipping Abline results
  for(j in 1:m) {
    w <- W[[j]]
    if(attr(w,'class')=='Abline')
      next

    i <- i + 1
    isfun <- is.function(w$type)
    ## For keys, interpolate step/linear curves onto a regular grid
    curves[[i]] <- if(!key || isfun) w$points
    else switch(w$type,
                step = approx(w$points,
                              xout=seq(min(w$points$x), max(w$points$x), length=50),
                              method='constant', f=0),
                linear = approx(w$points,
                                xout=seq(min(w$points$x), max(w$points$x), length=50)),
                w$points)

    label[i] <- w$label
    col[i]   <- w$col
    type[i] <- if(isfun) 'l'
    else switch(w$type,
                p = 'p',
                r = 'r',
                step= 's',
                'l')

    if(type[i]=='p') {
      pch[i] <- w$pch
      cex[i] <- w$cex
    }
    else if(type[i] != 'r') {
      lty[i] <- w$lty
      lwd[i] <- w$lwd
    }
  }

  ## Trim the metadata vectors if any Abline elements were skipped
  if(i < m) {
    curves <- curves[1:i]
    label  <- label [1:i]
    type   <- type  [1:i]
    lty    <- lty   [1:i]
    lwd    <- lwd   [1:i]
    pch    <- pch   [1:i]
    cex    <- cex   [1:i]
    col    <- col   [1:i]
  }

  keyloc <- NULL
  j <- type != 'r'   # rugs are not labeled or keyed
  if(any(j)) {
    if(!key)
      labcurve(curves[j], labels=label[j], type=type[j],
               lty=lty[j], lwd=lwd[j], col.=col[j], opts=opts)
    else {
      x <- unlist(lapply(curves, function(z)z$x))
      y <- unlist(lapply(curves, function(z)z$y))
      keyloc <- putKeyEmpty(x, y, labels=label[j], type=type[j],
                            pch=pch[j], lty=lty[j],
                            lwd=lwd[j], cex=cex[j], col=col[j])
    }
  }

  invisible(structure(list(W, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
                           ticks=ticks, key=key, keyloc=keyloc, opts=opts),
                      class='drawPlot'))
}
## Evaluate a Bezier curve defined by control points (x, y) at
## 'evaluation' equally spaced parameter values in [0, 1].
##
## Arguments:
##   x, y       : control point coordinates; if y is missing, x must be
##                a 2-element list (e.g. the points component of a Curve)
##                with x[[1]] = x coordinates and x[[2]] = y coordinates
##   xlim       : accepted for backward compatibility; not used here
##   evaluation : number of points at which to evaluate the curve (>= 2)
## Returns list(x, y) of numeric coordinates on the curve.
bezier <- function(x, y, xlim, evaluation=100) {
  if(missing(y)) {
    y <- x[[2]]
    x <- x[[1]]
  }
  n <- length(x)
  ## numeric() replaces the deprecated single() storage of the original
  X <- Y <- numeric(evaluation)
  Z <- seq(0, 1, length=evaluation)
  ## A Bezier curve passes through its first and last control points
  X[1] <- x[1]
  X[evaluation] <- x[n]
  Y[1] <- y[1]
  Y[evaluation] <- y[n]
  j <- 0 : (n - 1)
  if(evaluation > 2) for(i in 2 : (evaluation - 1)) {
    ## Bernstein basis weights choose(n-1, j) z^j (1-z)^(n-1-j), computed
    ## stably by dbinom(); this replaces the original recurrence that
    ## divided by (1 - z) and could produce NaN (it carried a prn() debug
    ## hook for exactly that failure)
    wt <- dbinom(j, n - 1, Z[i])
    X[i] <- sum(wt * x)
    Y[i] <- sum(wt * y)
  }
  list(x=as.numeric(X), y=as.numeric(Y))
}
## Redraw a plot previously captured by drawPlot(), from its saved
## 'drawPlot' object.  Re-establishes the axes, replays each stored
## Points/Curve/Abline component, then draws either a key (putKey) or
## curve labels (labcurve).  Returns invisible().
##
## Arguments:
##   x                 : object of class 'drawPlot'
##   xlab, ylab, ticks : override the values stored in x
##   key               : draw a key at keyloc instead of labeling curves
##   keyloc            : key location saved by drawPlot (required if key)
##   ...               : ignored
plot.drawPlot <- function(x, xlab, ylab, ticks,
                          key=x$key, keyloc=x$keyloc, ...)
{
  if(missing(xlab)) xlab <- x$xlab
  if(missing(ylab)) ylab <- x$ylab
  xlim <- x$xlim
  ylim <- x$ylim
  if(missing(ticks)) ticks <- x$ticks
  ## Empty plotting region; full axes are drawn only when ticks == 'xy'
  plot(xlim, ylim, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab,
       type='n', axes=ticks=='xy')
  ## Otherwise draw real tick marks only on the requested axes; the other
  ## axis just gets unlabeled ticks at its limits
  switch(ticks,
         none={
           axis(1, at=xlim, labels=FALSE)
           axis(2, at=ylim, labels=FALSE)
         },
         x={
           axis(1)
           axis(2, at=ylim, labels=FALSE)
         },
         y={
           axis(1, at=xlim, labels=FALSE)
           axis(2)
         },
         xy= )
  data <- x[[1]]   # the list of stored plot components
  m <- length(data)
  type <- label <- rep('', m)
  lty <- lwd <- pch <- cex <- col <- rep(NA, m)
  curves <- vector('list', m)
  i <- 0
  for(j in 1 : m) {
    w <- data[[j]]
    ## NOTE(review): assumes a single-element class attribute; inherits()
    ## would be more robust — confirm components always have one class
    if(attr(w, 'class') == 'Abline') {
      ## Replay the exact abline() arguments saved by Abline()
      do.call("abline", unclass(w))
      next
    }
    i <- i + 1
    if(is.function(w$type)) w$type <- 'l'
    ## For key placement, step/linear curves are discretized to 50 points
    curves[[i]] <-
      if(!key)
        w$points
      else switch(w$type,
                  step = approx(w$points,
                                xout=seq(min(w$points$x),max(w$points$x),
                                         length=50),
                                method='constant', f=0),
                  linear = approx(w$points,
                                  xout=seq(min(w$points$x),max(w$points$x),
                                           length=50)),
                  w$points)
    label[i] <- w$label
    col[i] <- w$col
    switch(attr(w, 'class'),
           Points = {
             type[i] <- w$type
             pch[i] <- w$pch
             cex[i] <- w$cex
             ## 'p' = plotted points; 'r' = rug only (scat1d on x axis)
             switch(w$type,
                    p = points(w$points, cex=w$cex, pch=w$pch, col=w$col),
                    r = scat1d(w$points$x, side=1, col=w$col))
             ## Optional additional rug plots on one or both axes
             switch(w$rug,
                    x = scat1d(w$points$x, side=1, col=w$col),
                    y = scat1d(w$points$y, side=2, col=w$col),
                    xy = {
                      scat1d(w$points$x, side=1, col=w$col)
                      scat1d(w$points$y, side=2, col=w$col)
                    },
                    none = )
           },
           Curve = {
             type[i] <- if(w$type=='step') 's' else 'l'
             lty[i] <- w$lty
             lwd[i] <- w$lwd
             lines(w$points, lty=w$lty, lwd=w$lwd, col=col[i], type=type[i])
           })
  }
  ## Trim the per-component vectors if Abline components were skipped
  if(i < m) {
    curves <- curves[1:i]
    label <- label[1:i]
    type <- type[1:i]
    pch <- pch[1:i]
    lty <- lty[1:i]
    lwd <- lwd[1:i]
    cex <- cex[1:i]
    col <- col[1:i]
  }
  if(key && !length(keyloc))
    stop('you may not specify key=T unless key=T was specified to drawPlot or keyloc is specified to plot')
  if(any(label!='')) {
    j <- type!='r'   # rug-only components get no key entry or label
    if(any(j)) {
      if(key) putKey(keyloc, labels=label[j],
                     type=type[j], pch=pch[j],
                     lty=lty[j], lwd=lwd[j], cex=cex[j], col=col[j])
      else
        labcurve(curves[j], type=type[j],
                 lty=lty[j], lwd=lwd[j], col.=col[j],
                 labels=label[j], opts=x$opts)
    }
  }
  invisible()
}
Hmisc/R/model.frame.default.s 0000644 0001762 0000144 00000006432 12250440744 015525 0 ustar ligges users ## $Id$
## Build a model frame on behalf of the *calling* function, using the
## formula/data/weights/subset/na.action arguments found in the caller's
## own call.
##
## Arguments:
##   formula           : a formula (or terms) object
##   specials          : optional character vector of special function
##                       names forwarded to terms()
##   default.na.action : na.action used when the caller supplied none
## Returns the evaluated model frame (a data.frame).
GetModelFrame <- function(formula, specials, default.na.action=NULL) {
  if(missing(formula) || !inherits(formula, "formula"))
    stop("GetModelFrame needs a function argument specified ",
         "as a formula or terms object")
  ## get the function call of the calling function
  fun.call <- match.call(sys.function(sys.parent()),
                         call=sys.call(sys.parent()),
                         expand.dots=FALSE)
  args.needed <- c("formula", "data", "weights", "subset", "na.action")
  ## positions of each needed argument in the caller's call (0 = absent)
  m <- structure(match(args.needed, names(fun.call), nomatch=0), names=args.needed)
  ## get the environment of the formula
  env <- environment(formula)
  if (is.null(env))
    env <- parent.frame()
  ## If formula is not a terms object it must be converted with terms()
  if(!inherits(formula, "terms")) {
    ## Check for presence of args needed for the terms() call
    has.arg <- c(formula=TRUE, data=FALSE)
    if(m["data"])
      has.arg["data"] <- TRUE
    ## Keep the function name plus the formula (and data, if supplied),
    ## indexing by *position*.  The original code used
    ## fun.call[c(1, has.arg)], silently coercing the logical vector to
    ## 0/1 positions and producing a malformed call; it also left two
    ## debug print() calls in place, removed here.
    new.call <- fun.call[c(1, m[c("formula", "data")][has.arg])]
    new.call[[1]] <- as.name('terms')
    names(new.call)[2] <- "x"
    if(!missing(specials) && !is.null(specials))
      new.call$specials=specials
    ## convert the formula to a terms object
    formula <- eval(new.call, envir=env)
  }
  ## Rebuild the caller's call as a model.frame() call carrying only the
  ## arguments model.frame understands
  new.call <- fun.call[c(1, m)]
  new.call[[1]] <- as.name("model.frame")
  new.call$formula <- formula
  ## honor the default na.action only when the caller gave none
  if("na.action" %nin% names(fun.call) && !is.null(default.na.action))
    new.call$na.action <- default.na.action
  return(eval(new.call, env, parent.frame()))
}
## Replaced with one more like default R 3nov02
## With R 1.6 was getting error with ... arguments
## if(FALSE) '[.factor' <- function (x, i, drop = TRUE)
## {
## y <- NextMethod("[")
## class(y) <- class(x)
## attr(y, "contrasts") <- attr(x, "contrasts")
## attr(y, "levels") <- attr(x, "levels")
## opt <- .Options$drop.factor.levels
## if(!length(opt))
## opt <- .Options$drop.unused.levels
## if(drop && (!missing(drop) || (length(opt)==0 || opt)))
## reFactor(y)
## else y
## }
## Remove one or more terms from a model formula or terms object.
## 'drop' gives either term labels (character) or term positions
## (numeric); numeric subscripts are translated to labels first.
## Returns the updated terms object.
termsDrop <- function(object, drop, data)
{
  trm <- terms(object, data=data)
  if(is.numeric(drop)) {
    term.labels <- attr(trm, 'term.labels')
    if(any(drop > length(term.labels)))
      stop('subscript out of range')
    drop <- term.labels[drop]
  }
  ## Build an update formula of the form "~ . - a- b" and apply it
  removal <- paste('-', drop, collapse='')
  newform <- as.formula(paste('~ . ', removal))
  terms(update(trm, newform), data=data)
}
## Return the unique innermost variable names on the right-hand side of
## a formula, taking the first variable mentioned inside each term
## (e.g. the term log(b) contributes "b").  An object carrying a
## "formula" attribute is also accepted in place of a formula.
var.inner <- function(formula)
{
  if(!inherits(formula, "formula"))
    formula <- attr(formula, "formula")
  if(!length(formula))
    stop('no formula object found')
  if(length(formula) > 2)
    formula[[2]] <- NULL   # drop the response variable
  vars <- all.vars(formula, max.names=1e6)
  ## Thanks to Thomas Lumley 28Jul01 :
  first.var <- function(term, av)
    av[match(all.vars(parse(text=term), max.names=1e6), av)][1]
  unique(sapply(attr(terms(formula), "term.labels"), first.var, av=vars))
}
Hmisc/R/seqFreq.r 0000644 0001762 0000144 00000003502 14456316446 013324 0 ustar ligges users ##' Find Sequential Exclusions Due to NAs
##'
##' Finds the variable with the highest number of `NA`s. From the non-`NA`s on that variable find the next variable from those remaining with the highest number of `NA`s. Proceed in like fashion. The resulting variable summarizes sequential exclusions in a hierarchical fashion. See [this](https://hbiostat.org/rflow/doverview.html#sec-doverview-filter) for more information.
##' @title seqFreq
##' @param ... any number of variables
##' @param labels if specified variable labels will be used in place of variable names
##' @param noneNA set to `TRUE` to not include 'none' as a level in the result
##' @return `factor` variable with `obs.per.numcond` attribute
##' @author Frank Harrell
##' @md
##' @export
## See the roxygen comments above.  Summarizes sequential exclusions:
## repeatedly find the condition (variable) with the most positive
## observations, assign those observations to it, and remove them before
## searching for the next most frequent condition.
seqFreq <- function(..., labels=NULL, noneNA=FALSE) {
  d <- list(...)
  k <- length(d)
  ## Variable names: explicit labels, else argument names, else the
  ## deparsed calling expressions
  if(length(labels)) nam <- labels
  else {
    s <- as.character(sys.call()[-1])[1 : k]
    nam <- names(d)
    if(! length(nam)) nam <- rep('', k)
    nam <- ifelse(nam == '', s, nam)
  }
  ## Convert one condition to 0/1, treating NA as not-positive
  ispos <- function(x) {
    w <- if(is.logical(x)) x
         else if(is.numeric(x)) x > 0
         else tolower(as.character(x)) %in%
                c('present', 'yes', 'y', 'true', 'positive', 'pos', '+')
    w[is.na(x)] <- FALSE
    1L * w
  }
  names(d) <- nam
  ## n x k 0/1 matrix (assumes each condition has > 1 observation, as in
  ## the original, so sapply returns a matrix)
  x <- sapply(d, ispos)
  ## Positives per observation; cond[1] counts observations positive on
  ## no condition.  rowSums/colSums replace the slower apply(..., sum).
  po   <- rowSums(x)
  cond <- c(sum(po == 0), tabulate(po, nbins=k))
  j <- integer(k)
  w <- rep(0, nrow(x))
  for(i in seq_len(k)) {
    freqs <- colSums(x)        # remaining positives per condition
    if(all(freqs == 0)) break
    imax <- which.max(freqs)   # most frequent remaining condition
    j[i] <- imax
    w    <- ifelse(w == 0 & x[, imax], imax, w)
    x[x[, imax] == 1, ] <- 0   # exclude those observations hereafter
  }
  j <- j[j != 0]
  x <- if(noneNA) factor(w, j, nam[j]) else
                  factor(w, c(0, j), c('none', nam[j]))
  attr(x, 'obs.per.numcond') <- cond
  x
}
Hmisc/R/Merge.r 0000644 0001762 0000144 00000012242 14333753640 012751 0 ustar ligges users #' Merge Multiple Data Frames or Data Tables
#'
#' Merges an arbitrarily large series of data frames or data tables containing common \code{id} variables. Information about number of observations and number of unique \code{id}s in individual and final merged datasets is printed. The first data frame/table has special meaning in that all of its observations are kept whether they match \code{id}s in other data frames or not. For all other data frames, by default non-matching observations are dropped. The first data frame is also the one against which counts of unique \code{id}s are compared. Sometimes \code{merge} drops variable attributes such as \code{labels} and \code{units}. These are restored by \code{Merge}.
#'
#' @param \dots two or more dataframes or data tables
#' @param id a formula containing all the identification variables such that the combination of these variables uniquely identifies subjects or records of interest. May be omitted for data tables; in that case the \code{key} function retrieves the id variables.
#' @param all set to \code{FALSE} to drop observations not found in second and later data frames (only applies if not using \code{data.table})
#' @param verbose set to \code{FALSE} to not print information about observations
#' @export
#' @examples
#' \dontrun{
#' a <- data.frame(sid=1:3, age=c(20,30,40))
#' b <- data.frame(sid=c(1,2,2), bp=c(120,130,140))
#' d <- data.frame(sid=c(1,3,4), wt=c(170,180,190))
#' all <- Merge(a, b, d, id = ~ sid)
#' # First file should be the master file and must
#' # contain all ids that ever occur. ids not in the master will
#' # not be merged from other datasets.
#' a <- data.table(a); setkey(a, sid)
#' # data.table also does not allow duplicates without allow.cartesian=TRUE
#' b <- data.table(sid=1:2, bp=c(120,130)); setkey(b, sid)
#' d <- data.table(d); setkey(d, sid)
#' all <- Merge(a, b, d)
#' }
## See the roxygen comments above: merge an arbitrary number of data
## frames / data tables on common id variables, keeping all rows of the
## first (master) dataset, printing id bookkeeping, and restoring label
## and units attributes that merge() may drop.
Merge <- function(..., id=NULL, all=TRUE, verbose=TRUE) {
  w <- list(...)
  nams <- (as.character(sys.call())[-1])[1 : length(w)]
  m <- length(nams)
  ## If argument is a function call, e.g., subset(mydata, age > 20)
  ## find name of first argument and omit any dollar sign prefix and []
  for(i in 1 : m) {
    x <- nams[i]
    x <- gsub('subset\\(', '', x)
    x <- gsub(',.*', '', x)
    x <- gsub('\\[.*' , '', x)
    nams[i] <- gsub('(.*)\\$(.*)', '\\2', x)
  }
  d1  <- w[[1]]
  idt <- 'data.table' %in% class(d1)
  if(idt && ! requireNamespace("data.table", quietly = TRUE))
    stop("The 'data.table' package is required to operate on data tables.")
  ## id may be given as a formula; otherwise take the first table's keys.
  ## NOTE(review): key() and is.data.table() below are called without the
  ## data.table:: prefix even though only requireNamespace() was used —
  ## confirm data.table is attached (or namespaced) in the real package.
  if(length(id)) id <- all.vars(id)
  else {
    if(! idt) stop('must specify id if not using data.tables')
    id <- key(d1)
    if(! length(id)) stop('id not given and first data table has no keys')
  }
  m <- length(w)
  ## Per-dataset bookkeeping: variable counts, row counts, unique ids,
  ## and overlap of ids with dataset #1
  va <- n <- nu <- integer(m)
  nin1 <- nnin1 <- rep(NA, m)
  did  <- if(idt) d1[, id, with=FALSE] else d1[id]
  ## Composite id strings formed from the interaction of id variables
  idc1 <- unique(as.character(interaction(did)))
  id.union <- id.intersection <- idc1
  ## Unique variables, and their labels and units
  uvar <- lab <- un <- character(0)
  for(i in 1 : m) {
    d <- w[[i]]
    nd <- names(d)
    if(any(id %nin% nd))
      stop(paste('data frame', nams[i], 'does not contain id variables',
                 paste(id, collapse=', ')))
    ## Remember labels/units of variables first seen in this dataset
    j <- nd %nin% uvar
    uvar <- c(uvar, nd[j])
    lab  <- c(lab,  sapply(d, label)[j])
    un   <- c(un,   sapply(d, units)[j])
    idt <- is.data.table(d)
    ## All rows of dataset #1 are kept; later datasets keep non-matching
    ## rows only when all=TRUE
    M <- if(i == 1) d
         else
           merge(M, d, by=id, all.x=TRUE, all.y=all)
    did <- if(idt) d[, id, with=FALSE] else d[id]
    idc <- unique(as.character(interaction(did)))
    di <- dim(d)
    va[i] <- di[2]
    n [i] <- di[1]
    nu[i] <- length(unique(idc))
    if(i > 1) {
      nin1 [i] <- sum(idc %in%  idc1)
      nnin1[i] <- sum(idc %nin% idc1)
      id.union <- union(id.union, idc)
      id.intersection <- intersect(id.intersection, idc)
    }
  }
  ## Restore labels and units if needed
  nm <- names(M)
  names(lab) <- uvar
  names(un ) <- uvar
  anych <- FALSE
  if(any(c(lab, un) != ''))
    for(i in 1 : ncol(M)) {
      x <- M[[i]]
      ni <- nm[i]
      changed <- FALSE
      if(ni %nin% names(lab))
        stop(paste('Unexpected variable:', ni))
      if(lab[ni] != '' && ! length(attr(x, 'label'))) {
        label(x) <- lab[ni]
        changed <- TRUE
      }
      if(un[ni] != '' && ! length(attr(x, 'units'))) {
        units(x) <- un[ni]
        changed <- TRUE
      }
      if(changed) M[[i]] <- x
      anych <- anych | changed
    }
  ## Append a summary row for the merged result
  nams  <- c(nams, 'Merged')
  va    <- c(va, ncol(M))
  n     <- c(n, nrow(M))
  did   <- if(is.data.table(M)) M[, id, with=FALSE] else M[id]
  idc   <- unique(as.character(interaction(did)))
  nu    <- c(nu, length(unique(idc)))
  nin1  <- c(nin1,  sum(idc %in% idc1))
  nnin1 <- c(nnin1, sum(idc %nin% idc1))
  info  <- cbind(Vars=va, Obs=n, 'Unique IDs'=nu, 'IDs in #1'=nin1,
                 'IDs not in #1'=nnin1)
  rownames(info) <- nams
  if(verbose) {
    print(info)
    cat('\nNumber of unique IDs in any data frame :', length(id.union), '\n')
    cat( 'Number of unique IDs in all data frames:', length(id.intersection),
        '\n')
    if(anych) cat('\nLabels or units restored\n')
  }
  attr(M, 'info') <- info
  M
}
## Confidence intervals for a binomial proportion x/n.
##   method : 'wilson' (score interval, default), 'exact'
##            (Clopper-Pearson via the F distribution), 'asymptotic'
##            (Wald), or 'all' (matrix of all three; scalar x, n only)
## Scalar x or n is recycled against the other argument.  Returns a
## matrix (or data.frame when return.df=TRUE) with columns PointEst,
## Lower, Upper, optionally preceded by X and N.
binconf <- function(x, n, alpha = 0.05,
                    method = c("wilson","exact","asymptotic","all"),
                    include.x = FALSE, include.n = FALSE,
                    return.df = FALSE)
{
  ## ..modifications for printing and the addition of a
  ## method argument and the asymptotic interval
  ## and to accept vector arguments were
  ## made by Brad Biggerstaff on 10 June 1999
  method <- match.arg(method)
  ## bc() computes all three intervals for scalar x, n, then selects the
  ## requested row(s)
  bc <- function(x, n, alpha, method)
  {
    ## Clopper-Pearson 'exact' limits from F-distribution quantiles,
    ## with closed-form boundary values at x = 0 and x = n
    nu1 <- 2 * (n - x + 1)
    nu2 <- 2 * x
    ll <- if(x > 0)
            x/(x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1))
          else
            0
    nu1p <- nu2 + 2
    nu2p <- nu1 - 2
    pp <- if(x < n)
            qf(1 - alpha/2, nu1p, nu2p)
          else
            1
    ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp)
    ## Wilson score interval, with corrected limits at x = 1 and n - 1
    zcrit <- - qnorm(alpha/2)
    z2 <- zcrit * zcrit
    p <- x/n
    cl <- (p + z2/2/n + c(-1, 1) * zcrit *
           sqrt((p * (1 - p) + z2/4/n)/n))/(1 + z2/n)
    if(x == 1)
      cl[1] <- - log(1 - alpha)/n
    if(x == (n - 1))
      cl[2] <- 1 + log(1 - alpha)/n
    ## Wald (asymptotic normal) interval
    asymp.lcl <- x/n - qnorm(1 - alpha/2) *
      sqrt(((x/n) * (1 - x/n))/n)
    asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n)
    )/n)
    res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl))
    res <- cbind(rep(x/n, 3), res)
    ##dimnames(res) <- list(c("Exact", "Wilson", "Asymptotic"), c(
    ##  "Point Estimate", "Lower", "Upper"))
    switch(method,
           wilson =     res[2, ],
           exact =      res[1, ],
           asymptotic = res[3, ],
           all =        res,
           res)
  }
  ## Recycle a scalar x or n against the other argument
  if((length(x) != length(n)) & length(x) == 1)
    x <- rep(x, length(n))
  if((length(x) != length(n)) & length(n) == 1)
    n <- rep(n, length(x))
  ## method='all' is defined only for scalar input
  if((length(x) > 1 | length(n) > 1) & method == "all") {
    method <- "wilson"
    warning("method=all will not work with vectors...setting method to wilson")
  }
  if(method == "all" & length(x) == 1 & length(n) == 1) {
    mat <- bc(x, n, alpha, method)
    dimnames(mat) <- list(c("Exact", "Wilson", "Asymptotic"),
                          c("PointEst", "Lower", "Upper"))
    if(include.n)
      mat <- cbind(N = n, mat)
    if(include.x)
      mat <- cbind(X = x, mat)
    if(return.df)
      mat <- as.data.frame(mat)
    return(mat)
  }
  ## Vector case: one interval per element of x/n
  mat <- matrix(ncol = 3, nrow = length(x))
  for(i in 1:length(x))
    mat[i, ] <- bc(x[i], n[i], alpha = alpha, method = method)
  dimnames(mat) <- list(rep("", dim(mat)[1]),
                        c("PointEst", "Lower", "Upper"))
  if(include.n)
    mat <- cbind(N = n, mat)
  if(include.x)
    mat <- cbind(X = x, mat)
  if(return.df)
    mat <- as.data.frame(mat, row.names=NULL)
  mat
}
Hmisc/R/units.s 0000644 0001762 0000144 00000001130 14765552272 013056 0 ustar ligges users units <- function(x, ...)
UseMethod("units")
"units<-.default" <- function(x, value)
{
# value <- sub('s$', '', tolower(value))
attr(x, "units") <- value
x
}
## Retrieve the 'units' attribute of x, falling back first to the units
## stored on a 'tspar' (time-series parameters) attribute, and finally
## to the 'none' default.
units.default <- function(x, none='', ...)
{
  u <- attr(x, "units")
  if(is.null(u))
    u <- attr(attr(x, 'tspar'), 'units')
  if(is.null(u)) none else u
}
## Retrieve time units for a Surv object: first the object's own 'units'
## attribute, then the units recorded by Surv() in inputAttributes
## (time2 preferred over time), finally the 'none' default.
units.Surv <- function(x, none='', ...)
{
  a <- attributes(x)
  u <- a$units
  if(! length(u) && length(a$inputAttributes)) {
    ia <- a$inputAttributes
    u <- if(length(ia$time2$units)) ia$time2$units else ia$time$units
  }
  if(! length(u)) u <- none
  u
}
Hmisc/R/label.s 0000644 0001762 0000144 00000030356 14761572250 013001 0 ustar ligges users ##"label<-" <- function(x, value) {
## attr(x, "label") <- value
## x
##}
## exact=TRUE and [['label']] to prevent matching with a different attribute
## "labels" (Thanks: Ivan Puzek)
label <- function(x, default=NULL, ...) UseMethod("label")
## Default label method: fetch the 'label' attribute of x, substituting
## 'default' when no (non-empty) label is stored, and format it —
## optionally with the units attribute — via labelPlotmath() for plain,
## plotmath, grid, or html output.
label.default <- function(x, default=NULL, units=plot, plot=FALSE,
                          grid=FALSE, html=FALSE, ...)
{
  if(length(default) > 1)
    stop("the default string cannot be of length greater then one")
  at <- attributes(x)
  ## [['label']] forces an exact match, avoiding the 'labels' attribute
  lab <- at[['label']]
  if(length(default) && (!length(lab) || lab==''))
    lab <- default
  un  <- at$units
  labelPlotmath(lab,
                if(units) un else NULL,
                plotmath=plot, grid=grid, html=html)
}
## label method for Surv objects.  When the object itself has no label,
## fall back to the labels Surv() saved in inputAttributes; 'type'
## restricts which component labels (time and/or event) are eligible.
label.Surv <- function(x, default=NULL, units=plot,
                       plot=FALSE, grid=FALSE, html=FALSE,
                       type=c('any', 'time', 'event'), ...)
{
  type <- match.arg(type)
  if(length(default) > 1)
    stop("the default string cannot be of length greater then one")
  at  <- attributes(x)
  lab <- at[['label']]
  ia  <- at$inputAttributes
  if((! length(lab) || lab == '') && length(ia)) {
    ## Candidate labels in priority order for the requested type
    poss <- switch(type,
                   any   = c(ia$event$label, ia$time2$label, ia$time$label),
                   time  = c(                ia$time2$label, ia$time$label),
                   event =   ia$event$label )
    ## Take the first non-empty candidate
    for(lb in poss)
      if(! length(lab) && lb != '') lab <- lb
  }
  if(length(default) && (!length(lab) || lab=='')) lab <- default
  un <- NULL
  if(units) {
    ## Units likewise fall back to the saved input attributes
    un <- at$units
    if(! length(un) && length(ia)) {
      un <- ia$time2$units
      if(! length(un)) un <- ia$time$units
    }
  }
  labelPlotmath(lab, un,
                plotmath=plot, grid=grid, html=html)
}
## label method for data frames.  With self=TRUE, return the label of
## the frame itself; otherwise return a named vector with one label per
## column, using 'default' (which must then match the number of columns)
## for columns lacking labels.
label.data.frame <- function(x, default=NULL, self=FALSE, ...) {
  if(self) {
    label.default(x)
  } else {
    if(length(default) > 0 && length(default) != length(x)) {
      stop('length of default must same as x')
    } else if(length(default) == 0) {
      ## Wrap in a list so mapply recycles a single NULL over all columns
      default <- list(default)
    }
    labels <- mapply(FUN=label, x=x, default=default,
                     MoreArgs=list(self=TRUE), USE.NAMES=FALSE)
    names(labels) <- names(x)
    return(labels)
  }
}
## Combine a variable label and its units into one annotation.
##   html=TRUE     : delegate to markupSpecs$html$varlabel
##   plotmath=FALSE: plain text via markupSpecs$plain$varlabel
##   otherwise     : build a plotmath expression, placing units in
##                   scriptstyle; chexpr=TRUE returns the string
##                   'expression(...)' instead of a parsed expression
labelPlotmath <- function(label, units=NULL, plotmath=TRUE, html=FALSE,
                          grid=FALSE, chexpr=FALSE)
{
  if(! length(label)) label <- ''
  if(! length(units) || (length(units) == 1 && is.na(units))) units <- ''
  if(html)       return(markupSpecs$html$varlabel (label, units))
  if(! plotmath) return(markupSpecs$plain$varlabel(label, units))
  ## g() builds and parses a plotmath string from one or two pieces,
  ## falling back to the raw text when the result does not parse
  g <-
    function(x, y=NULL, xstyle=NULL, ystyle=NULL)
    {
      ## h() optionally wraps a piece in a style function, e.g.
      ## scriptstyle(...)
      h <- function(w, style=NULL)
        if(length(style)) sprintf('%s(%s)', style, w) else w
      ## try(parse()) returns a character "try-error" on failure, in
      ## which case the original unparsed text is returned
      tryparse <- function(z, original, chexpr) {
        p <- try(parse(text=z), silent=TRUE)
        if(is.character(p)) original else
        if(chexpr) sprintf('expression(%s)', z) else p
      }
      if(! length(y))
        return(tryparse(h(plotmathTranslate(x), xstyle), x, chexpr))
      w <- paste('list(',h(plotmathTranslate(x), xstyle), ',',
                 h(plotmathTranslate(y), ystyle), ')', sep='')
      tryparse(w, paste(x, y), chexpr)
    }
  if(units=='') g(label)
  else
    if(label=='') g(units)
  else g(label, units, ystyle='scriptstyle')
}
## Prepare a character string for use inside a plotmath expression.
## Strings already containing paste() pass through unchanged.  Strings
## containing characters illegal in an expression (space, %, _), or that
## fail to parse at all, are wrapped in paste("...").  A parseable
## string beginning with '/' receives a phantom() prefix.
plotmathTranslate <- function(x)
{
  if(any(grepl('paste', x))) return(x)
  illegal <- c(' ', '%', '_')
  needs.paste <- FALSE
  for(ch in illegal)
    if(any(grepl(ch, x)))
      needs.paste <- TRUE
  ## try(parse()) yields a character "try-error" when x is not a legal
  ## expression; such strings must also be protected by paste()
  if(! needs.paste && is.character(try(parse(text=x), silent=TRUE)))
    needs.paste <- TRUE
  if(needs.paste)
    x <- paste('paste("', x, '")', sep='')
  else if(substring(x, 1, 1) == '/')
    x <- paste('phantom()', x, sep='')
  x
}
labelLatex <- function(x=NULL, label='', units='', size='smaller[2]',
hfill=FALSE, bold=FALSE, default='', double=FALSE) {
if(length(x)) {
if(label == '') label <- label(x)
if(units == '') units <- units(x)
}
if(default == '' && length(x)) default <- deparse(substitute(x))
if(label == '') return(default)
label <- latexTranslate(label)
bs <- if(double) '\\\\' else '\\'
if(bold) label <- paste('{', bs, 'textbf ', label, '}', sep='')
if(units != '') {
units <- latexTranslate(units)
if(length(size) && size != '')
units <- paste('{', bs, size, ' ', units, '}', sep='')
if(hfill) units <- paste(bs, 'hfill ', units, sep='')
else
units <- paste(' ', units, sep='')
label <- paste(label, units, sep='')
}
label
}
"label<-" <- function(x, ..., value) UseMethod("label<-")
##From Bill Dunlap, StatSci 15Mar95:
"label<-.default" <- function(x, ..., value)
{
if(is.list(value)) {
stop("cannot assign a list to be a object label")
}
if(length(value) != 1L) {
stop("value must be character vector of length 1")
}
attr(x, 'label') <- value
if(! inherits(x, 'labelled')) class(x) <- c('labelled', class(x))
return(x)
}
"label<-.data.frame" <- function(x, self=TRUE, ..., value) {
if(!is.data.frame(x)) {
stop("x must be a data.frame")
}
if(missing(self) && is.list(value)) {
self <- FALSE
}
if(self) {
xc <- class(x)
xx <- unclass(x)
label(xx) <- value
class(xx) <- xc
return(xx)
} else {
if(length(value) != length(x)) {
stop("value must have the same length as x")
}
for (i in seq(along.with=x)) {
label(x[[i]]) <- value[[i]]
}
}
return(x)
}
"[.labelled"<- function(x, ...) {
tags <- valueTags(x)
x <- NextMethod("[")
valueTags(x) <- tags
x
}
"print.labelled"<- function(x, ...) {
x.orig <- x
u <- attr(x, 'units', exact=TRUE)
if(length(u))
attr(x,'units') <- NULL # so won't print twice
cat(attr(x, "label", exact=TRUE),
if(length(u))
paste('[', paste0(u, 's'), ']', sep=''),
"\n")
attr(x, "label") <- NULL
class(x) <-
if(length(class(x)) == 1 && inherits(x, 'labelled'))
NULL
else
class(x)[class(x) != 'labelled']
## next line works around print bug
if(! length(attr(x, 'class')))
attr(x, 'class') <- NULL
NextMethod("print")
invisible(x.orig)
}
as.data.frame.labelled <- as.data.frame.vector
Label <- function(object, ...) UseMethod("Label")
## Write label(varname) <- 'label' assignment statements, one per
## variable of a data frame, to the console or to 'file'.  The first
## write honors 'append'; subsequent writes always append so the file
## is not truncated between variables.  Returns invisible().
Label.data.frame <- function(object, file='', append=FALSE, ...)
{
  nn <- names(object)
  ## seq_along: the original 1:length(nn) crashed on a zero-column frame
  for(i in seq_along(nn)) {
    lab <- attr(object[[nn[i]]], 'label', exact=TRUE)
    lab <- if(length(lab)==0) '' else lab
    cat("label(",nn[i],")\t<- '",lab,"'\n",
        append=if(i==1)
                 append
               else
                 TRUE,
        file=file, sep='')
  }
  invisible()
}
## relevel method for labelled factors: relevel() strips attributes, so
## save the label beforehand and reattach it afterward.
relevel.labelled <- function(x, ...) {
  lab <- label(x)
  x <- NextMethod(x)
  label(x) <- lab
  x
}
## Ensure that every element of a list/data frame carrying a 'label'
## attribute also carries the 'labelled' class, so labelled methods
## dispatch after operations that may have stripped the class.
## Returns the (possibly modified) object.
reLabelled <- function(object)
{
  ## seq_along: the original 1:length(object) crashed on an empty object
  for(i in seq_along(object))
  {
    x <- object[[i]]
    lab <- attr(x, 'label', exact=TRUE)
    cl <- class(x)
    if(length(lab) && ! any(cl == 'labelled')) {
      class(x) <- c('labelled',cl)
      object[[i]] <- x
    }
  }
  object
}
## Build a named list from its arguments, naming each element after its
## argument name (if given) or the deparsed calling expression.  When
## labels=TRUE, that name is also attached as the element's label unless
## the element already carries one.
llist <- function(..., labels=TRUE)
{
  dotlist <- list(...)
  lname <- names(dotlist)
  name <- vname <- as.character(sys.call())[-1]
  ## seq_along: the original 1:length(dotlist) misbehaved for llist()
  ## called with no arguments
  for(i in seq_along(dotlist))
  {
    vname[i] <-
      if(length(lname) && lname[i]!='')
        lname[i]
      else
        name[i]
    ## R barked at setting vname[i] to NULL
    lab <- vname[i]
    if(labels && ! is.null(dotlist[[i]]))
    {
      lab <- attr(dotlist[[i]],'label', exact=TRUE)
      if(length(lab) == 0) lab <- vname[i]
      label(dotlist[[i]]) <- lab
    }
  }
  names(dotlist) <- vname[seq_along(dotlist)]
  dotlist
}
## Print a named list of objects with captions before (or, with
## after=TRUE, after) each element.  htmlfig selects caption markup:
## 0 = plain text, 1 = html caption via markupSpecs, 2 = markdown
## sub-subheading plus html long caption.  One-level nested lists are
## printed item by item with 'name: item' captions.  lcap optionally
## supplies a long caption per element of x.
prList <- function(x, lcap=NULL, htmlfig=0, after=FALSE) {
  if(! length(names(x))) stop('x must have names')
  if(length(lcap) && (length(lcap) != length(x)))
    stop('if given, lcap must have same length as x')
  mu <- markupSpecs$html
  ## g() formats a (short, long) caption pair per the htmlfig setting
  g <- if(htmlfig == 0) function(x, X=NULL) paste(x, X)
  else
    if(htmlfig == 1) function(x, X=NULL) paste(mu$cap(x), mu$lcap(X))
  else
    function(x, X=NULL)
      paste0('\n### ', mu$cap(x),
             if(length(X) && X != '') paste0('\n', mu$lcap(X)))
  ## See http://stackoverflow.com/questions/33737102/elegant-way-to-define-a-function-inside-another-function
  i <- 0
  for(n in names(x)) {
    i <- i + 1
    y <- x[[n]]
    ## A plain named list with > 1 element is printed item by item
    if(length(names(y)) && length(class(y)) == 1 &&
       inherits(y, 'list') && length(y) > 1) {
      for(m in names(y)) {
        if(! after)
          cat('\n', g(paste0(n, ': ', m)), '\n', sep='')
        suppressWarnings(print(y[[m]])) # for plotly warnings
        if(after) cat('\n', g(paste0(n, ': ', m)), '\n', sep='')
      }
      if(length(lcap) && lcap[i] != '') cat(mu$lcap(lcap[i]))
    }
    else {
      if(! after)
        cat('\n', g(n, if(length(lcap)) lcap[i]), '\n', sep='')
      suppressWarnings(print(x[[n]]))
      if(after) cat('\n', g(n, if(length(lcap)) lcap[i]), '\n', sep='')
    }
  }
  invisible()
}
## Write a figure or html table, with optional long caption (...) and
## short caption (scap), to the console or to 'file', for html reports.
## expcoll wraps a table in expand/collapse markup.
## NOTE(review): this copy appears damaged by text extraction — near the
## end, 'lcap' is rebuilt from a bare newline string when 'extra' is
## given, and the final statements reference 'r' and 'output()', neither
## of which is defined in this block.  Compare against the authoritative
## Hmisc source before trusting the tail of this function.
putHfig <- function(x, ..., scap=NULL, extra=NULL, subsub=TRUE, hr=TRUE,
                    table=FALSE, file='', append=FALSE, expcoll=NULL) {
  ec <- length(expcoll) > 0
  if(ec && ! table)
    stop('expcoll can only be specified for tables, not figures')
  mu <- markupSpecs$html
  ## All unnamed ... arguments are collapsed into one long caption
  lcap <- unlist(list(...))
  if(length(lcap)) lcap <- paste(lcap, collapse=' ')
  if(ec && length(lcap))
    stop('does not work when lcap is specified because of interaction with markdown sub-subheadings')
  ## No captions at all: emit the object (plus optional rule) and return
  if(! length(lcap) && ! length(scap)) {
    if(ec) {
      if(hr) x <- c(mu$hrule, x)
      x <- mu$expcoll(paste(expcoll, collapse=' '),
                      paste(x, collapse='\n'))
      cat(x, file=file, append=append, sep='\n')
      return(invisible())
    }
    if(hr) cat(mu$hrule, '\n', sep='', file=file, append=append)
    if(table) cat(x, file=file, append=append || hr, sep='\n')
    else suppressWarnings(print(x)) # because of # colors in pallette warning
    return(invisible())
  }
  ## Only a long caption given: promote it to the short caption
  if(! length(scap)) {
    scap <- lcap
    lcap <- NULL
  }
  scap <- if(table) mu$tcap(scap) else mu$cap(scap)
  if(subsub) scap <- paste0('\n### ', scap)
  if(hr && ! ec) cat(mu$hrule, '\n', sep='', file=file, append=append)
  if(! ec) cat(scap, '\n', sep='', file=file, append=append | hr)
  if(length(lcap)) {
    lcap <- if(table) mu$ltcap(lcap) else mu$lcap(lcap)
    ## NOTE(review): the following lines look truncated (see header note)
    if(length(extra))
      lcap <- paste0(
'
')
    r <- c(r, lcap)
  }
  output(r)
}
## Collect variable labels from one or more lists/data frames.  Labels
## found in later arguments overwrite those of like-named variables from
## earlier arguments; variables whose final label is empty are dropped.
## Returns a named character vector of labels.
combineLabels <- function(...)
{
  args <- list(...)
  labs <- sapply(args[[1]], label)
  if(length(args) > 1)
    for(k in 2 : length(args)) {
      newlab <- sapply(args[[k]], label)
      newlab <- newlab[newlab != '']
      if(length(newlab)) labs[names(newlab)] <- newlab
    }
  labs[labs != '']
}
Hmisc/R/testCharDateTime.r 0000644 0001762 0000144 00000013630 14537631546 015114 0 ustar ligges users ##' Test Character Variables for Dates and Times
##'
##' For a vector `x`, if it is already a date-time, date, or time variable, the type is returned if `convert=FALSE`, or a list with that type, the original vector, and `numna=0` is returned. Otherwise if `x` is not a character vector, a type of `notcharacter` is returned, or a list that includes the original `x` and `type='notcharacter'`. When `x` is character, the main logic is applied. The default logic (when `m=0`) is to consider `x` a date-time variable when its format is YYYY-MM-DD HH:MM:SS (:SS is optional) in more than 1/2 of the non-missing observations. It is considered to be a date if its format is YYYY-MM-DD or MM/DD/YYYY or DD-MMM-YYYY in more than 1/2 of the non-missing observations (MMM=3-letter month). A time variable has the format HH:MM:SS or HH:MM. Blank values of `x` (after trimming) are set to `NA` before proceeding.
##' @title testCharDateTime
##' @param x input vector of any type, but interesting cases are for character `x`
##' @param p minimum proportion of non-missing non-blank values of `x` for which the format is one of the formats described before considering `x` to be of that type
##' @param m if greater than 0, a test is applied: the number of distinct illegal values of `x` (values containing a letter or underscore) must not exceed `m`, or type `character` will be returned. `p` is set to `1.0` when `m` > 0.
##' @param convert set to `TRUE` to convert the variable under the dominant format. If all values are `NA`, `type` will be set to `'character'`.
##' @param existing set to `TRUE` to return a character string with the current type of variable without examining pattern matches
##' @return if `convert=FALSE`, a single character string with the type of `x`: `"character", "datetime", "date", "time"`. If `convert=TRUE`, a list with components named `type`, `x` (converted to `POSIXct`, `Date`, or `chron` times format), and `numna`, the number of originally non-`NA` values of `x` that could not be converted to the predominant format. If there were any non-convertible dates/times,
##' the returned vector is given an additional class `special.miss` and an
##' attribute `special.miss` which is a list with original character values
##' (`codes`) and observation numbers (`obs`). These are summarized by
##' `describe()`.
##' @md
##' @author Frank Harrell
##' @examples
##' for(conv in c(FALSE, TRUE)) {
##' print(testCharDateTime(c('2023-03-11', '2023-04-11', 'a', 'b', 'c'), convert=conv))
##' print(testCharDateTime(c('2023-03-11', '2023-04-11', 'a', 'b'), convert=conv))
##' print(testCharDateTime(c('2023-03-11 11:12:13', '2023-04-11 11:13:14', 'a', 'b'), convert=conv))
##' print(testCharDateTime(c('2023-03-11 11:12', '2023-04-11 11:13', 'a', 'b'), convert=conv))
##' print(testCharDateTime(c('3/11/2023', '4/11/2023', 'a', 'b'), convert=conv))
##' }
##' x <- c(paste0('2023-03-0', 1:9), 'a', 'a', 'a', 'b')
##' y <- testCharDateTime(x, convert=TRUE)$x
##' describe(y) # note counts of special missing values a, b
## See the roxygen comments above.  Classify (and optionally convert) a
## character vector as 'datetime', 'date', 'time', or 'character' by
## matching its values against a sequence of regular expressions.
testCharDateTime <- function(x, p=0.5, m=0, convert=FALSE, existing=FALSE) {
  ## Uniform return helper: bare type, or list(type, x, numna) if convert
  ret <- function(type, x, numna=0)
    if(convert) list(type=type, x=x, numna=numna) else type
  cl <- class(x)
  ## Already a date/time class: report its type without inspection
  if(any(cl %in% c('POSIXt', 'POSIXct', 'chron'))) return(ret('datetime', x))
  if(any(cl %in% c('Date', 'dates')))              return(ret('date', x))
  if(any(cl == 'times'))                           return(ret('time', x))
  if(! is.character(x)) return(ret('notcharacter', x))
  if(existing) return('character')
  y <- x
  y[trimws(y) == ''] <- NA
  x <- x[! is.na(x)]
  if(! length(x)) return(ret('character', y, numna=length(y)))
  if(m > 0) {
    ## With m > 0, demand a perfect match rate but tolerate up to m
    ## distinct alpha/underscore-containing values
    p <- 1.0
    ischar <- grep('[a-z,A-Z,_]', x)
    uchar  <- unique(x[ischar])
    lu     <- length(uchar)
    if(lu) {
      if(lu > m) return(ret('character', y)) # more than m unique char values
      x <- x[- ischar]   # values with no alpha characters
      if(! length(x)) return(ret('character', y))
    }
  }
  ## If p=1.0 all non-NA non-blank values must be legal date/time
  ## Minimum length for those is 5 characters, max is 19
  ## Save some time for this case
  if(p == 1.0) {
    len <- range(nchar(x))
    if(len[1] < 5 | len[2] > 19) return(ret('character', y))
  }
  rex <- c(
    '^[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]$',
    '^[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]$',
    '^[0-9]{4}-[0-1][0-9]-[0-3][0-9]$',
    '^[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}$',
    '^[0-3][0-9]-(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)-[0-9]{4}$',
    '^[0-2][0-9]:[0-5][0-9]:[0-5][0-9]$',
    '^[0-2][0-9]:[0-5][0-9]$' )
  types <- c('datetime', 'datetime', 'date', 'date', 'date', 'time', 'time')
  ## Conversion formats, one per regex (only the date patterns need one)
  fmts <- c('', '', '%Y-%m-%d', '%m/%d/%Y', '%d-%b-%Y', '', '')
  nx <- length(x)
  ## Loop over all 7 patterns; the original looped 1:6 and therefore
  ## never recognized the HH:MM pure-time pattern, despite its dead
  ## i %in% c(2, 7) branch below
  for(i in 1 : 7) {
    ty <- types[i]
    rx <- rex[i]
    ngood <- sum(grepl(rx, x, ignore.case=TRUE))
    if(ngood / nx >= p) {
      if(! convert) return(ty)
      nna  <- ! is.na(y)
      good <- grepl(rx, y, ignore.case=TRUE)
      bad  <- nna & ! good
      j    <- nna & good
      z    <- rep(NA, length(y))
      fm   <- fmts[i]
      ## chron is needed only for the two pure-time patterns (6, 7); the
      ## original required it from i > 4, wrongly covering dd-MMM-yyyy
      if(i > 5) if(! requireNamespace('chron', quietly=TRUE))
        stop('chron package is required for pure time variables')
      ## Patterns lacking seconds get :00 appended for chron
      sec <- if(i %in% c(2, 7)) ':00' else ''
      zj <- switch(ty,
                   datetime = as.POSIXct(y[j]),
                   date     = as.Date(y[j], format=fm),
                   time     = chron::chron(times.=paste0(y[j], sec)) )
      z[j] <- zj
      class(z) <- class(zj)
      lab <- label(y)
      if(lab != '') label(z) <- lab
      un <- units(y)
      if(un != '') units(z) <- un
      if(any(bad)) {
        ## Record unconvertible values as special missing codes so that
        ## describe() can summarize them
        class(z) <- c('special.miss', class(z))
        attr(z, 'special.miss') <- list(codes=y[bad], obs=which(bad))
      }
      return(list(type=ty, x=z, numna=nx - ngood))
    }
  }
  ret('character', y)
}
Hmisc/R/dotchart3.s 0000644 0001762 0000144 00000044574 14410372744 013621 0 ustar ligges users dotchart3 <-
function (x, labels = NULL, groups = NULL, gdata = NULL, cex = par("cex"),
pch = 21, gpch = pch, bg = par("bg"), color = par("fg"),
gcolor = par("fg"), lcolor = "gray",
xlim = range(c(x, gdata), na.rm=TRUE), main = NULL,
xlab = NULL, ylab = NULL, auxdata=NULL, auxtitle=NULL,
auxgdata=NULL, axisat=NULL, axislabels=NULL,
cex.labels = cex, cex.group.labels = cex.labels*1.25,
cex.auxdata = cex, groupfont=2,
auxwhere=NULL, height=NULL, width=NULL, ...)
{
## Enhanced horizontal dot chart drawn with base graphics.
## x: numeric vector or matrix; each matrix column is drawn as a separate
##    symbol series (pch recycled per column) on common horizontal lines.
## groups: optional factor splitting rows into labeled panels, with extra
##    vertical space and a bold group label at each panel head.
## gdata: group-summary values plotted on the group header lines.
## auxdata/auxgdata: text written in the right margin, headed by auxtitle.
## axisat/axislabels: override default x-axis tick positions/labels.
## auxwhere, height, width are accepted for call compatibility with
## dotchartp but are not used by this base-graphics version.
## Returns invisible(); called for its plotting side effect.
## Save graphical parameters modified below and restore them on exit
opar <- par("mai", "mar", "cex", "yaxs")
on.exit(par(opar))
par(cex = cex, yaxs = "i")
if (! is.numeric(x))
stop("'x' must be a numeric vector or matrix")
x <- as.matrix(x)
n <- nrow(x)
nc <- ncol(x)
## One plotting symbol per column of x
pch <- rep(pch, length.out=nc)
if(! length(labels)) labels <- rownames(x)
if(! length(labels)) stop('labels not defined')
if(length(groups)) groups <- as.factor(groups)
glabels <- levels(groups)
plot.new()
## Widths (inches) of the longest row label / group label; used to widen
## the left margin so all labels fit
linch <- max(strwidth(labels, "inch"), na.rm = TRUE)
if (! length(glabels)) {
ginch <- 0
goffset <- 0
}
else {
ginch <- max(strwidth(glabels, "inch", cex=cex.group.labels,
font=groupfont),
na.rm = TRUE)
goffset <- 0.4
}
if(length(labels) + length(glabels) > 0) {
nmai <- par("mai")
nmai[2L] <- max(nmai[2L], nmai[4L] + max(linch + goffset, ginch) + 0.1)
## Run strwidth separately because one of the 3 variables might
## be an expression, in which case an overall c(...) would combine the
## widths of character vectors
if(length(auxdata) + length(auxgdata) > 0)
nmai[4L] <- .2 + 1.1 * max(strwidth(auxtitle, 'inch', cex=cex.auxdata),
strwidth(auxdata, 'inch', cex=cex.auxdata),
strwidth(auxgdata, 'inch', cex=cex.auxdata))
par(mai = nmai)
}
if (! length(groups)) {
## No groups: rows are plotted top (first row) to bottom
o <- n:1L
y <- o
ylim <- c(.5, n + .5)
x <- x[o, , drop=FALSE]
labels <- labels[o]
if(length(auxdata)) auxdata <- auxdata[o]
}
else {
# Added: For each group reverse order of data so plotting will
# put first levels at top
o <- sort.list(as.numeric(groups), decreasing = TRUE)
groups <- groups[o]
# for(g in levels(groups)) {
# i <- groups == g
# o[i] <- rev(o[i])
# }
x <- x[o, , drop=FALSE] # ascending within region
labels <- labels[o]
if(length(auxdata)) auxdata <- auxdata[o]
# End added
# groups <- groups[o] (put earlier)
color <- rep(color, length.out = length(groups))[o]
lcolor <- rep(lcolor, length.out = length(groups))[o]
## Insert 2 y-units of blank space wherever the group changes
offset <- cumsum(c(0, diff(as.numeric(groups)) != 0))
y <- 1L:n + 2 * offset
ylim <- range(0.5, y + 1.5) # range(0, y + 2)
}
plot.window(xlim = xlim, ylim = ylim, log = "")
## Height of one line of margin text in inches (converts widths to lines)
lheight <- par("csi")
if(length(labels)) {
linch <- max(strwidth(labels, "inch", cex=cex.labels), na.rm = TRUE)
loffset <- (linch + 0.1) / lheight
# was line=loffset
mtext(labels, side = 2, line = .1*loffset, at = y, adj = 1,
col = color, las = 2, cex = cex.labels, ...)
}
## Dotted reference line behind every data row
abline(h = y, lty = "dotted", col = lcolor)
if(length(auxtitle)) {
upedge <- par('usr')[4]
## outerText (Hmisc helper) writes text in the right margin
outerText(auxtitle,
upedge + strheight(auxtitle, cex=cex) / 2,
cex=cex)
}
## y-positions of the group header lines, one above each group block
gpos <- if(length(groups))
rev(cumsum(rev(tapply(groups, groups, length)) + 2) - 1)
if(length(auxdata) + length(auxgdata) > 0)
outerText(c(auxdata, auxgdata), c(y, if(length(auxgdata)) gpos),
cex=cex.auxdata)
for(i in 1:nc)
points(x[,i], y, pch = pch[i], col = color, bg = bg)
if(length(groups)) {
ginch <- max(strwidth(glabels, "inch", font=groupfont,
cex=cex.group.labels),
na.rm = TRUE)
goffset <- (max(linch + 0.2, ginch, na.rm = TRUE) + 0.1)/lheight
mtext(glabels, side = 2, line = .2, at = gpos, adj = 1, # was adj=0
col = gcolor, las = 2, cex = cex.group.labels, font=groupfont, ...)
if (length(gdata)) {
abline(h = gpos, lty = "dotted")
## Matrix gdata: one summary symbol series per column
if(is.matrix(gdata))
for(j in 1:ncol(gdata))
points(gdata[, j], gpos, pch=gpch[j], col=gcolor, bg=bg, ...)
else
points(gdata, gpos, pch = gpch, col = gcolor, bg = bg,
...)
}
}
if(length(axisat)) axis(1, at=axisat, labels=axislabels)
else
axis(1)
box()
title(main = main, xlab = xlab, ylab = ylab, ...)
invisible()
}
dotchartp <-
function (x, labels = NULL, groups = NULL, gdata = NULL,
xlim = range(c(x, gdata), na.rm=TRUE), main=NULL,
xlab = NULL, ylab = '', auxdata=NULL, auxtitle=NULL,
auxgdata=NULL, auxwhere=c('right', 'hover'),
symbol='circle', col=colorspace::rainbow_hcl,
legendgroup=NULL,
axisat=NULL, axislabels=NULL, sort=TRUE, digits=4, dec=NULL,
height=NULL, width=700, layoutattr=FALSE, showlegend=TRUE,
...)
{
## plotly analog of dotchart3: interactive horizontal dot chart.
## x: numeric vector or matrix (one trace per column).  groups splits
## rows into labeled panels with summary rows from gdata.  auxdata /
## auxgdata are shown either as right-margin text or in hover text
## (auxwhere).  sort may be TRUE/FALSE or 'ascending'/'descending'.
## Returns a plotly object; if layoutattr=TRUE the layout list is
## attached as attribute 'layout' instead of being applied, so the
## caller can combine traces before laying out.
## Fix applied: warning message read "does not makes sense".
if (!requireNamespace("plotly", quietly=TRUE))
stop("This function requires the 'plotly' package.")
auxwhere <- match.arg(auxwhere)
## Number formatter for hover text: fixed decimals if dec given,
## otherwise significant digits
fmt <- if(length(dec)) function(x) format(round(x, dec))
else
function(x) format(x, digits=digits)
mu <- markupSpecs$html
lspace <- mu$lspace
if (! is.numeric(x))
stop("'x' must be a numeric vector or matrix")
x <- as.matrix(x)
n <- nrow(x)
nc <- ncol(x)
symbol <- rep(symbol, length.out=nc)
## col may be a palette function (called with the number of traces)
col <- if(length(col)) {
if(! is.function(col)) col
else col(ncol(x))
}
col <- rep(col, length.out=nc)
if(length(gdata)) {
gdata <- as.matrix(gdata)
if(ncol(gdata) != nc) stop('gdata must have same columns as x')
}
if(! length(labels)) labels <- rownames(x)
if(! length(labels)) stop('labels not defined')
if(length(groups)) groups <- as.factor(groups)
glabels <- levels(groups)
groups.pres <- length(groups) > 0
if(groups.pres && is.character(sort))
warning('specifying sort does not make sense with groups present')
if(! groups.pres) {
y <- n : 1
if(is.logical(sort)) sort <- if(sort) 'descending' else 'none'
if(sort != 'none') {
## Sort rows by the first column of x
o <- order(if(sort == 'ascending') x[, 1] else - x[, 1])
x <- x[o, , drop=FALSE]
labels <- labels[o]
if(length(auxdata))
auxdata <- if(is.matrix(auxdata))
auxdata[o,, drop=FALSE] else auxdata[o]
}
} else { # groups present
if(is.character(sort) || sort) {
o <- if(is.character(sort)) {
if(sort == 'ascending') order(x[, 1])
else
order(-x[, 1])
} else order(as.integer(groups)) ###, as.integer(labels))
groups <- groups[o]
x <- x[o, , drop=FALSE]
labels <- labels[o]
if(length(auxdata))
auxdata <- if(is.matrix(auxdata))
auxdata[o,, drop=FALSE] else auxdata[o]
}
## Detect the first row of each group (Lag = Hmisc lagged values)
lgroups <- Lag(as.character(groups))
lgroups[1] <- 'NULL'
first.in.group <- groups != lgroups
## 1.5 extra y-units of space before each new group; yg marks the
## group header rows.  y is negated so first group plots at top.
y <- cumsum(1 + 1.5 * first.in.group)
yg <- y[first.in.group] - 1
y <- -y
yg <- -yg
} # end groups present
X <- x[, 1]
tly <- y
auxd <- NULL
auxh <- auxwhere == 'hover'
auxt <- if(length(auxtitle) && auxtitle != '')
# ifelse(auxh, paste0(auxtitle, ' '), paste0(auxtitle, ':'))
paste0(auxtitle, ':')
else ''
## Hover variant: fold auxiliary text into the hover strings
if(auxh)
auxd <- if(length(auxdata))
paste0(auxt,
if(is.matrix(auxdata)) auxdata[, 1] else auxdata)
else rep('', length(X))
## Append group-summary points (and their hover text) to the trace
if(length(gdata) || (auxh && length(auxgdata))) {
X <- c(X, if(length(gdata)) gdata[, 1] else rep(NA, length(auxgdata)))
tly <- c(tly, yg)
if(auxh) auxd <- c(auxd,
if(length(auxgdata))
paste0(auxt, # was lspace after auxt,
if(is.matrix(auxgdata)) auxgdata[, 1]
else auxgdata)
else rep('', length(yg)))
}
nx <- if(nc == 1) '' else colnames(x)[1]
ht <- if(nx == '') fmt(X)
else paste(nx, ' ', fmt(X))
if(auxh && any(auxd != '')) ht <- paste0(ht, ' ', auxd) # was lspace
d <- data.frame(X, y=tly, ht=ht)
if(length(height) && height == 'auto')
height <- plotlyParm$heightDotchart(n)
## Global option to let plotly autosize instead of fixed width/height
auto <- .Options$plotlyauto
if(length(auto) && auto) height <- width <- NULL
## First trace: column 1 of x
p <- plotly::plot_ly(d, x=~ X, y=~ y, mode='markers', type='scatter',
marker=list(symbol=symbol[1], color=col[1]),
text = ~ ht,
hoverinfo = 'text',
name=nx,
legendgroup=if(length(legendgroup)) legendgroup[1],
width = width, height=height)
## Remaining columns become additional marker traces
if(nc > 1)
for(i in 2 : nc) {
X <- x[, i]
tly <- y
if(length(gdata)) {
X <- c(X, gdata[, i])
tly <- c(tly, yg)
}
ax <- if(length(auxdata) && is.matrix(auxdata)) auxdata[, i] else ''
d <- data.frame(X=X, y=tly,
ht=paste0(colnames(x)[i], ' ',
fmt(X), lspace, ax))
p <- plotly::add_markers(p, data=d, x=~ X, y=~ y, #mode='markers',
marker=list(symbol=symbol[i], color=col[i]),
text = ~ ht, hoverinfo='text',
legendgroup=if(length(legendgroup)) legendgroup[i],
name=colnames(x)[i])
}
dx <- 0.1 * diff(xlim)
lenaux <- length(auxdata) + length(auxgdata)
## Right-margin variant: auxiliary text as an invisible text trace
if(auxwhere == 'right' && lenaux) {
yb <- tb <- NULL
if(length(auxdata)) {
yb <- y
tb <- auxdata
}
if(groups.pres && length(auxgdata)) {
yb <- c(yb, yg)
tb <- c(tb, auxgdata)
}
if(length(auxtitle)) {
yb <- c(yb, min(yb) - 2)
tb <- c(tb, auxtitle)
}
## NOTE(review): when groups are present auxgdata is added twice
## (here and just above), overplotting identical text at yg;
## appears harmless but worth confirming upstream intent
if(length(auxgdata)) {
yb <- c(yb, yg)
tb <- c(tb, paste('', auxgdata, '', sep=''))
}
z <- data.frame(xb=xlim[2] + dx, yb, tb)
p <- plotly::add_text(p, data=z, x=~ xb, y=~ yb, text=~ tb, # mode='text',
textposition='left',
textfont=list(size=10), hoverinfo='none', name='')
}
## x-axis ticks: show labels only at the coarser 'pretty' positions
if(length(axisat)) {tlx <- axisat; ttx <- axislabels}
else {
tlx <- pretty(xlim, 10)
tlxs <- pretty(xlim, 5)
ttx <- format(tlx)
for(j in 1 : length(tlx))
if(! any(abs(tlxs - tlx[j]) < 1e-10)) ttx[j] <- ''
}
## y-axis ticks: row labels plus (bold-ish) group labels at yg;
## long labels are shrunk via markupSpecs helpers
tly <- y
tty <- as.character(labels)
if(groups.pres) {
tly <- c(tly, yg)
tty <- c(tty, paste('', glabels, '', sep=''))
}
if(! length(ylab)) ylab <- ''
tty <- ifelse(nchar(tty) >= 40, mu$smaller2(tty),
ifelse(nchar(tty) > 20, mu$smaller(tty), tty))
leftmargin <- plotlyParm$lrmargin(tty)
rx <- if(auxwhere == 'right' && lenaux > 0) dx else dx / 2
ylim <- c(min(y) - .15, max(y) + 1.5)
lo <- list(title=main,
xaxis=list(title=xlab,
range=c(xlim[1] - 0.2 * dx,
xlim[2] + rx),
zeroline=FALSE,
tickvals=tlx, ticktext=ttx),
yaxis=list(title=ylab, range=ylim,
zeroline=FALSE,
tickvals=tly, ticktext=tty),
# width=width,
# height=if(length(height) && height == 'auto')
# plotlyParm$heightDotchart(n) else height,
autosize=(length(width) + length(height)) == 0,
margin=list(l=leftmargin, t=5),
showlegend=showlegend)
## layoutattr=TRUE: caller will apply the layout later (after adding
## more traces), so just attach it
if(layoutattr) {
attr(p, 'layout') <- lo
return(p)
}
plotly::layout(p,
title = main,
xaxis = list(title=xlab,
range=c(xlim[1] - 0.2 * dx,
xlim[2] + rx),
zeroline=FALSE,
tickvals=tlx, ticktext=ttx),
yaxis = list(title=ylab, range=ylim,
zeroline=FALSE,
tickvals=tly, ticktext=tty),
# width = width,
# height= if(length(height) && height == 'auto')
# plotlyParm$heightDotchart(n) else height,
# autosize=(length(width) + length(height)) == 0,
margin = list(l=leftmargin, t=5),
legendgroup=legendgroup, showlegend = showlegend)
}
summaryD <- function(formula, data=NULL, fun=mean, funm=fun,
groupsummary=TRUE, auxvar=NULL, auxtitle='',
auxwhere=c('hover', 'right'),
vals=length(auxvar) > 0, fmtvals=format,
symbol=if(use.plotly) 'circle' else 21,
col=if(use.plotly) colorspace::rainbow_hcl else 1:10,
legendgroup=NULL,
cex.auxdata=.7, xlab=v[1], ylab=NULL,
gridevery=NULL, gridcol=gray(.95), sort=TRUE, ...) {
## Summarize y by one or two stratification variables (formula
## y ~ x1 or y ~ x1 + x2) using fun (and funm for the x1-level
## summary when two strata are given), then display the summaries
## with dotchartp (plotly) or dotchart3 (base graphics) depending
## on grType().  auxvar names a statistic returned by fun to be shown
## as auxiliary text rather than plotted.  Returns the dotchart
## result (invisibly for base graphics).
use.plotly <- grType() == 'plotly'
auxwhere <- match.arg(auxwhere)
## A user-supplied formatter implies values should be displayed
if(! missing(fmtvals)) vals <- TRUE
## Resolve formula variables in data if given, else in the formula's
## environment
data <- if(! length(data)) environment(formula)
else list2env(data, parent=environment(formula))
if(length(auxvar) && is.character(auxvar) && missing(auxtitle))
auxtitle <- auxvar
v <- all.vars(formula)
m <- length(v) - 1
yn <- v[1]; xn <- v[-1]
two <- length(xn) == 2
y <- get(yn, envir=data)
x1 <- get(xn[1], envir=data)
x2 <- if(two) get(xn[2], envir=data)
## summarize/llist are Hmisc helpers; type='matrix' keeps multiple
## statistics from fun as columns of s$y
s <- summarize(y, if(two) llist(x1, x2) else llist(x1), fun,
type='matrix', keepcolnames=TRUE)
## if(is.matrix(s$y)) colnames(s$y) <- colnames(y)
cx1 <- if(is.factor(s$x1)) as.integer(s$x1)
else
s$x1
yy <- if(is.matrix(s$y)) s$y[, 1, drop=FALSE] else s$y
## Sort descending on the first statistic, within x1 when two strata
if(sort) s <- if(two) s[order(cx1, - yy), ] else s[order(- yy), ]
## Split the summary into the columns to plot (sy) and the formatted
## auxiliary column (fval)
auxd <- function(z) {
sy <- z$y
if(length(auxvar)) {
if(! is.matrix(sy))
stop('auxvar is only used when fun returns > 1 statistic')
f <- if(vals) fmtvals(sy[, auxvar])
sy <- if(is.numeric(auxvar)) sy[, -auxvar, drop=FALSE]
else
sy[, setdiff(colnames(sy), auxvar), drop=FALSE]
}
else
f <- if(vals) fmtvals(if(is.matrix(sy)) sy[, 1] else sy)
list(sy=sy, fval=f) # sy = remaining y, fval = formatted auxvar
}
z <- auxd(s)
if(two) {
if(groupsummary) {
s2 <- summarize(y, llist(x1), funm, type='matrix', keepcolnames=TRUE)
z2 <- auxd(s2)
}
## NOTE(review): auxd(s) was already computed just above; this
## recomputation is redundant but harmless
z <- auxd(s)
col <- if(length(col)) {
if(! is.function(col)) col
else
col(if(is.matrix(z$sy)) ncol(z$sy) else 1)
}
## if already sorted (group variable order first) don't re-sort
## sort causes problems to dotchart3
res <- if(use.plotly)
dotchartp(z$sy, s$x2, groups=s$x1,
auxdata=z$fval, auxtitle=if(vals) auxtitle,
auxwhere=auxwhere,
cex.auxdata=cex.auxdata,
gdata =if(groupsummary) z2$sy,
auxgdata=if(groupsummary) z2$fval,
xlab=xlab, ylab=ylab, symbol=symbol, col=col,
legendgroup=legendgroup, sort=FALSE, ...)
else
dotchart3(z$sy, s$x2, groups=s$x1,
auxdata=z$fval, auxtitle=if(vals) auxtitle,
cex.auxdata=cex.auxdata,
gdata =if(groupsummary) z2$sy,
auxgdata=if(groupsummary) z2$fval,
xlab=xlab, ylab=ylab, pch=symbol, ...)
}
else
res <- if(use.plotly)
dotchartp(z$sy, s$x1, auxdata=z$fval,
auxtitle=if(vals) auxtitle,
auxwhere=auxwhere,
cex.auxdata=cex.auxdata, xlab=xlab, ylab=ylab,
symbol=symbol, col=col, legendgroup=legendgroup,
sort=FALSE, ...)
else
dotchart3(z$sy, s$x1, auxdata=z$fval,
auxtitle=if(vals) auxtitle, pch=symbol,
cex.auxdata=cex.auxdata, xlab=xlab, ylab=ylab, ...)
## Optional vertical grid lines for the base-graphics chart
if(! use.plotly && length(gridevery)) {
xmin <- par('usr')[1]
xmin <- ceiling(xmin / gridevery) * gridevery
## NOTE(review): s2 is only defined when groupsummary=TRUE; using
## gridevery with two strata and groupsummary=FALSE would fail here
xmax <- if(length(xn) == 1) max(s$y, na.rm=TRUE)
else
max(c(s$y, s2$y), na.rm=TRUE)
abline(v=seq(xmin, xmax, by=gridevery), col=gridcol)
}
if(use.plotly) res else invisible(res)
}
summaryDp <-
function(formula,
fun=function(x) c(Mean=mean(x, na.rm=TRUE),
N=sum(! is.na(x))),
overall=TRUE, xlim=NULL, xlab=NULL,
data=NULL, subset=NULL, na.action=na.retain,
ncharsmax=c(50, 30), digits=4, ...) {
## Summarize the left-hand variable of formula by each right-hand
## variable in turn (plus an 'Overall' stratum when overall=TRUE)
## and draw a two-level plotly dot chart with dotchartpl.
## fun must return a named vector; the first statistic is plotted and
## all statistics appear in the hover text.  ncharsmax gives maximum
## label widths: [1] for the x label wrap, [2] for break insertion in
## stratum labels.  Returns the dotchartpl (plotly) object.
Y <- if(length(subset))
model.frame(formula, data=data, subset=subset, na.action=na.action)
else
model.frame(formula, data=data, na.action=na.action)
X <- Y[-1]
y <- Y[[1]]
## Wrap long labels, rejoining strwrap's pieces with spaces
swr <- function(w, ...)
sapply(strwrap(w, ..., simplify=FALSE),
function(x) paste(x, collapse=' '))
addbr <- markupSpecs$html$addBreak
width <- ncharsmax[1]; minbreak <- ncharsmax[2]
if(! length(xlab)) xlab <- swr(label(y, default=names(Y)[1]), width=width)
major <- minor <- ht <- character(0)
x <- numeric(0)
## Names of the statistics computed by fun, used in hover text
funlabs <- names(fun(y))
nx <- names(X)
if(overall) nx <- c(nx, 'Overall')
for(v in nx) {
if(v == 'Overall') {
by <- rep('Overall', length(y))
bylab <- 'Overall'
} else {
by <- X[[v]]
bylab <- addbr(label(by, default=v), minbreak=minbreak)
}
s <- summarize(y, by, fun)
## Sort strata in descending order of the first statistic
i <- order(- s[, 2])
s <- s[i, ]
m <- s[, 2]
faux <- paste0(funlabs[1], ': ', format(m, digits=digits))
## Append any additional statistics from fun to the hover text
if(NCOL(s) > 2) {
j <- 0
aux <- s[-(1:2)]
for(a in names(aux)) {
j <- j + 1
faux <- paste0(faux, ' ', funlabs[j + 1], ': ',
format(aux[[a]], digits=digits))
}
}
major <- c(major, rep(bylab, length(m)))
minor <- c(minor, if(v == 'Overall') '' else as.character(s[, 1]))
ht <- c(ht, faux)
x <- c(x, unname(m))
}
## Default x range: data range padded by 5% on each side
if(! length(xlim)) {
r <- range(x)
xlim <- r + c(-1, 1) * diff(r) / 20
}
dotchartpl(x, major, minor, htext=ht, xlim=xlim, xlab=xlab, ...)
}
Hmisc/R/consolidate.s 0000644 0001762 0000144 00000001625 13215500012 014177 0 ustar ligges users #combine <- function(x, value, protect, ...) stop("combine() depricated due to naming conflict renamed consolidate()")
#'combine<-' <- function(x, protect, ..., value) stop("combine<-() deprecated due to naming conflict renamed consolidate<-()")
## Generic: merge the named elements of 'value' into 'x'.
## Dispatches on class(x); see consolidate.default for the semantics of
## 'protect' (keep existing elements on name collision).
consolidate <- function(x, value, protect, ...) {
UseMethod("consolidate")
}
## Replacement-function form: consolidate(x, protect) <- value
## is equivalent to x <- consolidate(x, value, protect).
'consolidate<-' <- function(x, protect=FALSE, ..., value)
consolidate(x, value, protect, ...)
consolidate.default <- function(x, value, protect=FALSE, ...) {
  ## Merge 'value' into 'x' element-by-element using names.
  ## Elements of 'value' whose names also occur in 'x' overwrite the
  ## corresponding elements of 'x', unless protect=TRUE in which case
  ## the originals are kept.  Unmatched elements of 'value' are appended.
  ## When either side has no usable names the two are simply concatenated.
  ## Missing or NULL arguments are treated as empty vectors.
  if(missing(x) || is.null(x)) x <- vector()
  if(missing(value) || is.null(value)) value <- vector()
  nmx <- names(x)
  nmv <- names(value)
  ## No name information on one side => merging by name is impossible
  if(is.null(nmx) || is.null(nmv) || all(nmv == "") || all(nmx == ""))
    return(c(x, value))
  common <- intersect(nmx, nmv[nmv != ""])
  if(! protect)
    x[common] <- value[common]
  c(x, value[! nmv %in% common])
}
Hmisc/R/multLines.s 0000644 0001762 0000144 00000004113 14400355516 013660 0 ustar ligges users multLines <- function(x, y, pos=c('left', 'right'),
col='gray', lwd=1, lty=1,
lwd.vert=.85, lty.vert=1, alpha=0.4,
grid=FALSE,
pobj=plotly::plot_ly(), xlim,
name=colnames(y)[1], legendgroup=name, showlegend=TRUE, ...) {
## Draw the first column of y vs. x as a line, then for each successive
## pair of columns (2nd & last, 3rd & next-to-last, ...) draw vertical
## segments between them, offset horizontally (pos) so nested intervals
## (e.g. multiple confidence levels) remain distinguishable.
## y must be a matrix with an odd number (>= 3) of columns.
## Renders with plotly when grType() == 'plotly', with lattice/grid when
## grid=TRUE, otherwise with base graphics.
## Returns the plotly object, or invisible(NULL) for the other devices.
if(grid) sRequire('lattice')
pos <- match.arg(pos)
p <- ncol(y)
n <- nrow(y)
if(! is.matrix(y) || p == 1 || p %% 2 != 1)
stop('y must have 3, 5, 7, ... columns')
if(length(x) != n)
stop('length of x must match rows of y')
## Semi-transparent color for the interval segments
vcol <- adjustcolor(col, alpha.f=alpha)
pl <- grType() == 'plotly' && requireNamespace("plotly")
if(pl) {
pobj <- plotly::add_lines(pobj, data=data.frame(x=x, y=y[,1]),
x=~x, y=~y, color=I(col),
name=name, legendgroup=legendgroup,
showlegend=showlegend, ...)
## Horizontal offset per interval, in data units (xlim required here)
xdel <- 0.005 * diff(xlim)
}
else if(grid) {
lattice::llines(x, y[, 1], col=col, lwd=lwd, lty=lty)
xdel <- unit(0.75, 'mm')
x <- unit(x, 'native')
gp <- gpar(col=vcol, lwd=lwd.vert, lty=lty.vert)
}
else {
lines(x, y[, 1], col=col, lwd=lwd, lty=lty)
xdel <- 0.005 * diff(par('usr')[1 : 2])
}
## Columns pair up from the outside in: (2, p), (3, p-1), ...
half <- (p - 1) / 2
x0 <- if(grid) unit(x, 'native') else x
for(i in 1 : half) {
i1 <- i + 1
i2 <- p - i + 1
## Shift each successive interval further left or right
x0 <- switch(pos, left = x0 - xdel, right = x0 + xdel)
tn <- paste0(colnames(y)[i1], ' - ', colnames(y)[i2])
if(pl) pobj <- plotly::add_segments(pobj,
data=data.frame(x0=x0, y0=y[, i1],
y1=y[, i2]),
x=~x0, y=~y0, xend=~x0, yend=~y1,
name=tn, legendgroup=tn, showlegend=showlegend,
col=I(vcol), ...)
else if(grid) grid.segments(x0, y[, i1], x0, y[, i2],
gp=gp, default.units='native')
else segments(x0, y[, i1], x0, y[, i2], col=vcol,
lty=lty.vert, lwd=lwd.vert)
}
if(pl) pobj else invisible(NULL)
}
Hmisc/R/qcrypt.r 0000644 0001762 0000144 00000014323 14656136535 013244 0 ustar ligges users ##' Store and Encrypt R Objects or Files or Read and Decrypt Them
##'
##' `qcrypt` is used to protect sensitive information on a user's computer or when transmitting a copy of the file to another R user. Unencrypted information only exists for a moment, and the encryption password does not appear in the user's script but instead is managed by the `keyring` package to remember the password across R sessions, and the `getPass` package, which pops up a password entry window and does not allow the password to be visible. The password is requested only once, except perhaps when the user logs out of their operating system session or reboots.
##'
##' The keyring can be bypassed and the password entered in a popup window by specifying `service=NA`. This is the preferred approach when sending an encrypted file to a user on a different computer.
##'
##' `qcrypt` writes R objects to disk in a temporary file using the `qs` package `qsave` function. The file is quickly encrypted using the `safer` package, and the temporary unencrypted `qs` file is deleted. When reading an encrypted file the process is reversed.
##'
##' To save an object in an encrypted file, specify the object as the first argument `obj` and specify a base file name as a character string in the second argument `base`. The full `qs` file name will be of the form `base.qs.encrypted` in the user's current working directory. To unencrypt the file into a short-lived temporary file and use `qs::qread` to read it, specify the base file name as a character string with the first argument, and do not specify the `base` argument.
##'
##' Alternatively, `qcrypt` can be used to encrypt or decrypt existing files of any type using the same password and keyring mechanism. The former is done by specifying `file` that does not end in `'.encrypted'` and the latter is done by ending `file` with `'.encrypted'`. When `file` does not contain a path it is assumed to be in the current working directory. When a file is encrypted the original file is removed. Files are decrypted into a temporary directory created by `tempdir()`, with the name of the file being the value of `file` with `'.encrypted'` removed.
##'
##' Interactive password provision works when running `R`, `Rscript`, `RStudio`, or `Quarto` but does not work when running `R CMD BATCH`. `getPass` fails under `RStudio` on Macs.
##'
##' See [R Workflow](https://hbiostat.org/rflow/fcreate.html#sec-fcreate-secure) for more information.
##' @title qcrypt
##' @param obj an R object to write to disk and encrypt (if `base` is specified) or the base file name to read and uncrypted (if `base` is not specified). Not used when `file` is given.
##' @param base base file name when creating a file. Not used when `file` is given.
##' @param service a fairly arbitrary `keyring` service name. The default is almost always OK unless you need to use different passwords for different files.
##' @param file full name of file to encrypt or decrypt
##' @return (invisibly) the full encrypted file name if writing the file, or the restored R object if reading the file. When decrypting a general file with `file=`, the returned value is the full path to a temporary file containing the decrypted data.
##' @author Frank Harrell
##' @md
##' @export
##' @examples
##' \dontrun{
##' # Suppose x is a data.table or data.frame
##' # The first time qcrypt is run with a service a password will
##' # be requested. It will be remembered across sessions thanks to
##' # the keyring package
##' qcrypt(x, 'x') # creates x.qs.encrypted in current working directory
##' x <- qcrypt('x') # unencrypts x.qs.encrypted into a temporary
##' # directory, uses qs::qread to read it, and
##' # stores the result in x
##' # Encrypt a general file using a different password
##' qcrypt(file='report.pdf', service='pdfkey')
##' # Decrypt that file
##' fi <- qcrypt(file='report.pdf.encrypted', service='pdfkey')
##' # fi contains the full unencrypted file name which is in a temporary directory
##' # Encrypt without using a keyring
##' qcrypt(x, 'x', service=NA)
##' x <- qcrypt('x', service=NA)
##' }
qcrypt <- function(obj, base, service='R-keyring-service', file) {
## See the roxygen block above for full documentation.  Modes:
##   qcrypt(obj, 'base')            write obj -> base.qs.encrypted
##   x <- qcrypt('base')            read and decrypt base.qs.encrypted
##   qcrypt(file='f')               encrypt arbitrary file -> f.encrypted
##   qcrypt(file='f.encrypted')     decrypt into tempdir(), return path
## service=NA bypasses the keyring and always prompts for the password.
## Verify optional dependencies up front with informative errors
if(! is.na(service) && ! requireNamespace('keyring', quietly=TRUE))
stop('you must install the keyring package to use qcrypt with service != NA')
if(! requireNamespace('getPass', quietly=TRUE))
stop('you must install the getPass package to use qcrypt')
if(! requireNamespace('qs', quietly=TRUE))
stop('you must install the qs package to use qcrypt')
if(! requireNamespace('safer', quietly=TRUE))
stop('you must install the safer package to use qcrypt')
## Obtain the password: prompt every time (service=NA) or fetch from
## the keyring, prompting and storing only on first use
if(is.na(service)) {
prompt <- if(! missing(base)) 'Define password for storing encrypted data: '
else 'Enter password previously used to store data: '
pw <- getPass::getPass(msg = prompt, noblank=TRUE)
} else {
pw <- tryCatch(keyring::key_get(service), error = function(...) '')
if(pw == '') {
prompt <- if(! missing(base)) 'Define password for storing encrypted data: '
else 'Enter password previously used to store data: '
pw <- getPass::getPass(msg = prompt, noblank=TRUE)
keyring::key_set_with_value(service, password=pw)
}
## NOTE(review): pw is re-fetched even when already obtained above;
## redundant but harmless
pw <- keyring::key_get(service)
}
tf <- tempfile()
## General-file mode: encrypt or decrypt based on the file suffix
if(! missing(file)) {
if(! grepl('\\.encrypted$', file)) { # encrypt
f <- paste0(file, '.encrypted')
unlink(f) # just in case; encrypt_file throws an error if file exists
safer::encrypt_file(file, key=pw, outfile=f)
unlink(file)
return(invisible(f))
} else { # decrypt
fout <- paste(tempdir(), sub('\\.encrypted$', '', basename(file)), sep='/')
safer::decrypt_file(file, key=pw, outfile=fout)
return(invisible(fout))
}
}
## R-object mode, writing: serialize with qs then encrypt
if(! missing(base)) { # base specified -> create file
qs::qsave(obj, tf)
f <- paste0(base, '.qs.encrypted')
unlink(f) # just in case
safer::encrypt_file(tf, key=pw, outfile=f)
unlink(tf) # quickly remove unencrypted file
return(invisible(f))
}
# base is missing and base file name is assumed to be in the first argument, obj
f <- paste0(obj, '.qs.encrypted')
safer::decrypt_file(f, key=pw, outfile=tf)
x <- qs::qread(tf)
unlink(tf) # quickly remove unencrypted file
x
}
Hmisc/R/makeNstr.s 0000644 0001762 0000144 00000000357 12243661443 013500 0 ustar ligges users makeNstr <- function(char, len) {
## Vectorized string repetition: element i of the result is char[i]
## repeated len[i] times and collapsed into one string.
## len = NA yields a newline; len = 0 yields the empty string.
## char and len are recycled to a common length by mapply.
mapply(function(char, len) {
if(is.na(len)) {
'\n'
} else if(len == 0) {
''
} else {
paste(rep.int(x=char, times=len), collapse='')
}
}, char, len, USE.NAMES=FALSE)
}
Hmisc/R/stat-plsmo.r 0000644 0001762 0000144 00000007246 14401135001 014002 0 ustar ligges users #' Add a lowess smoother without counfidence bands.
#'
#' Automatically selects \code{iter=0} for \code{lowess} if \code{y} is binary, otherwise uses \code{iter=3}.
#'
#'
#' @param mapping,data,geom,position,show.legend,inherit.aes see ggplot2 documentation
#' @param span see \code{f} argument to \code{lowess}
#' @param fun a function to transform smoothed \code{y}
#' @param fullrange should the fit span the full range of the plot, or just
#' the data
#' @param n number of points to evaluate smoother at
#' @param na.rm If \code{FALSE} (the default), removes missing values with
#' a warning. If \code{TRUE} silently removes missing values.
#' @param ... other arguments are passed to smoothing function
#' @return a data.frame with additional columns
#' \item{y}{predicted value}
#' @seealso
#' \code{\link{lowess}} for \code{loess} smoother.
#' @export
#' @examples
#' \donttest{
#' require(ggplot2)
#' c <- ggplot(mtcars, aes(qsec, wt))
#' c + stat_plsmo()
#' c + stat_plsmo() + geom_point()
#'
#' c + stat_plsmo(span = 0.1) + geom_point()
#'
#' # Smoothers for subsets
#' c <- ggplot(mtcars, aes(y=wt, x=mpg)) + facet_grid(. ~ cyl)
#' c + stat_plsmo() + geom_point()
#' c + stat_plsmo(fullrange = TRUE) + geom_point()
#'
#' # Geoms and stats are automatically split by aesthetics that are factors
#' c <- ggplot(mtcars, aes(y=wt, x=mpg, colour=factor(cyl)))
#' c + stat_plsmo() + geom_point()
#' c + stat_plsmo(aes(fill = factor(cyl))) + geom_point()
#' c + stat_plsmo(fullrange=TRUE) + geom_point()
#'
#' # Example with logistic regression
#' data("kyphosis", package="rpart")
#' qplot(Age, as.numeric(Kyphosis) - 1, data = kyphosis) + stat_plsmo()
#' }
stat_plsmo <- function (mapping = NULL, data = NULL, geom = "smooth",
                        position = "identity",
                        n = 80, fullrange = FALSE, span=2/3, fun=function(x) x,
                        na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
                        ...) {
  ## Construct a ggplot2 layer that adds a lowess smooth with no
  ## confidence band (see the roxygen documentation above).
  ## The smoothing options are passed to StatPlsmo$compute_group via
  ## 'params'; any extra arguments in ... are forwarded untouched.
  smooth_params <- list(n         = n,
                        fullrange = fullrange,
                        span      = span,
                        fun       = fun,
                        na.rm     = na.rm,
                        ...)
  layer(stat        = StatPlsmo,
        data        = data,
        mapping     = mapping,
        geom        = geom,
        position    = position,
        show.legend = show.legend,
        inherit.aes = inherit.aes,
        params      = smooth_params)
}
## ggproto Stat backing stat_plsmo(): computes a lowess smooth per group.
## Automatically uses iter=0 (no robustness iterations) for binary y,
## iter=3 otherwise; the smoothed values may be transformed by 'fun'.
StatPlsmo <- ggplot2::ggproto("StatPlsmo", ggplot2::Stat,
required_aes = c("x", "y"),
setup_data = function(data, params) {
if (!requireNamespace("plyr", quietly = TRUE))
stop("This function requires the 'plyr' package.")
## Count distinct x values per group; a smooth is pointless when
## every group has a single x (likely a missing group aesthetic)
rows <- plyr::daply(data, "group", function(df) length(unique(df$x)))
if (all(rows == 1) && length(rows) > 1) {
message("geom_plsmo: Only one unique x value each group.",
"Maybe you want aes(group = 1)?")
return(data.frame())
}
data
},
## Note: the leading '.' argument is legacy proto-style calling
## convention retained from the original implementation
compute_group = function(., data, scales, n=80, span=2/3, fun=function(x) x,
fullrange=FALSE, xseq = NULL, na.rm = FALSE) {
## remove_missing is a ggplot2 helper; assumes ggplot2 is attached or
## its exports are otherwise visible -- TODO confirm
data <- remove_missing(data, na.rm, c("x", "y"), name="stat_plsmo")
if (length(unique(data$x)) < 2) {
# Not enough data to perform fit
return(data.frame())
}
## Choose where to evaluate the smoother: the observed x values for
## integer x, otherwise an n-point grid over the (full or data) range
if (is.null(xseq)) {
if (is.integer(data$x)) {
if (fullrange) {
xseq <- scales$x$dimension()
} else {
xseq <- sort(unique(data$x))
}
} else {
if (fullrange) {
range <- scales$x$dimension()
} else {
range <- range(data$x, na.rm = TRUE)
}
xseq <- seq(range[1], range[2], length.out = n)
}
}
## Binary (or 2-valued) y: disable lowess robustness iterations,
## which misbehave for 0/1 outcomes
n_y <- length(unique(data$y[!is.na(data$y)]))
z <- lowess(data$x, data$y,
iter = if (n_y < 3) 0 else 3,
f = span
)
## Interpolate the smooth onto the evaluation grid, then transform
z <- approx(z, xout = xseq)
z$y <- fun(z$y)
as.data.frame(z)
}
)
Hmisc/R/histbackback.s 0000644 0001762 0000144 00000003652 13361120173 014316 0 ustar ligges users histbackback <-
function(x, y, brks = NULL, xlab = NULL, axes = TRUE, probability = FALSE,
xlim = NULL, ylab='', ...)
{
## Back-to-back horizontal histograms of x and y on shared breaks:
## counts (or densities, if probability=TRUE) for x extend left of zero
## and for y extend right.  x may also be a 2-element list holding both
## samples.  Axis labels default to the deparsed argument expressions.
## Returns (invisibly) list(left, right, breaks).
if(length(xlab))
xlab <- rep(xlab, length = 2)
## List input: extract the two samples and derive labels from names
if(is.list(x))
{
namx <- names(x)
y <- x[[2]]
if(!length(xlab))
{
if(length(namx))
xlab <- namx[1:2]
else
{
xlab <- deparse(substitute(x))
xlab <- paste(xlab, c("x", "y"), sep = "$")
}
}
x <- x[[1]]
}
else if(!length(xlab))
xlab <- c(deparse(substitute(x)), deparse(substitute(y)))
## Common break points from the pooled data so the two sides align
if(!length(brks))
brks <- hist(c(x, y), plot = FALSE)$breaks
ll <- hist(x, breaks = brks, plot = FALSE)
rr <- hist(y, breaks = brks, plot = FALSE)
if(probability)
{
ll$counts <- ll$density
rr$counts <- rr$density
}
## Symmetric-ish x range covering -left counts and +right counts
if(length(xlim) == 2)
xl <- xlim
else
{
xl <- pretty(range(c( - ll$counts, rr$counts)))
xl <- c(xl[1], xl[length(xl)])
}
## Left side: negated counts; par(new=TRUE) overlays the right side
if(length(ll$counts) > 0)
{
barplot(-ll$counts, xlim=xl, space=0,
horiz=TRUE, axes=FALSE, col=0, ...)
par(new = TRUE)
}
if(length(rr$counts) > 0)
barplot(rr$counts, xlim=xl, space=0,
horiz=TRUE, axes=FALSE, col=0, ...)
if(axes)
{
## x axis shows absolute counts on both sides of zero
mgp.axis(1, at=pretty(xl), labels=format(abs(pretty(xl))))
## Map break values onto bar positions (bars sit at 0,1,2,...) so the
## y axis can be labeled in data units via linear extrapolation
del <- (brks[2]-brks[1] - (brks[3]-brks[2]))/2
brks[1] <- brks[1] + del
brks[-1] <- brks[-1] - del
at <- 0 : (length(brks) - 1)
pb <- pretty(brks)
atpb <- approxExtrap(brks, at, xout=pb)$y
mgp.axis(2, at=atpb, labels=format(pb))
## Center each x label under its own half of the plot
title(xlab = xlab[1], adj = (-0.5 * xl[1])/( - xl[1] + xl[2]))
title(xlab = xlab[2], adj = (-xl[1] + 0.5 * xl[2])/(-xl[1] + xl[2]))
if(ylab!='') title(ylab=ylab)
}
abline(v = 0)
box()
invisible(list(left = ll$counts, right = rr$counts, breaks = brks))
}
Hmisc/R/strgraphwrap.s 0000644 0001762 0000144 00000005760 12243661443 014443 0 ustar ligges users strgraphwrap <-
  ## Wrap character strings to a given *graphical* width, measuring words
  ## with strwidth() in the current device's coordinates instead of
  ## character counts.  Modelled on base::strwrap(); paragraph breaks are
  ## blank lines, words are split on whitespace.
  ##
  ## x        : character vector to wrap
  ## width    : target line width in 'units' coordinates
  ## indent   : extra indent (in spaces) for the first line of a paragraph
  ## exdent   : indent for subsequent lines
  ## prefix   : string prepended to every output line
  ## simplify : if TRUE return a character vector, else a list per element
  ## units    : coordinate system for strwidth ('user', 'inches', ...)
  ## cex      : character expansion passed to strwidth
  function (x, width = 0.9 * getOption("width"),
            indent = 0, exdent = 0,
            prefix = "", simplify = TRUE, units='user', cex=NULL)
{
    if (!is.character(x))
        x <- as.character(x)
    ## Width of a single space / the prefix, in graphical units; indent
    ## and exdent are converted from space counts to graphical widths
    spc.len <- strwidth(" ", units=units, cex=cex)
    prefix.len <- strwidth(prefix, units = units, cex=cex)
    indentString <- paste(rep.int(" ", indent), collapse = "")
    indent <- indent * spc.len
    exdentString <- paste(rep.int(" ", exdent), collapse = "")
    exdent <- exdent * spc.len
    y <- list()
    ## Split each element into paragraphs (blank-line separated), then words
    z <- lapply(strsplit(x, "\n[ \t\n]*\n"), strsplit, "[ \t\n]")
    for (i in seq_along(z)) {
        yi <- character(0)
        for (j in seq_along(z[[i]])) {
            words <- z[[i]][[j]]
            nc <- strwidth(words, units=units, cex=cex)
            ## NOTE(review): this NA fallback recomputes the identical
            ## strwidth() call, so NA widths are not actually replaced.
            ## base::strwrap() falls back to nchar(type="bytes") here —
            ## verify whether that was the intent.
            if (any(is.na(nc))) {
                nc0 <- strwidth(words, units=units, cex=cex)
                nc[is.na(nc)] <- nc0[is.na(nc)]
            }
            ## Drop zero-width "words" except those following a period
            ## (sentence-end double-space convention, as in strwrap)
            if (any(nc == 0)) {
                zLenInd <- which(nc == 0)
                zLenInd <- zLenInd[!(zLenInd %in% (grep("\\.$",
                  words) + 1))]
                if (length(zLenInd) > 0) {
                  words <- words[-zLenInd]
                  nc <- nc[-zLenInd]
                }
            }
            ## Empty paragraph: emit a blank line plus prefix
            if (length(words) == 0) {
                yi <- c(yi, "", prefix)
                next
            }
            ## Greedy line filling: lens holds cumulative widths (word +
            ## trailing space); each pass takes as many words as fit in
            ## maxLength, always at least one
            currentIndex <- 0
            lowerBlockIndex <- 1
            upperBlockIndex <- integer(0)
            lens <- cumsum(nc + spc.len)
            first <- TRUE
            maxLength <- width - prefix.len -
                indent
            while (length(lens) > 0) {
                k <- max(sum(lens <= maxLength), 1)
                ## After the first line, switch from indent to exdent
                if (first) {
                  first <- FALSE
                  maxLength <- maxLength + indent - exdent
                }
                currentIndex <- currentIndex + k
                if (nc[currentIndex] == 0)
                  upperBlockIndex <- c(upperBlockIndex, currentIndex -
                    1)
                else upperBlockIndex <- c(upperBlockIndex, currentIndex)
                if (length(lens) > k) {
                  ## Skip a zero-width word at the start of the next line
                  if (nc[currentIndex + 1] == 0) {
                    currentIndex <- currentIndex + 1
                    k <- k + 1
                  }
                  lowerBlockIndex <- c(lowerBlockIndex, currentIndex +
                    1)
                }
                ## Rebase cumulative widths for the remaining words
                if (length(lens) > k)
                  lens <- lens[-(1:k)] - lens[k]
                else lens <- NULL
            }
            ## Assemble lines: prefix + indent/exdent + joined words
            nBlocks <- length(upperBlockIndex)
            s <- paste(prefix, c(indentString, rep.int(exdentString,
                nBlocks - 1)), sep = "")
            for (k in (1:nBlocks)) s[k] <- paste(s[k], paste(words[lowerBlockIndex[k]:upperBlockIndex[k]],
                collapse = " "), sep = "")
            yi <- c(yi, s, prefix)
        }
        ## Drop the trailing paragraph-separator prefix line
        y <- if (length(yi))
            c(y, list(yi[-length(yi)]))
        else c(y, "")
    }
    if (simplify)
        y <- unlist(y)
    y
}
Hmisc/R/valueTags.s 0000644 0001762 0000144 00000003641 12250441743 013643 0 ustar ligges users ## $Id$
## Attribute names recognized as "value tags": the variable label, its
## measurement units, and a short label stored as attribute 'shortlabel'.
.valueTagAttrs <- c(label="label", units="units", name="shortlabel")

## Return the subset of x's attributes that are value tags
## (NULL when x has no attributes or none of them are tags).
valueTags <- function(x) {
  a <- attributes(x)
  a[names(a) %in% .valueTagAttrs]
}
## Replacement function: set all value-tag attributes (label, units,
## shortlabel) of x at once from a named list.
## - value NULL or empty: remove all value-tag attributes and drop the
##   'labelled' class.
## - otherwise value must be a list; entries with unknown names or NULL
##   values are silently discarded.  Tags present on x but absent from
##   value are removed; the remainder are merged in via consolidate<-
##   (Hmisc).  The 'labelled' class is added if not already present.
"valueTags<-" <- function(x, value) {
  if(is.null(value) || length(value) == 0) {
    ## Clear all tags and the labelled class
    attributes(x)[names(attributes(x)) %in% .valueTagAttrs] <- NULL
    class(x) <- class(x)[class(x) != 'labelled']
    return(x)
  }
  if(!is.list(value)) {
    stop("list must be a named list of valueTags")
  }
  ## Drop entries that are not recognized tags or are NULL
  value[(!names(value) %in% .valueTagAttrs) |
        unlist(lapply(value, is.null))] <- NULL
  if(length(value) == 0) {
    ## Nothing valid remained: same as clearing all tags
    attributes(x)[names(attributes(x)) %in% .valueTagAttrs] <- NULL
    class(x) <- class(x)[class(x) != 'labelled']
    return(x)
  }
  ## Remove existing tag attributes not mentioned in value, then merge
  attributes(x)[setdiff(names(attributes(x))[names(attributes(x)) %in%
                                             .valueTagAttrs],
                        names(value))] <- NULL
  consolidate(attributes(x)) <- value
  if(all(class(x) != 'labelled'))
    class(x) <- c('labelled', class(x))
  return(x)
}
## Retrieve the 'label' attribute of x (NULL if absent).
valueLabel <- function(x) attr(x, 'label')

## Set the 'label' attribute of x.  value must be a single character
## string; the 'labelled' class is added without being duplicated.
"valueLabel<-" <- function(x, value) {
  if(! (is.character(value) && length(value) == 1))
    stop("value label must be a character vector of length 1")
  attr(x, 'label') <- value
  cl <- class(x)
  class(x) <- c('labelled', cl[cl != 'labelled'])
  x
}
## Retrieve the 'units' attribute of x (NULL if absent).
valueUnit <- function(x) attr(x, 'units')

## Set the 'units' attribute of x.  value must be a single character
## string; the 'labelled' class is added without being duplicated.
"valueUnit<-" <- function(x, value) {
  if(! (is.character(value) && length(value) == 1))
    stop("value unit must be a character vector of length 1")
  attr(x, 'units') <- value
  cl <- class(x)
  class(x) <- c('labelled', cl[cl != 'labelled'])
  x
}
## Retrieve the 'valueName' attribute of x (NULL if absent).
## NOTE(review): this uses attribute 'valueName', while .valueTagAttrs
## maps name to 'shortlabel', so valueTags() will not report it —
## confirm whether that asymmetry is intended.
valueName <- function(x) attr(x, 'valueName')

## Set the 'valueName' attribute of x.  value must be a single character
## string; the 'labelled' class is added without being duplicated.
"valueName<-" <- function(x, value) {
  if(! (is.character(value) && length(value) == 1))
    stop("value name must be a character vector of length 1")
  attr(x, 'valueName') <- value
  cl <- class(x)
  class(x) <- c('labelled', cl[cl != 'labelled'])
  x
}
Hmisc/R/latexDotchart.s 0000644 0001762 0000144 00000013373 12723405372 014524 0 ustar ligges users latexDotchart <-
  ## Build a LaTeX picture environment containing a dot chart (in the
  ## style of dotchart2), returned as a character vector of LaTeX source.
  ##
  ## data     : numeric vector to plot
  ## labels   : point labels (default: names(data))
  ## groups   : optional grouping factor; group labels become headers
  ## gdata    : optional per-group summary values plotted on header lines
  ## xlab     : x-axis title
  ## auxdata/auxgdata/auxtitle : optional right-margin column of text
  ## w, h     : width/height of the picture in inches
  ## margin   : left/bottom/right/top margins in inches (auto-computed)
  ## lines    : draw horizontal reference lines
  ## dotsize  : diameter of plotted dots (inches)
  ## size, size.labels, size.group.labels : LaTeX font size commands
  ## ttlabels : typeset point labels in \texttt
  ## sort.    : sort data descending (within groups when grouped)
  ## xaxis    : draw the x axis
  ## lcolor   : color of reference lines
  function(data, labels, groups = NULL, gdata = NA, 
           xlab = "", auxdata, auxgdata=NULL, auxtitle,
           w=4, h=4, margin,  lines = TRUE, dotsize = .075, size='small',
           size.labels = 'small', size.group.labels = 'normalsize',
           ttlabels = FALSE, sort.=TRUE, xaxis=TRUE, lcolor='gray',
           ...)
{
  ## Emit \put/\makebox commands placing strings s at (x, y) with the
  ## given justification, optional font size and \texttt wrapping
  txt <- function(x, y, s, size=NULL, just=c('c','l','r'), tt=FALSE) {
    just <- match.arg(just)
    s <- latexTranslate(s)
    n <- max(length(x), length(y), length(s))
    x <- rep(x, length.out=n)
    y <- rep(y, length.out=n)
    s <- rep(s, length.out=n)
    z <- character(n)
    if(tt) s <- paste('\\texttt{', s, '}', sep='')
    if(length(size)) s <- paste('\\', size, ' ', s, sep='')
    for(i in 1:n)
      z[i] <- sprintf('\\put(%g,%g){\\makebox(.001,.001)[%s]{%s}}',
                      x[i], y[i], just, s[i])
    z
  }

  ## Emit line-drawing commands from (x1,y1) to (x2,y2); axis-parallel
  ## segments use \line, others \drawline; non-black lines are wrapped
  ## in \color commands
  ln <- function(x1, y1, x2, y2, color='black') {
    n <- max(length(x1), length(x2), length(y1), length(y2))
    x1 <- rep(x1, length.out=n)
    y1 <- rep(y1, length.out=n)
    x2 <- rep(x2, length.out=n)
    y2 <- rep(y2, length.out=n)
    z <- character(n)
    for(i in 1:n)
      z[i] <- if(x1[i] == x2[i])
        sprintf('\\put(%g,%g){\\line(0,%g){%g}}', x1[i], y1[i],
                1*(y2[i] >= y1[i]) - 1*(y2[i] < y1[i]), abs(y2[i]-y1[i]))
      else if(y1[i] == y2[i])
        sprintf('\\put(%g,%g){\\line(%g,0){%g}}', x1[i], y1[i],
                1*(x2[i] >= x1[i]) - 1*(x2[i] < x1[i]), abs(x2[i]-x1[i]))
      else
        sprintf('\\drawline(%g,%g)(%g,%g)',
                x1[i], y1[i], x2[i], y2[i])
    if(color != 'black')
      z <- c(if(color == 'gray') '\\color[gray]{0.8}' else
             sprintf('\\color{%s}', color),
             z, '\\color{black}')
    z
  }

  ## Approximate length in inches of longest char. string
  acl <- function(s) 0.09 * max(nchar(s))
    
  f <- sprintf
  
  if(size.labels == size)       size.labels <- NULL
  if(size.group.labels == size) size.group.labels <- NULL

  z <- c('\\setlength{\\unitlength}{1in}',
         f('\\begin{picture}(%g,%g)', w, h),
         f('\\%s', size))
  ndata <- length(data)

  ## Default labels from names(data), else "#1", "#2", ...
  if(missing(labels)) {
    if(length(names(data)))
      labels <- names(data)
    else labels <- paste("#", seq(along = ndata))
  }
  else labels <- rep(as.character(labels), length = ndata)

  if(missing(groups)) {
    glabels <- NULL
    gdata   <- NULL
    if(sort.) {
      ord    <- order(-data)
      data   <- data[ord]
      labels <- labels[ord]
      if(! missing(auxdata)) auxdata <- auxdata[ord]
    }
  } else {
    if(! sort.) {
      ##assume data sorted in groups, but re-number groups
      ##to be as if groups given in order 1,2,3,...
      ug <- unique(as.character(groups))
      groups <- factor(as.character(groups), levels=ug)
    }

    groups  <- unclass(groups)
    glabels <- levels(groups)
    gdata   <- rep(gdata, length = length(glabels))
    ## Sort descending within group, or keep within-group input order
    ord     <- if(sort.) order(groups, -data) else
                         order(groups, seq(along = groups))
    groups  <- groups[ord]
    data    <- data[ord]
    labels  <- labels[ord]
    if(! missing(auxdata)) auxdata <- auxdata[ord]
  }

  ## Combine point and group-summary values/labels for layout
  alldat <- c(data, gdata)
  if(! missing(auxdata)) auxdata <- format(c(auxdata, auxgdata))

  alllab <- c(labels, glabels)
  ## set up margins and user coordinates, draw box
  xl <- range(p <- pretty(alldat))
  yl <- c(1, length(alldat))
  if(missing(margin))
    margin <- c(acl(alllab),
                ifelse(xlab == '', .2, .4),
                ifelse(missing(auxdata),  0, acl(auxdata)),
                ifelse(missing(auxtitle), 0, .1))

  ## Transforms from data coordinates to picture inches
  xt <- function(x) round((w - sum(margin[c(1,3)]))*(x - xl[1])/diff(xl) +
                          margin[1], 5)
  yt <- function(y) round((h - sum(margin[c(2,4)]))*(y - yl[1])/diff(yl) +
                          margin[2], 5)

  ## \color screws up line and circle placement if first multiputlist
  ## and put occur after \color
  if(xaxis) {
    z <- c(z, paste(f('\\multiputlist(%g,%g)(%g,%g){',
                      xt(xl[1]), yt(yl[1]) - .15, diff(xt(p[1:2])), 0),
                    paste(p, collapse=','), '}', sep=''))
    ## Tick marks below the axis line
    z <- c(z, ln(xt(p), yt(yl[1]) - 0.05, xt(p), yt(yl[1])))
    if(xlab != '')
      z <- c(z, txt(xt(xl[1] + diff(xl)/2), .1, xlab))
  }
      
  ## Plot region border (left/right, bottom/top)
  z <- c(z, ln(xt(xl), yt(yl[1]), xt(xl), yt(yl[2])),
            ln(xt(xl[1]), yt(yl), xt(xl[2]), yt(yl)))

  ## Vertical layout: each group header consumes two extra slots
  den  <- ndata + 2 * length(glabels) + 1
  delt <- ( - (yl[2] - yl[1]))/den
  ypos <- seq(yl[2], by = delt, length = ndata)
  
  if(! missing(groups)) {
    ## Shift rows down at each group change; ypos2 holds header positions
    ypos1 <- ypos + 2 * delt * (if(length(groups)>1)
                                  cumsum(c(1, diff(groups) > 0))
                                else 1)
    diff2 <- c(3 * delt, diff(ypos1))
    ypos2 <- ypos1[abs(diff2 - 3 * delt) < abs(0.001 * delt)] -
      delt
    ypos  <- c(ypos1, ypos2) - delt
  }

  ##put on labels and data
  ypos   <- ypos + delt
  nongrp <- 1:ndata
  if(lines)
    z <- c(z, ln(xt(xl[1]), yt(ypos[nongrp]), xt(xl[2]), yt(ypos[nongrp]),
                 color=lcolor))

  ## Dots for all non-missing values (points and group summaries)
  for(i in seq(along = alldat))
    if(! is.na(alldat[i] + ypos[i]))
      z <- c(z, f('\\put(%g,%g){\\circle*{%g}}',
                  xt(alldat[i]), yt(ypos[i]), dotsize))

  ## Right-margin auxiliary column and its title
  if(! missing(auxdata)) {
    z <- c(z, txt(w - 0.02, yt(ypos[nongrp]), auxdata,
                  size=size.labels, just='r'))
    if(! missing(auxtitle))
      z <- c(z, txt(w - 0.02, yt(yl[2]) + 0.1, auxtitle,
                    size=size.labels, just='r'))
  }
  ## Left-margin point labels, then group header labels
  labng <- alllab[nongrp]
  yposng <- ypos[nongrp]

  z <- c(z, txt(margin[1] - 0.05, yt(yposng), labng,
                size=size.labels, just='r', tt=ttlabels))

  if(! missing(groups))
    z <- c(z, txt(margin[1] - 0.05, yt(ypos[-nongrp]), alllab[-nongrp],
                  size=size.group.labels, just='r'))

  z <- c(z, '\\end{picture}')
  z
}
Hmisc/R/biVar.s 0000644 0001762 0000144 00000017667 12604551203 012764 0 ustar ligges users biVar <- function(formula, statinfo, data=NULL, subset=NULL,
                  na.action=na.retain, exclude.imputed=TRUE, ...)
{
  ## Driver for bivariate y-vs-x summaries: applies statinfo$fun to the
  ## response and each right-hand-side variable in turn, pairwise-deleting
  ## missing (and optionally imputed) values.  Returns a matrix of class
  ## 'biVar' (one row per predictor, plus an 'n' column) carrying the
  ## statinfo list and variable labels as attributes; see print.biVar
  ## and plot.biVar.
  call <- match.call()
  x <- do.call('model.frame',
               list(formula, data=data, subset=subset, na.action=na.action))
  nam <- names(x);  yname <- nam[1]
  y <- x[[1]]
  ylabel <- label(y)
  ## Remaining columns are the predictors
  x <- x[-1]
  xlabel <- sapply(x, label)
  m <- ncol(x)
  statnames <- statinfo$names
  stats <- matrix(NA, nrow=m, ncol=length(statnames),
                  dimnames=list(names(x), statnames))
  nmin <- statinfo$nmin   # minimum n required to compute the statistic
  fun  <- statinfo$fun
  N <- integer(m)
  ## Rows with any NA in (possibly multi-column, e.g. Surv) response
  yna <- if(is.matrix(y)) is.na(y %*% rep(1,ncol(y))) else is.na(y)
  for(i in 1:m) {
    w <- x[[i]]
    ## Keep observations non-missing on both y and this predictor
    j <- !(yna | is.na(w))
    if(exclude.imputed) j <- j & !(is.imputed(w) | is.imputed(y))
    yy <- if(is.matrix(y)) y[j,,drop=FALSE] else y[j]
    w <- w[j]
    N[i] <- length(w)
    stats[i,] <- if(N[i] >= nmin) fun(w, yy, ...) else
                 rep(NA, length(statnames))
  }
  stats <- cbind(stats, n=N)
  structure(stats, class='biVar', yname=yname, 
            ylabel=ylabel, xlabel=xlabel,
            statinfo=statinfo, call=call)
}
## Print method for 'biVar' objects: a header naming the response,
## then the statistics matrix rounded per statinfo$digits (the appended
## 'n' column is rounded to 0 digits).  Returns NULL invisibly.
print.biVar <- function(x, ...) {
  info  <- attr(x, 'statinfo')
  yname <- attr(x, 'yname')
  cat('\n', info$title, ' Response variable:', yname, '\n\n', sep='')
  ## one rounding digit per statistic column; trailing 0 is for n
  digits <- c(info$digits, 0)
  for(j in seq_len(ncol(x)))
    x[, j] <- round(x[, j], digits[j])
  ## strip all biVar attributes so the matrix prints plainly
  for(a in c('yname', 'statinfo', 'call', 'ylabel', 'xlabel'))
    attr(x, a) <- NULL
  class(x) <- NULL
  print(x)
  invisible()
}
## Plot method for 'biVar' objects: dot chart (via Hmisc dotchart3) of
## one chosen statistic per predictor, annotated in the right margin
## with sample sizes (and the statinfo 'aux' column when present).
## what   : which statistic column to plot (default statinfo$defaultwhat;
##          note the default is evaluated lazily, after info is defined)
## sort.  : sort predictors by the plotted statistic
## vnames : use variable names or their labels for axis annotation
plot.biVar <- function(x,
                       what=info$defaultwhat,
                       sort.=TRUE,
                       main, xlab,
                       vnames=c('names','labels'), ...) {

  vnames <- match.arg(vnames)
  yname <- attr(x, 'yname')
  ylabel <- attr(x, 'ylabel')
  ## Spaces in the label are converted to ~ for plotmath parsing below
  if(vnames=='labels' && ylabel!='') yname <- sedit(ylabel, ' ', '~')
  xlabel <- attr(x, 'xlabel')
  info <- attr(x, 'statinfo')
  aux <- info$aux
  auxlabel <- info$auxlabel
  if(!length(auxlabel)) auxlabel <- aux

  ## Validate requested statistic and derive axis label / title
  i <- match(what, info$names)
  if(is.na(i)) stop(paste('what must be one of',
                          paste(info$names,collapse=' ')))
  if(missing(xlab))
    xlab <- info$rxlab[i]
  if(missing(main)) main <-
    parse(text=paste(as.character(info$rmain),'~~~~Response:',
            yname,sep=''))
  ## Right-margin annotation: n, optionally followed by the aux column
  auxtitle <- 'N'; auxdata <- format(x[,'n'])
  if(length(aux)) {
    auxtitle <- paste('N', auxlabel, sep='  ')
    auxdata <-  paste(format(x[,'n']), format(x[,aux]))
  }
  stat <- x[,what]
  if(vnames=='labels')
    names(stat) <- ifelse(xlabel=='', names(stat), xlabel)
  if(sort.) {
    i <- order(stat)
    stat <- stat[i]
    auxdata <- auxdata[i]
  }
  dotchart3(stat, auxdata=auxdata,
            xlab=xlab, auxtitle=auxtitle,
            main=main, ...)
  invisible()
}
## Pearson chi-square tests of each right-hand-side variable against the
## response, run through the biVar driver.
## formula         : y ~ x1 + x2 + ...
## exclude.imputed : drop imputed values before testing
## ...             : passed to the statistic function g (minlev, g)
## Returns a 'biVar' matrix with chi-square, d.f., chi-square - d.f., P.
chiSquare <- function(formula, data=NULL, subset=NULL, na.action=na.retain,
                      exclude.imputed=TRUE, ...) {

  ## Statistic for one predictor: optionally pool rare levels (minlev)
  ## and bin continuous x into g quantile groups, then chisq.test
  g <- function(x, y, minlev=0, g=3) {
    if(minlev) y <- combine.levels(y, minlev=minlev)
    if((is.character(x) || is.factor(x)) && minlev)
      x <- combine.levels(x, minlev=minlev)
    if(is.numeric(x) && length(unique(x)) > g) x <- cut2(x, g=g)
    ct <- chisq.test(x, y)
    chisq <- ct$statistic
    df    <- ct$parameter
    pval  <- ct$p.value
    c(chisq, df, chisq-df, pval)
  }

  statinfo <- list(fun=g,
                   title='Pearson Chi-square Tests',
                   main='Pearson Chi-squared',
                   rmain=expression(Pearson~chi^2),
                   names=c('chisquare','df','chisquare-df','P'),
                   xlab=c('Chi-square','d.f.','Chi-square - d.f.','P-value'),
                   rxlab=expression(chi^2, d.f., chi^2 - d.f., P-value),
                   digits=c(2,0,2,4),
                   aux='df', nmin=2, defaultwhat='chisquare-df')

  ## Fix: forward the user's exclude.imputed setting (was hard-coded TRUE,
  ## silently ignoring the formal argument; compare spearman2.formula)
  biVar(formula, statinfo=statinfo, data=data, subset=subset,
        na.action=na.action, exclude.imputed=exclude.imputed, ...)
}
## Generic for Spearman rank correlation tests; dispatches on class of x
spearman2 <- function(x, ...) UseMethod("spearman2")
## Generalized Spearman rho^2 of x against y, with an F test.
## p=1 : ordinary rank correlation (rho^2); p=2 : quadratic in rank(x),
## allowing a test of non-monotonic association.  Categorical x with
## more than two levels is expanded into dummy variables (after optional
## pooling of rare levels via minlev) and rho^2 is the R^2 of rank(y)
## regressed on the dummies.
## Returns c(rho2, F, df1, df2, P, n, 'Adjusted rho2').
spearman2.default <- function(x, y, p=1, minlev=0,
                              na.rm=TRUE, exclude.imputed=na.rm, ...)
{
  if(p > 2)
    stop('p must be 1 or 2')

  y <- as.numeric(y)
  if(is.character(x))
    x <- factor(x)

  ## Pairwise deletion of missing (and optionally imputed) values
  if(na.rm) {
    s <- !(is.na(x) | is.na(y))
    if(exclude.imputed) {
      im <- is.imputed(x) | is.imputed(y)
      s <- s & !im
    }
    x <- x[s]; y <- y[s]
  }
  n <- length(x)
  
  ## If number of non-NA values is less then 3 then return a NA
  ## value.
  if(n < 3)
    return(c(rho2=NA,F=NA,df1=0,df2=n,P=NA,n=n,'Adjusted rho2'=NA))

  ## Find the number of unique values in x
  u <- length(unique(x))

  ## If is a factor and unique values are greater then 2 then find the
  ## lm.fit.qr.bare without an intercept.
  if(is.factor(x) && u > 2) {
    if(minlev > 0) {
      x <- combine.levels(x, minlev)
      if(length(levels(x))<2) {
        warning(paste('x did not have >= 2 categories with >=',
                      minlev,'of the observations'))
        return(c(rho2=NA,F=NA,df1=0,df2=n,P=NA,n=n,'Adjusted rho2'=NA))
      }
    }
    
    x <- model.matrix(~x, data=data.frame(x))
    ## df1 = number of dummy columns (levels - 1)
    p <- ncol(x)-1
    rsquare <- lm.fit.qr.bare(x, rank(y), intercept=FALSE)$rsquared
  } else {
    x <- as.numeric(x)
    ## Binary x: quadratic term would be collinear, force p=1
    if(u < 3)
      p <- 1
    
    x <- rank(x)
    rsquare <-
      if(p==1)
        cor(x, rank(y))^2
      else {
        x <- cbind(x, x^2)
        lm.fit.qr.bare(x, rank(y), intercept=TRUE)$rsquared
      }
  }
  
  ## F test of R^2 with (p, n-p-1) d.f., plus adjusted R^2
  df2 <- n-p-1
  fstat <- rsquare/p/((1-rsquare)/df2)
  pvalue <- 1-pf(fstat,p,df2)
  rsqa <- 1 - (1 - rsquare)*(n-1)/df2
  
  x <- c(rsquare,fstat,p,df2,pvalue,n,rsqa)
  names(x) <- c("rho2","F","df1","df2","P","n","Adjusted rho2")
  x
}
## Formula method for spearman2: runs spearman2.default on each
## right-hand-side variable via the biVar driver.  The statistic
## function drops element 6 (n) since biVar appends its own n column.
## Returns a 'biVar' matrix; see print.biVar / plot.biVar.
spearman2.formula <- function(formula, data=NULL, subset=NULL,
                              na.action=na.retain,
                              exclude.imputed=TRUE, ...)
{
  g <- function(x, y, p=1, minlev=0)
    spearman2(x, y, p=p, minlev=minlev, na.rm=FALSE)[-6]

  statinfo <- list(fun=g,
                   title='Spearman rho^2',
                   main='Spearman rho^2',
                   rmain=expression(Spearman~rho^2),
                   names=c('rho2','F','df1','df2','P','Adjusted rho2'),
                   ## Fix: first entry was 'df2'; it labels the df1 column
                   ## (matches names and rxlab)
                   xlab=c('rho^2','F','df1','df2','P-value','Adjusted rho^2'),
                   rxlab=expression(rho^2, F, df1, df2, P-value, Adjusted~rho^2),
                   digits=c(3,2,0,0,4,3),
                   aux='df1', auxlabel='df', nmin=2, defaultwhat='Adjusted rho2')

  biVar(formula, statinfo=statinfo, data=data, subset=subset,
        na.action=na.action, exclude.imputed=exclude.imputed, ...)
}
## Generic for rank correlation with (possibly censored) responses
rcorrcens <- function(x, ...) UseMethod("rcorrcens")
## Somers' Dxy rank correlation between each predictor and a censored
## response, via the biVar driver and Hmisc rcorr.cens.
## outx : passed to rcorr.cens (exclude ties in x from concordance calc.)
rcorrcens.formula <- function(formula, data=NULL, subset=NULL,
                              na.action=na.retain,
                              exclude.imputed=TRUE, outx=FALSE, ...)
{
  ## Statistic for one predictor.  For a factor with >2 levels, x is
  ## dichotomized twice against its two most frequent levels and the
  ## dichotomization with the larger |Dxy| is kept.  A Z test of
  ## Dxy = 0 uses the rcorr.cens standard error.
  g <- function(x, y, outx)
    {
      lev <- levels(x)
      if(is.factor(x) && length(lev)==2) x <- as.integer(x)
      
      u <- if(is.factor(x))
             {
               i <- order(-table(x))
               u <- rcorr.cens(1*(x==lev[i[1]]), y, outx=outx)
               v <- rcorr.cens(1*(x==lev[i[2]]), y, outx=outx)
               if(abs(v['Dxy']) > abs(u['Dxy'])) v else u
             }
           else rcorr.cens(x, y, outx=outx)
      Dxy <- u['Dxy']
      SE <- u['S.D.']
      aDxy <- abs(Dxy)
      z <- aDxy/SE
      P <- 2 * pnorm(- z)
      c(C=u['C Index'], Dxy=Dxy, aDxy=aDxy, SD=SE, Z=z, P=P)
    }
  
  statinfo <- list(fun=g,
                   title="Somers' Rank Correlation for Censored Data",
                   main="Somers' Rank Correlation",
                   rmain=expression(paste("Somers' ", D[xy])),
                   names=c('C','Dxy','aDxy','SD','Z','P'),
                   xlab=c('C','Dxy','|Dxy|','SD','Z','P-value'),
                   rxlab=expression(C-index, D[xy], paste('|',D[xy],'|'), SD, Z, P-value),
                   digits=c(3,3,3,3,2,4),
#                   aux='n', auxlabel='N',
                   nmin=2, defaultwhat='aDxy')
  
  biVar(formula, statinfo=statinfo, data=data, subset=subset,
        na.action=na.action, exclude.imputed=exclude.imputed, outx=outx, ...)
}
## Repeat each element of a character vector a given number of times,
## concatenating the copies (vectorized over string and times; the two
## vectors are recycled against each other in compiled code).
## string : atomic vector, coerced to character
## times  : numeric vector of repeat counts, coerced to integer
## Returns NULL for zero-length string, character(0) for zero-length
## times; otherwise the result of the registered C routine do_nstr.
nstr <- function(string, times) {
  if(!is.atomic(string))
    stop("argument string must be an atomic vector")
  ## Fix: message referred to a nonexistent argument 'len'
  if(!is.numeric(times))
    stop("argument times must be a numeric vector")

  if(length(string) == 0)
    return(NULL)
  if(length(times) == 0)
    return(character(0))

  ## Heavy lifting in compiled code shipped with the package
  return(.Call("do_nstr", as.character(string), as.integer(times)))
}
Hmisc/R/combine.levels.r 0000644 0001762 0000144 00000010764 14377377210 014631 0 ustar ligges users ##' Combine Infrequent Levels of a Categorical Variable
##'
##' After turning `x` into a `factor` if it is not one already, combines
##' levels of `x` whose frequency falls below a specified relative frequency `minlev` or absolute count `m`. When `x` is not treated as ordered, all of the
##' small frequency levels are combined into `"OTHER"`, unless `plevels=TRUE`.
##' When `ord=TRUE` or `x` is an ordered factor, only consecutive levels
##' are combined. New levels are constructed by concatenating the levels with
##' `sep` as a separator. This is useful when comparing ordinal regression
##' with polytomous (multinomial) regression and there are too many
##' categories for polytomous regression. `combine.levels` is also useful
##' when assumptions of ordinal models are being checked empirically by
##' computing exceedance probabilities for various cutoffs of the
##' dependent variable.
##' @title combine.levels
##' @param x a factor, `ordered` factor, or numeric or character variable that will be turned into a `factor`
##' @param minlev the minimum proportion of observations in a cell before that cell is combined with one or more cells. If more than one cell has fewer than minlev*n observations, all such cells are combined into a new cell labeled `"OTHER"`. Otherwise, the lowest frequency cell is combined with the next lowest frequency cell, and the level name is the combination of the two old level levels. When `ord=TRUE` combinations happen only for consecutive levels.
##' @param m alternative to `minlev`, is the minimum number of observations in a cell before it will be combined with others
##' @param ord set to `TRUE` to treat `x` as if it were an ordered factor, which allows only consecutive levels to be combined
##' @param plevels by default `combine.levels` pools low-frequency levels into a category named `OTHER` when `x` is not ordered and `ord=FALSE`. To instead name this category the concatenation of all the pooled level names, separated by a comma, set `plevels=TRUE`.
##' @param sep the separator for concatenating levels when `plevels=TRUE`
##' @return a factor variable, or if `ord=TRUE` an ordered factor variable
##' @author Frank Harrell
##' @examples
##' x <- c(rep('A', 1), rep('B', 3), rep('C', 4), rep('D',1), rep('E',1))
##' combine.levels(x, m=3)
##' combine.levels(x, m=3, plevels=TRUE)
##' combine.levels(x, ord=TRUE, m=3)
##' x <- c(rep('A', 1), rep('B', 3), rep('C', 4), rep('D',1), rep('E',1),
##' rep('F',1))
##' combine.levels(x, ord=TRUE, m=3)
## Modified 27Feb23 - added ord
## Pool infrequent levels of a categorical variable; see the roxygen
## documentation above.  Unordered variables get all rare levels pooled
## into a single category; when ord=TRUE only consecutive levels are
## merged, and an ordered factor is returned.
combine.levels <- function(x, minlev=.05, m, ord=is.ordered(x),
                           plevels=FALSE, sep=',') {
  x       <- as.factor(x)
  nonmiss <- ! is.na(x)
  nu      <- length(unique(x[nonmiss]))
  if(nu < 2) return(x)
  notna <- sum(nonmiss)
  if(notna == 0) return(x)
  cnt  <- table(x)          # NAs excluded from the tabulation
  frac <- cnt / notna
  ## m (a count) overrides minlev (a proportion)
  if(! missing(m)) minlev <- m / notna
  rare  <- frac < minlev
  nrare <- sum(rare)
  if(nrare == 0) return(x)

  if(ord) {
    ## Ordered case: repeatedly merge the least-frequent (possibly
    ## already merged) level with its smaller neighbor until every
    ## level has at least m observations
    if(nu > 250) warning('combine.levels with ord=TRUE is slow with more than 250 distinct values.\nConsider using cut2().')
    if(missing(m)) m <- round(minlev * notna)
    if(sum(cnt) < 2 * m)
      stop(paste('with m=', m, 'must have >=', 2 * m,
                 'non-missing observations'))
    newlevs <- names(cnt)
    cnt     <- as.vector(cnt)   # drop names; only counts are needed
    repeat {
      ## frequency of each current merged level, in order of appearance
      freq <- tapply(cnt, factor(newlevs, unique(newlevs)), sum)
      k <- which.min(freq)
      if(freq[k] >= m) break
      nlev <- length(freq)
      ## first level merges rightward, last leftward; interior levels
      ## merge with whichever neighbor is smaller
      pair <- if(k == 1) 1:2
              else if(k == nlev) (nlev - 1) : nlev
              else if(freq[k-1] < freq[k+1]) (k-1) : k
              else k : (k+1)
      merged <- names(freq)
      newlevs[newlevs %in% merged[pair]] <-
        paste(merged[pair], collapse=sep)
    }
    levels(x) <- newlevs
    return(ordered(x))
  }

  ## Unordered case: pool the rare levels (at least two of them) into
  ## one category, named 'OTHER' unless plevels=TRUE
  pooled <- if(nrare == 1) names(sort(frac))[1 : 2] else names(frac)[rare]
  keep   <- setdiff(names(frac), pooled)
  names(keep) <- keep
  newmap <- c(list(OTHER=pooled), keep)
  if(plevels) names(newmap)[1] <- paste(pooled, collapse=sep)
  levels(x) <- newmap
  x
}
Hmisc/R/rm.boot.s 0000644 0001762 0000144 00000033244 12243661443 013275 0 ustar ligges users rm.boot <- function(time, y, id=seq(along=time), subset=TRUE,
                    plot.individual=FALSE,
                    bootstrap.type=c('x fixed','x random'),
                    nk=6, knots, B=500, smoother=supsmu, 
                    xlab, xlim, ylim=range(y), 
                    times=seq(min(time),max(time),length=100),
                    absorb.subject.effects=FALSE, rho=0,
                    cor.pattern=c('independent','estimate'), ncor=10000,
                    ...)
{
  ## Cluster bootstrap of a repeated-measures restricted-cubic-spline
  ## fit of y on time, with subject (id) intercepts.  Either the design
  ## is held fixed and residual blocks are resampled ('x fixed'), or
  ## whole subjects are resampled ('x random').  Stores the spline
  ## coefficients, SSE and log likelihoods (optionally under a working
  ## correlation structure) for each resample; plot.rm.boot draws
  ## simultaneous confidence bands from these.
  ##
  ## time, y, id : longitudinal measurements and subject identifiers
  ## nk/knots    : number (or locations) of restricted cubic spline knots
  ## B           : number of bootstrap resamples
  ## times       : grid at which fitted curves are later evaluated
  ## absorb.subject.effects : shift y so all subjects share one intercept
  ## rho         : working equal-correlation parameter for the dependent
  ##               log likelihood; incompatible with cor.pattern='estimate'
  ## cor.pattern : working correlation: independence, an estimated
  ##               function of |time difference|, or a user function
  bootstrap.type <- match.arg(bootstrap.type)
  absorb.subject.effects <- absorb.subject.effects & !missing(id)
  
  if(!is.function(cor.pattern))
    cor.pattern <- match.arg(cor.pattern)
  if(!(is.character(cor.pattern) && cor.pattern=='independent') &&
     rho!=0) 
    stop("can't specify both cor.pattern='estimate' and rho")
  
  if(rho != 0)
    cor.pattern <- 'equal correlation'
  
  ## Dependence log likelihood needed unless working independence with rho=0
  dodep <- rho !=0 || !is.character(cor.pattern) || cor.pattern=='estimate'

  ## X fixed also implies that subjects are fixed
  id <- as.character(id)
  ylab <- label(y)
  if(ylab=='')
    ylab <- 'y'
  
  if(missing(xlab)) {
    xlab <- units(time)
    if(xlab=='')
      xlab <- 'Time'
  }

  if(length(subset) > 1) {
    id <- id[subset];
    time <- time[subset]; y <- y[subset]
  }

  ## Drop observations missing on time or y
  s <- is.na(time + y)
  if(any(s)) {
    s <- !s
    id <- id[s]
    time <- time[s]
    y <- y[s]
  }
  
  ## Need to order data so that a subject's records stay together
  ## Otherwise, the mean residuals at each time will not vary over resamples
  ## when bootstrap.type='x fixed'
  s <- order(id, time)
  id <- id[s];
  time <- time[s];
  y <- y[s]

  if(bootstrap.type=='x fixed' && diff(range(table(id))) != 0) 
    warning('To work properly with bootstrap.type="x fixed" all subjects must have the same # observations')

  n <- length(y)
  clusters <- unique(id)

  ## Optional spaghetti plot of per-subject smoothed curves
  if(plot.individual) {
    ploti <- function(time, y, id, clusters, xlim, ylim, xlab, ylab, 
                      smoother, ...)
    {
      plot(0,0,xlim=range(pretty(range(time))),ylim=ylim,
           xlab=xlab, ylab=ylab, type='n')
      j <- 0
      for(i in clusters) {
        s <- id==i
        j <- j+1
        lines(smoother(time[s],y[s],...),lty=j)
      }
    }
    
    ploti(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...)
  }

  ## Knot placement for the restricted cubic spline basis
  if(nk==0) knots <- double(0)
  if(missing(knots) && nk>0) {
    knots <- rcspline.eval(time,nk=nk,knots.only=TRUE)
    if(length(knots) != nk) {
      warning('could not obtain requested number of knots')
      nk <- length(knots)
    }
  } else nk <- length(knots)
  
  p <- if(nk==0) 1
       else nk-1

  ## Spline bases on the evaluation grid and on the observed times
  X.times <- if(nk==0) as.matrix(times)
             else rcspline.eval(times, knots, inclx=TRUE)
  
  X.Time <- if(nk==0) as.matrix(time)
            else rcspline.eval(time, knots, inclx=TRUE)
  
  ## Design matrix: spline terms plus subject indicators (cell means)
  X <- if(missing(id)) cbind(X.Time,1)
       else
         model.matrix(~ X.Time+id-1,
                      data=list(X.Time=X.Time,id=as.factor(id)))
  ## was id=id 3Apr02   Thanks: Don MacQueen, for R

  f <- lm.fit.qr.bare(X, y, intercept=FALSE)
  res <- f$residuals
  sigma2 <- sum(res^2)/n

  ## Optionally remove estimated subject effects from y so resampling
  ## can ignore subject identity
  if(absorb.subject.effects) {
    mean.intercept <- mean(c(0,f$coef[-(1:p)]))
    y <- y + mean.intercept - (f$coef[-(1:p)])[paste('id',id,sep='')]
    if(plot.individual) {
      ploti(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...)
      title('Raw Data Adjusted to Have a Common Intercept')
    }
  }

  ## Estimate the correlation of standardized residuals as a function of
  ## |time difference|, using up to ncor residual pairs
  if(is.character(cor.pattern) && cor.pattern=='estimate') {
    timediff <- product <- single(ncor)
    used <- 0
    i <- 0

    meanres <- tapply(res, time, mean)
    names(meanres) <- as.numeric(names(meanres))
    sdres   <- sqrt(tapply(res, time, var))
    names(sdres) <- as.numeric(names(sdres))

    if(any(is.na(sdres)))
      stop('one or more times occur in only one subject')
    
    for(wid in clusters) {
      s <- id==wid
      x <- time[s]
      cx <- as.character(x)
      r <- (res[s] - meanres[cx])/sdres[cx]
      if(any(is.na(r)))
        stop('program logic error')
      
      ## All within-subject pairs of time gaps and residual products
      diffs <- outer(x, x, FUN=function(a,b)abs(a-b))
      prods <- outer(r, r, FUN='*')
      np <- length(prods)
      if(used + np > ncor) {
        cat('\nUsed only',i,'subjects in estimating covariance pattern.\nMay want to increase ncor.\n')
        break
      }
      
      i <- i+1
      timediff[(used+1):(used+np)] <- diffs
      product[(used+1):(used+np)]  <- prods
      used <- used+np
    }
    
    timediff <- timediff[1:used]; product <- product[1:used]
    ## Average residual products within rounded time gaps
    product <- tapply(product, round(timediff,4), mean)
    timediff <- as.numeric(names(product))
    product[timediff==0] <- 1
    
    plot(timediff, product, xlab='Absolute Difference in Time',
         ylab='Correlation', type='b')
    cor.pattern <- list(x=timediff, y=product)
  }

  ##Subject effects are at the end, using cell means model
  ##Take intercept as average of all subject effects
  cof <- function(fit,p)
  {
    ko <- fit$coef
    c(mean(ko[-(1:p)]), ko[1:p])
  }

  o.coef <- cof(f,p)
  if(bootstrap.type=='x random') {
    orig.obsno <- split(1:n, id)
  } else {
    ## 'x fixed': keep per-subject residual blocks and fitted values
    R <- split(res, id)
    yhat <- if(!absorb.subject.effects) f$fitted.values
            else o.coef[1] + X.Time %*% o.coef[-1]
  }

  ## Row 1 of Coef etc. holds the original-sample fit
  Coef <- matrix(NA, B+1, p+1)
  sse <- loglik <- single(B+1)
  loglik.dep <- NULL
  Coef[1,] <- o.coef
  sse[1] <- sigma2*n
  loglik[1] <- n*logb(2*pi*sigma2) + n

  if(dodep) {
    loglik.dep <- loglik
    ## -2 log likelihood under the working correlation structure,
    ## computed subject by subject from the multivariate normal density
    lldep <- function(time, id, sigma2, res, rho, cor.pattern)
    {
      ll <- 0
      for(subj in unique(id)) {
        s <- id==subj
        x <- time[s]
        y <- res[s]
        p <- sum(s)

        if(is.character(cor.pattern) && cor.pattern=='equal correlation')
          cov <- sigma2*(diag(rep(1-rho,p))+rho)
        else {
          cov <- if(is.function(cor.pattern))
                   outer(x, x, cor.pattern)*sigma2
                 else {
                   timediff <- outer(x, x, function(a,b)abs(a-b))
                   matrix(approx(cor.pattern, xout=timediff)$y, nrow=p)*sigma2
                 }
        }
        
        ## Following code taken from dmvnorm()
        eS <- eigen(cov, symmetric = TRUE)
        ## y <- y %*% (eS$vectors * rep(1/sqrt(eS$values), each = p)) 24Feb02
        y <- y %*% (eS$vectors * rep(1/sqrt(eS$values),
                                     rep(p,length(eS$values))))
        logl <- sum(y^2) + p*logb(2*pi) + logb(prod(eS$values))
        ll <- ll + logl
      }
      ll
    }
    
    loglik.dep[1] <- lldep(time, id, sigma2, res, rho, cor.pattern)
  }

  ## Main bootstrap loop; uneven tracks mismatches in record counts
  ## when residual blocks of unequal length are resampled
  uneven <- 0
  for(i in 1:B) {
    if(i %% 10 ==0)
      cat(i,'')
    
    pts <- sample(clusters, replace=TRUE)
    if(bootstrap.type=='x random') {
      ## Resample whole subjects (rows of the design)
      obsn <- unlist(orig.obsno[pts])
      idb <- id[obsn]
      xt <- X.Time[obsn,,drop=FALSE]
      f.b <- lm.fit.qr.bare(if(absorb.subject.effects || missing(id))
                              cbind(xt,1)
                            else
                              model.matrix(~xt+idb-1,
                                           data=list(xt=xt,idb=as.factor(idb))),
                            y[obsn], intercept=FALSE)
      ## was idb=idb 3Apr02
    } else {
      ## Resample subjects' residual blocks, pad/truncate to n, and
      ## add them back to the original fitted values
      rr <- unlist(R[pts])
      lrr <- length(rr)
      uneven <- max(uneven, abs(lrr-n))
      if(lrr > n)
        rr <- rr[1:n]
      else if(lrr < n)
        rr <- c(rr, sample(rr, n-lrr, replace=TRUE))
      
      yb.e <- yhat + rr
      f.b <- if(absorb.subject.effects)
               lm.fit.qr.bare(cbind(X.Time,1), yb.e,
                              intercept=FALSE)
             else
               lm.fit.qr.bare(X, yb.e, intercept=FALSE)
    }
    
    cofb <- cof(f.b, p)   #26Jun97

    ## Predictions for the original observations from the resampled fit
    pred <-
      if(bootstrap.type=='x fixed') {
        if(!absorb.subject.effects)
          X %*% f.b$coefficients
        else
          cofb[1] + X.Time %*% cofb[-1]
      } else cofb[1] + X.Time %*% cofb[-1]
    ## x random case may only work properly if absorb.subject.effects, as
    ## we have to ignore the original subject ids anyway (the bootstrap
    ## sample in general won't represent all subjects)
    
    Coef[i+1,] <- cofb   #26Jun97
    sse[i+1] <- sum((y-pred)^2)
    sigma2 <- sum(f.b$residuals^2)/length(f.b$residuals)
    loglik[i+1] <- n*logb(2*pi*sigma2) + sse[i+1]/sigma2
    if(dodep)
      loglik.dep[i+1] <- lldep(time, id, sigma2, y-pred,
                               rho, cor.pattern)
  }

  if(uneven>0)
    warning(paste('Subjects had unequal number of records.\nMaximum discrepency between ',
                  'total number of bootstrap records sampled and original\nnumber of ',
                  'records (',n,') is ',uneven,'. Bootstrap estimates are approximate.',
                  sep=''))

  if(dodep) {
    srho <- spearman(loglik, loglik.dep)
    cat('\n\nSpearman rank correlation between',B+1,'log likelihoods ',
        'assuming independence and assuming dependence:',
        round(srho,3),'\n')
  }

  ## 'single' storage mode is an S-PLUS legacy to halve memory use
  mode(Coef) <- 'single'
  mode(sse)  <- 'single'
  structure(list(Coef=Coef, sse=sse, loglik=loglik, loglik.dep=loglik.dep,
                 times=times, X.times=X.times, 
                 xlab=xlab, ylab=ylab, ylim=ylim,
                 bootstrap.type=bootstrap.type, fit=f, knots=knots,
                 rho=rho, cor.pattern=cor.pattern),
            class='rm.boot')
}
plot.rm.boot <-
  ## Plot method for rm.boot objects: draws the original fitted curve
  ## together with simultaneous confidence bands formed from the best
  ## conf.int*B bootstrap fits ranked by SSE or -2 log likelihood.
  ##
  ## obj2       : optional second rm.boot fit (on the same times grid);
  ##              curves/objectives are differenced/summed to get bands
  ##              for the difference of the two time-response curves
  ## objective  : ranking criterion; default depends on whether a
  ##              dependence structure was used in rm.boot
  ## individual.boot / curves.in.simultaneous.band : overlay individual
  ##              bootstrap curves (all, or only those inside the band)
  ## pointwise.band : also draw naive pointwise quantile bands
  ## multi      : shaded multi-level simultaneous regions, with
  ##              multi.conf levels rendered by color or fill density
  ## Returns invisibly the grid, fitted curve and band limits.
  function(x, obj2, conf.int=.95,
           xlab=x$xlab, ylab=x$ylab, 
           xlim, ylim=x$ylim,
           individual.boot=FALSE,
           pointwise.band=FALSE,
           curves.in.simultaneous.band=FALSE,
           col.pointwise.band=2,
           objective=c('-2 log L','sse','dep -2 log L'), 
           add=FALSE, ncurves,
           multi=FALSE, multi.method=c('color','density'),
           multi.conf =c(.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.95,.99),
           multi.density=c( -1,90,80,70,60,50,40,30,20,10, 7,4),
           multi.col    =c(  1, 8,20, 5, 2, 7,15,13,10,11,9,14),
           subtitles=TRUE, ...)
{
  ## 2 was between 5 and 7, 17 was between 8 and 20
  obj <- x
  objective <- match.arg(objective)

  ## Default objective: plain -2 log L unless a dependence structure
  ## (rho or estimated correlation pattern) was used in the fit
  if(missing(objective))
    objective <-
      if(obj$rho==0 && is.character(obj$cor.pattern))
        '-2 log L'
      else 'dep -2 log L'
  
  sse <- switch(objective,
                sse        = obj$sse,
                '-2 log L' = obj$loglik,
                'dep -2 log L' = obj$loglik.dep)
  B     <- length(sse)
  Coef  <- obj$Coef
  times <- obj$times

  ## Two-object case: rank by the summed objective and difference curves
  if(!missing(obj2)) {
    if((length(times) != length(obj2$times)) ||
       (any(times != obj2$times, na.rm=TRUE)))
      stop('times vector must be identical for both rm.boot objects')
    
    times <- ifelse(is.na(times), NA, obj2$times)
    sse <- sse + obj2$sse
    if(missing(ylab))
      ylab <- paste(obj$ylab,'-',obj2$ylab)
  }
  
  ## order from best -2 log likelihood or sum of squared errors to worst
  i <- order(sse)

  ## Select best confidence coefficient*B estimates
  conf <- if(multi) max(multi.conf)
          else conf.int
  
  i <- i[1:round(conf*B)]
  ## Element 1 is the original fit; it should rank best
  if(i[1] != 1)
    warning(paste('design is imbalanced enough that best log likelihood or SSE was not\n',
                  'obtained from overall fit (objective=',format(sse[1]),') but from\n',
                  'a bootstrap fit (objective=',format(sse[i[1]]),
                  ')\nThis can also happen if the objective is not -2 log L',sep=''))

  ## Evaluate all fits on time grid and compute point by point max and min
  ## (columns of 'curves' correspond to rows of Coef)
  curves <- cbind(1,obj$X.times) %*% t(Coef)
  if(!missing(obj2)) {
    curves <- curves - cbind(1,obj2$X.times) %*% t(obj2$Coef)
    if(missing(ylim))
      ylim <- range(curves[,i])
  }
  
  if(multi) {
    ## Nested shaded regions, one per multi.conf level (innermost first)
    multi.method <- match.arg(multi.method)
    if(missing(xlim))
      plot(times, curves[,1], type='n',
           xlab=xlab, ylab=ylab, ylim=ylim)
    else 
      plot(times, curves[,1], type='n',
           xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim)
    
    title(paste('Simultaneous',min(multi.conf),'-',max(multi.conf),
                'Confidence Regions'))
    high.prev <- low.prev <- curves[,1]
    for(j in 1:length(multi.conf)) {
      ii <- i[1:round(multi.conf[j]*B)]
      high <- apply(curves[,ii], 1, max)
      low  <- apply(curves[,ii], 1, min)
      ## Shade the annulus between this level's band and the previous one
      if(multi.method=='density') {
        polygon(c(times,rev(times)), c(high.prev,rev(high)),
                density=multi.density[j])
        polygon(c(times,rev(times)), c(low.prev, rev(low)), 
                density=multi.density[j])
      } else {
        polygon(c(times,rev(times)), c(high.prev,rev(high)),
                col=multi.col[j])
        polygon(c(times,rev(times)), c(low.prev, rev(low)), 
                col=multi.col[j])
      }
      
      high.prev <- high; low.prev <- low
    }
    
    lines(times, curves[,1], lwd=2, col=0)  ## point estimates in white
  } else {
    ## Single band: point estimate plus envelope of the retained fits
    if(add)
      lines(times, curves[,1])
    else {
      if(missing(xlim))
        plot(times, curves[,1], type='l',
             xlab=xlab, ylab=ylab, ylim=ylim)
      else 
        plot(times, curves[,1], type='l',
             xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim)
      
      title(paste('Simultaneous',conf.int,'Confidence Region'))
    }
    
    high <- apply(curves[,i], 1, max)
    low  <- apply(curves[,i], 1, min)
    lines(times, high, lty=2)
    lines(times, low,  lty=2)
  }
  
  result <- list(times=times, fitted=curves[,1], lower=low, upper=high)
  
  ## Optional overlays of individual bootstrap curves
  if(individual.boot || curves.in.simultaneous.band) {
    subs <- if(individual.boot) 1:B
            else i
    
    if(!missing(ncurves))
      subs <- sample(subs, ncurves)
    
    for(j in subs)
      lines(times, curves[,j], lty=2)
  }
  
  ## Naive pointwise quantile bands for comparison
  if(pointwise.band) {
    p <- apply(curves, 1, quantile, probs=c((1-conf.int)/2,1-(1-conf.int)/2))
    lines(times,p[1,],col=col.pointwise.band)
    lines(times,p[2,],col=col.pointwise.band)
    result <- c(result, list(pointwise.lower=p[1,], pointwise.upper=p[2,]))
  }
  
  if(!add && subtitles) {
    title(sub=obj$bootstrap.type,adj=1)
    title(sub=paste(B-1,'bootstrap repetitions'),adj=0)
  }
  
  invisible(result)
}
Hmisc/R/hlab.r 0000644 0001762 0000144 00000015057 14411647751 012631 0 ustar ligges users ##' Easy Extraction of Labels/Units Expressions for Plotting
##'
##' Given a single unquoted variable, first looks to see if a non-`NULL` `LabelsUnits` object exists (produced by `extractlabs()`). When `LabelsUnits` does not exist or is `NULL`, looks up the attributes in the current dataset, which defaults to `d` or may be specified by `options(current_ds='name of the data frame/table')`. Finally the existence of a variable of the given name in the global environment is checked. When a variable is not found in any of these three sources or has a blank `label` and `units`, an `expression()` with the variable name alone is returned. If `html=TRUE`, HTML strings are constructed instead, suitable for `plotly` graphics.
##'
##' The result is useful for `xlab` and `ylab` in base plotting functions or in `ggplot2`, along with being useful for `labs` in `ggplot2`. See example.
##' @title hlab
##' @param x a single variable name, unquoted
##' @param name a single character string providing an alternate way to name `x` that is useful when `hlab` is called from another function such as `hlabs`
##' @param html set to `TRUE` to return HTML strings instead of `plotmath` expressions
##' @param plotmath set to `FALSE` to use plain text instead of plotmath
##' @return an expression created by `labelPlotmath` with `plotmath=TRUE`
##' @author Frank Harrell
##' @seealso [label()], [units()], [contents()], [hlabs()], [extractlabs()], [plotmath]
##' @md
##' @examples
##' d <- data.frame(x=1:10, y=(1:10)/10)
##' d <- upData(d, labels=c(x='X', y='Y'), units=c(x='mmHg'), print=FALSE)
##' hlab(x)
##' hlab(x, html=TRUE)
##' hlab(z)
##' require(ggplot2)
##' ggplot(d, aes(x, y)) + geom_point() + labs(x=hlab(x), y=hlab(y))
##' # Can use xlab(hlab(x)) + ylab(hlab(y)) also
##' # Store names, labels, units for all variables in d in object
##' LabelsUnits <- extractlabs(d)
##' # Remove d; labels/units still found
##' rm(d)
##' hlab(x)
##' # Remove LabelsUnits and use a current dataset named
##' # d2 instead of the default d
##' rm(LabelsUnits)
##' options(current_ds='d2')
hlab <- function(x, name=NULL, html=FALSE, plotmath=TRUE) {
  ## Resolve the variable name: an explicit `name` overrides non-standard
  ## evaluation of the unquoted argument `x`
  xname <- if(length(name)) name else as.character(substitute(x))
  ## Default label built from the bare variable name alone; returned when no
  ## label/units can be found in any of the three sources below
  ldef <- labelPlotmath(xname, html=html, plotmath=plotmath)
  ## Source 1: a LabelsUnits object (produced by extractlabs()) found on the
  ## search path.  The `if` without `else` yields NULL when it does not exist.
  lu <- if(exists('LabelsUnits')) LabelsUnits
  if(length(lu)) {
    if(xname %nin% lu$name) return(ldef)
    ## Keyed data.table row lookup by variable name; keep first match only
    lu <- lu[xname][1]
    if(lu$label != '' || lu$units != '')
      return(labelPlotmath(lu$label, lu$units, html=html, plotmath=plotmath))
  }
  ## Source 2: the current dataset, named by options(current_ds=) (default 'd')
  currds <- getOption('current_ds', 'd')
  if(exists(currds)) {
    d <- get(currds)
    if(xname %in% names(d)) {
      xx <- d[[xname]]
      at <- attributes(xx)
      ## Only use the dataset when the column carries a label or units
      if(length(c(at$label, at$units)) > 0)
        return(label(xx, plot=plotmath, default=xname, html=html))
    }
  }
  ## Source 3: a variable of that name in the caller's environment; note this
  ## is the first point at which the promise for `x` is actually evaluated
  if(exists(xname, envir=parent.frame()))
    label(x, plot=plotmath, default=xname, html=html) else ldef
}
##' Front-end to ggplot2 labs Function
##'
##' Runs `x`, `y`, or both through [hlab()] and passes the constructed labels to the [ggplot2::labs] function to specify x- and y-axis labels specially formatted for units of measurement
##' @title hlabs
##' @param x a single variable name, unquoted
##' @param y a single variable name, unquoted
##' @param html set to `TRUE` to render in html (for `plotly`), otherwise the result is `plotmath` expressions
##' @return result of [ggplot2::labs()]
##' @author Frank Harrell
##' @md
##' @examples
##' # Name the current dataset d, or specify a name with
##' # options(curr_ds='...') or run `extractlabs`, then
##' # ggplot(d, aes(x,y)) + geom_point() + hlabs(x,y)
##' # to specify only the x-axis label use hlabs(x), or to
##' # specify only the y-axis label use hlabs(y=...)
hlabs <- function(x, y, html=FALSE) {
  ## Deparse the unquoted arguments; a missing argument deparses to ''
  nx <- as.character(substitute(x))
  ny <- as.character(substitute(y))
  ## Guard clauses: only y missing -> x label only; only x missing -> y label
  ## only; otherwise label both axes
  if(ny == '') return(labs(x=hlab(x, name=nx, html=html)))
  if(nx == '') return(labs(y=hlab(y, name=ny, html=html)))
  labs(x=hlab(x, name=nx, html=html),
       y=hlab(y, name=ny, html=html))
}
##' Easily Retrieve Text Form of Labels/Units
##'
##' Uses the same search method as `hlab` but returns the label and units in a character string, with units, if present, in brackets
##' @title vlab
##' @param x a single variable name, unquoted
##' @param name optional character string to use as variable name
##' @return character string
##' @author Frank Harrell
##' @md
##' @seealso [hlab()]
vlab <- function(x, name=NULL) {
  ## vlab() is hlab() restricted to plain-text (non-plotmath, non-HTML) output
  vname <- if(length(name)) name else as.character(substitute(x))
  hlab(x, name=vname, html=FALSE, plotmath=FALSE)
}
##' Extract Labels and Units From Multiple Datasets
##'
##' For one or more data frames/tables extracts all labels and units and combines them over datasets, dropping any variables not having either labels or units defined. The resulting data table is returned and is used by the `hlab` function if the user stores the result in an object named `LabelsUnits`. The result is `NULL` if no variable in any dataset has a non-blank `label` or `units`. Variables found in more than one dataset with duplicate `label` and `units` are consolidated. A warning message is issued when duplicate variables have conflicting labels or units, and by default, details are printed. No attempt is made to resolve these conflicts.
##' @title extractlabs
##' @param ... one or more data frames or data tables
##' @param print set to `FALSE` to not print details about variables with conflicting attributes
##' @return a data table
##' @author Frank Harrell
##' @md
##' @seealso [label()], [contents()], [units()], [hlab()]
##' @examples
##' d <- data.frame(x=1:10, y=(1:10)/10)
##' d <- upData(d, labels=c(x='X', y='Y'), units=c(x='mmHg'), print=FALSE)
##' d2 <- d
##' units(d2$x) <- 'cm'
##' LabelsUnits <- extractlabs(d, d2)
##' LabelsUnits
extractlabs <- function(..., print=TRUE) {
  dotlist <- list(...)
  ## Gather name/label/units triplets for every column of one dataset
  labu <- function(d)
    list(name=names(d), label=sapply(d, label), units=sapply(d, units))
  u <- data.table::rbindlist(lapply(dotlist, labu))
  data.table::setkeyv(u, 'name')
  ## Documented behavior: return NULL when no variable in any dataset has a
  ## non-blank label or units.  (Previously `lu` was left NULL in that case
  ## and the data.table subsets below errored on it.)
  if(! any(u$label != '') && ! any(u$units != '')) return(NULL)
  ## Keep only variables carrying a label or units
  lu <- u[label != '' | units != '']
  ## Count distinct label|units combinations per variable name; nc > 1 means
  ## the same variable has conflicting attributes across datasets
  w <- lu[, .(nc=length(unique(paste(label, units, sep='|')))), by=name]
  nconflict <- sum(w$nc > 1)
  if(nconflict) {
    warning(paste(nconflict,
                  'variables have conflicting labels/units from different datasets'))
    if(print) {
      cname <- w$name[w$nc > 1]
      cat('Variable names with inconsistent attributes:\n')
      print(lu[name %in% cname])
      cat('\n')
    }
  }
  ## Consolidate variables duplicated with identical attributes
  unique(lu)
}
## Declare the data.table NSE variable 'name' and the globally-searched
## 'LabelsUnits' object so R CMD check does not flag them as undefined globals
utils::globalVariables(c('name', 'LabelsUnits'))
Hmisc/R/simMarkovOrd.r 0000644 0001762 0000144 00000114310 14241432662 014322 0 ustar ligges users ##' Simulate Ordinal Markov Process
##'
##' Simulates longitudinal data for subjects following a first-order Markov process under a proportional odds model. Optionally, response-dependent sampling can be done, e.g., if a subject hits a specified state at time t, measurements are removed for times t+1, t+3, t+5, ... This is applicable when for example a study of hospitalized patients samples every day, Y=1 denotes patient discharge to home, and sampling is less frequent outside the hospital. This example assumes that arriving home is not an absorbing state, i.e., a patient could return to the hospital.
##' @title simMarkovOrd
##' @param n number of subjects to simulate
##' @param y vector of possible y values in order (numeric, character, factor)
##' @param times vector of measurement times
##' @param initial initial value of `y` (baseline state; numeric, character, or factor matching `y`). If length 1 this value is used for all subjects, otherwise it is a vector of length `n`.
##' @param X an optional vector of matrix of baseline covariate values passed to `g`. If a vector, `X` represents a set of single values for all the covariates and those values are used for every subject. Otherwise `X` is a matrix with rows corresponding to subjects and columns corresponding to covariates which `g` must know how to handle. `g` only sees one row of `X` at a time.
##' @param absorb vector of absorbing states, a subset of `y` (numeric, character, or factor matching `y`). The default is no absorbing states. Observations are truncated when an absorbing state is simulated.
##' @param intercepts vector of intercepts in the proportional odds model. There must be one fewer of these than the length of `y`.
##' @param g a user-specified function of three or more arguments which in order are `yprev` - the value of `y` at the previous time, the current time `t`, the `gap` between the previous time and the current time, an optional (usually named) covariate vector `X`, and optional arguments such as a regression coefficient value to simulate from. The function needs to allow `yprev` to be a vector and `yprev` must not include any absorbing states. The `g` function returns the linear predictor for the proportional odds model aside from `intercepts`. The returned value must be a matrix with row names taken from `yprev`. If the model is a proportional odds model, the returned value must be one column. If it is a partial proportional odds model, the value must have one column for each distinct value of the response variable Y after the first one, with the levels of Y used as optional column names. So columns correspond to `intercepts`. The different columns are used for `y`-specific contributions to the linear predictor (aside from `intercepts`) for a partial or constrained partial proportional odds model. Parameters for partial proportional odds effects may be included in the ... arguments.
##' @param carry set to `TRUE` to carry absorbing state forward after it is first hit; the default is to end records for the subject once the absorbing state is hit
##' @param rdsample an optional function to do response-dependent sampling. It is a function of these arguments, which are vectors that stop at any absorbing state: `times` (ascending measurement times for one subject) and `y` (vector of ordinal outcomes at these times for one subject). The function returns `NULL` if no observations are to be dropped, or returns the vector of new times to sample.
##' @param ... additional arguments to pass to `g` such as a regression coefficient
##' @return data frame with one row per subject per time, and columns id, time, yprev, y, values in ...
##' @author Frank Harrell
##' @seealso [soprobMarkovOrd()], [estSeqMarkovOrd()]
##' @export
##' @md
simMarkovOrd <- function(n=1, y, times, initial, X=NULL, absorb=NULL,
                         intercepts, g, carry=FALSE, rdsample=NULL, ...) {
  ## Simulate longitudinal ordinal outcomes for n subjects under a
  ## first-order Markov proportional odds model.  States are handled as
  ## character internally so factor inputs are normalized up front.
  if(is.factor(y)) y <- as.character(y)
  if(is.factor(initial)) initial <- as.character(initial)
  if(is.factor(absorb)) absorb <- as.character(absorb)
  ychar <- is.character(y)
  ## A scalar initial state applies to every subject
  if(length(initial) == 1) initial <- rep(initial, n)
  if(length(initial) != n) stop('initial must have length 1 or n')
  if(any(initial %in% absorb))
    stop('initial state cannot be an absorbing state')
  ## X may be a per-subject matrix (one row per id) or a single named vector
  ## of covariate settings shared by all subjects
  Xmat <- length(X) && is.matrix(X)
  if(Xmat && ! length(colnames(X)))
    stop('when a matrix, X must have column names')
  if(length(X) && ! Xmat && ! length(names(X)))
    stop('when a vector, elements of X must have names')
  nt <- length(times)
  ## Per-subject scratch vectors (current and previous state, gaps)
  Y <- Yp <- if(ychar) character(nt) else numeric(nt)
  gaps <- numeric(nt)
  ## Preallocated output accumulators sized for the maximum n * nt records;
  ## `is`/`ie` track the filled range
  ID <- Time <- Gap <- numeric(n * nt)
  YYprev <- YY <- if(ychar) character(n * nt) else numeric(n * nt)
  is <- 1
  times.saved <- 0   # total measurements dropped by response-dependent sampling
  for(id in 1 : n) {
    tprev <- 0
    i <- 0
    for(t in times) {
      i <- i + 1
      gap <- t - tprev
      gaps[i] <- gap
      yprev <- if(i == 1) initial[id] else Y[i - 1]
      Yp[i] <- yprev
      ## carry=TRUE propagates an absorbing state forward instead of stopping
      if(carry && (yprev %in% absorb)) Y[i] <- yprev
      else {
        xb <- g(yprev, t, gap, X=if(Xmat) X[id, ] else X, ...)
        ## If partial PO model xb has 1 row (since yprev is scalar) and
        ## columns corresponding to intercepts. If PO, is 1x1
        probs <- plogis(intercepts + xb)
        ## Compute cell probabilities from successive differences in
        ## exceedance probs
        probs <- c(1., probs) - c(probs, 0.)
        lo <- probs < 0.
        hi <- probs > 1.
        ## The following is needed for partial proportional odds models
        if(any(c(lo , hi))) {
          warning(paste('Probabilities < 0 or > 1 at time t=', t,
                        'id=', id,
                        ':', paste(probs[c(lo, hi)], collapse=' '),
                        'set to 0 or 1'))
          if(any(lo)) probs[lo] <- 0.
          if(any(hi)) probs[hi] <- 1.
        }
        Y[i] <- sample(y, 1, prob=probs)
        ## Default behavior: truncate the series at the first absorbing state
        if(! carry && (Y[i] %in% absorb)) break
      }
      tprev <- t
    }
    ## Keep only the i observed records for this subject
    s <- 1 : i
    atimes <- times[s]
    agaps <- gaps[s]
    aYp <- Yp[s]
    aY <- Y[s]
    ## Optional response-dependent sampling: rdsample returns NULL (keep all)
    ## or the reduced vector of times to retain
    if(length(rdsample)) {
      stimes <- rdsample(atimes, aY)
      lt <- length(stimes)
      if(lt) {
        times.saved <- times.saved + i - lt
        tsprev <- c(0, stimes[- lt])
        agaps <- stimes - tsprev
        ## NOTE(review): subsets against the full `times` vector rather than
        ## `atimes`; this is only safe when rdsample returns times contained
        ## in the first i measurement times -- confirm
        aY <- aY[times %in% stimes]
        if(length(aY) != lt) stop('program logic error in simMarkovOrd')
        aYp <- c(aYp[1], aY[- lt])
        atimes <- stimes
      }
    }
    ## Append this subject's records to the accumulators
    ie <- is + length(aY) - 1
    ID [is : ie] <- id
    Time [is : ie] <- atimes
    Gap [is : ie] <- agaps
    YYprev[is : ie] <- aYp
    YY [is : ie] <- aY
    is <- ie + 1
  }
  ## Trim accumulators to the filled length; restore factor coding if the
  ## states were character (yprev can never be an absorbing state)
  yy <- YY[1 : ie]
  if(ychar) yy <- factor(yy, levels=y)
  yyp <- YYprev[1 : ie]
  if(ychar) yyp <- factor(yyp, levels=setdiff(y, absorb))
  res <- data.frame(id=ID[1 : ie], time=Time[1 : ie], gap=Gap[1 : ie],
                    yprev=yyp, y=yy, ...)
  attr(res, 'times.saved.per.subject') <- times.saved / n
  ## Handle case where X is a constant vector to distribute to all obs
  if(length(X)) {
    if(Xmat) for(nam in colnames(X)) res[[nam]] <- X[res$id, nam]
    else
      for(nam in names(X)) res[[nam]] <- X[nam]
  }
  res
}
#' State Occupancy Probabilities for First-Order Markov Ordinal Model
#'
#' @title soprobMarkovOrd
#' @inheritParams simMarkovOrd
#' @param y a vector of possible y values in order (numeric, character, factor)
#' @param times vector of measurement times
#' @param initial initial value of `y` (baseline state; numeric, character, factor)
#' @param absorb vector of absorbing states, a subset of `y`. The default is no absorbing states. (numeric, character, factor)
#' @param intercepts vector of intercepts in the proportional odds model, with length one less than the length of `y`
#' @param ... additional arguments to pass to `g` such as covariate settings
#'
#' @return matrix with rows corresponding to times and columns corresponding to states, with values equal to exact state occupancy probabilities
#' @export
#' @author Frank Harrell
#' @seealso [simMarkovOrd()]
#' @export
#' @md
soprobMarkovOrd <- function(y, times, initial, absorb=NULL,
                            intercepts, g, ...) {
  ## Exact state occupancy probabilities over `times` for a first-order
  ## Markov (partial) proportional odds model, conditioning on `initial`.
  if(initial %in% absorb) stop('initial state cannot be an absorbing state')
  k    <- length(y)
  nt   <- length(times)
  yc   <- as.character(y)
  yna  <- setdiff(y, absorb)   # all states except absorbing ones
  ynac <- as.character(yna)
  P <- matrix(NA, nrow=nt, ncol=k,
              dimnames=list(as.character(times), yc))
  ## Convert exceedance probabilities into cell probabilities by
  ## successive differences
  cellprobs <- function(exceed) c(1., exceed) - c(exceed, 0.)
  ## First time point: condition on the scalar initial state.  The third
  ## argument to g (the gap) assumes a time origin of zero.  Since initial
  ## is scalar, g returns one row, with one column per intercept when the
  ## model is partial PO (a single value for plain PO).
  P[1, ] <- cellprobs(plogis(intercepts + g(initial, times[1], times[1], ...)))
  tprev <- times[1]
  ## NOTE: as in the original, the loop below assumes nt >= 2
  for(it in 2 : nt) {
    tcur  <- times[it]
    delta <- tcur - tprev
    ## Linear predictor for every non-absorbing previous state; g returns a
    ## matrix with row names equal to ynac (columns match intercepts for
    ## partial PO models)
    xb <- g(yna, tcur, delta, ...)
    ## One-step transition matrix: rows = previous state, columns = current
    ## state.  A row for an absorbing state puts probability 1 on staying.
    trans <- matrix(NA, nrow=k, ncol=k, dimnames=list(yc, yc))
    for(prev in y) {
      pc <- as.character(prev)
      if(prev %in% absorb) {
        trans[pc, ]   <- 0.
        trans[pc, pc] <- 1.
      } else
        trans[pc, ] <- cellprobs(plogis(intercepts + xb[pc, ]))
    }
    ## Uncondition over the previous state's occupancy distribution
    P[it, ] <- t(trans) %*% P[it - 1, ]
    tprev <- tcur
  }
  P
}
##' Simulate Comparisons For Use in Sequential Markov Longitudinal Clinical Trial Simulations
##'
##' Simulates sequential clinical trials of longitudinal ordinal outcomes using a first-order Markov model. Looks are done sequentially after subject ID numbers given in the vector `looks` with the earliest possible look being after subject 2. At each look, a subject's repeated records are either all used or all ignored depending on the sequent ID number. For each true effect parameter value, simulation, and at each look, runs a function to compute the estimate of the parameter of interest along with its variance. For each simulation, data are first simulated for the last look, and these data are sequentially revealed for earlier looks. The user provides a function `g` that has extra arguments specifying the true effect of `parameter` the treatment `group` expecting treatments to be coded 1 and 2. `parameter` is usually on the scale of a regression coefficient, e.g., a log odds ratio. Fitting is done using the `rms::lrm()` function, unless non-proportional odds is allowed in which case `VGAM::vglm()` is used. If `timecriterion` is specified, the function also, for the last data look only, computes the first time at which the criterion is satisfied for the subject or use the event time and event/censoring indicator computed by `timecriterion`. The Cox/logrank chi-square statistic for comparing groups on the derived time variable is saved. If `coxzph=TRUE`, the `survival` package correlation coefficient `rho` from the scaled partial residuals is also saved so that the user can later determine to what extent the Markov model resulted in the proportional hazards assumption being violated when analyzing on the time scale. `vglm` is accelerated by saving the first successful fit for the largest sample size and using its coefficients as starting value for further `vglm` fits for any sample size for the same setting of `parameter`.
##' @title estSeqMarkovOrd
##' @inheritParams simMarkovOrd
##' @param y vector of possible y values in order (numeric, character, factor)
##' @param times vector of measurement times
##' @param initial a vector of probabilities summing to 1.0 that specifies the frequency distribution of initial values to be sampled from. The vector must have names that correspond to values of `y` representing non-absorbing states.
##' @param absorb vector of absorbing states, a subset of `y`. The default is no absorbing states. Observations are truncated when an absorbing state is simulated. May be numeric, character, or factor.
##' @param intercepts vector of intercepts in the proportional odds model. There must be one fewer of these than the length of `y`.
##' @param parameter vector of true parameter (effects; group differences) values. These are group 2:1 log odds ratios in the transition model, conditioning on the previous `y`.
##' @param looks integer vector of ID numbers at which maximum likelihood estimates and their estimated variances are computed. For a single look specify a scalar value for `looks` equal to the number of subjects in the sample.
##' @param formula a formula object given to the `lrm()` function using variables with these name: `y`, `time`, `yprev`, and `group` (factor variable having values '1' and '2'). The `yprev` variable is converted to a factor before fitting the model unless `yprevfactor=FALSE`.
##' @param ppo a formula specifying the part of `formula` for which proportional odds is not to be assumed, i.e., that specifies a partial proportional odds model. Specifying `ppo` triggers the use of `VGAM::vglm()` instead of `rms::lrm` and will make the simulations run slower.
##' @param yprevfactor see `formula`
##' @param groupContrast omit this argument if `group` has only one regression coefficient in `formula`. Otherwise if `ppo` is omitted, provide `groupContrast` as a list of two lists that are passed to `rms::contrast.rms()` to compute the contrast of interest and its standard error. The first list corresponds to group 1, the second to group 2, to get a 2:1 contrast. If `ppo` is given and the group effect is not just a simple regression coefficient, specify as `groupContrast` a function of a `vglm` fit that computes the contrast of interest and its standard error and returns a list with elements named `Contrast` and `SE`. For the latter type you can optionally have formal arguments `n1`, `n2`, and `parameter` that are passed to `groupContrast` to compute the standard error of the group contrast, where `n1` and `n2` respectively are the sample sizes for the two groups and `parameter` is the true group effect parameter value.
##' @param cscov applies if `ppo` is not used. Set to `TRUE` to use the cluster sandwich covariance estimator of the variance of the group comparison.
##' @param timecriterion a function of a time-ordered vector of simulated ordinal responses `y` that returns a vector `FALSE` or `TRUE` values denoting whether the current `y` level met the condition of interest. For example `estSeqMarkovOrd` will compute the first time at which `y >= 5` if you specify `timecriterion=function(y) y >= 5`. This function is only called at the last data look for each simulated study. To have more control, instead of `timecriterion` returning a logical vector have it return a numeric 2-vector containing, in order, the event/censoring time and the 1/0 event/censoring indicator.
##' @param sstat set to a function of the time vector and the corresponding vector of ordinal responses for a single group if you want to compute a Wilcoxon test on a derived quantity such as the number of days in a given state.
##' @param coxzph set to `TRUE` if `timecriterion` is specified and you want to compute a statistic for testing proportional hazards at the last look of each simulated data
##' @param nsim number of simulations (default is 1)
##' @param maxest maximum acceptable absolute value of the contrast estimate, ignored if `NULL`. Any values exceeding `maxest` will result in the estimate being set to `NA`.
##' @param maxvest like `maxest` but for the estimated variance of the contrast estimate
##' @param progress set to `TRUE` to send current iteration number to `pfile` every 10 iterations. Each iteration will really involve multiple simulations, if `parameter` has length greater than 1.
##' @param pfile file to which to write progress information. Defaults to `''` which is the console. Ignored if `progress=FALSE`.
##' @return a data frame with number of rows equal to the product of `nsim`, the length of `looks`, and the length of `parameter`, with variables `sim`, `parameter`, `look`, `est` (log odds ratio for group), and `vest` (the variance of the latter). If `timecriterion` is specified the data frame also contains `loghr` (Cox log hazard ratio for group), `lrchisq` (chi-square from Cox test for group), and if `coxph=TRUE`, `phchisq`, the chi-square for testing proportional hazards. The attribute `etimefreq` is also present if `timecriterion` is present, and it probvides the frequency distribution of derived event times by group and censoring/event indicator. If `sstat` is given, the attribute `sstat` is also present, and it contains an array with dimensions corresponding to simulations, parameter values within simulations, `id`, and a two-column subarray with columns `group` and `y`, the latter being the summary measure computed by the `sstat` function. The returned data frame also has attribute `lrmcoef` which are the last-look logistic regression coefficient estimates over the `nsim` simulations and the parameter settings, and an attribute `failures` which is a data frame containing the variables `reason` and `frequency` cataloging the reasons for unsuccessful model fits.
##' @author Frank Harrell
##' @seealso `gbayesSeqSim()`, `simMarkovOrd()`,
##' @export
##' @md
estSeqMarkovOrd <- function(y, times, initial, absorb=NULL, intercepts,
                            parameter, looks, g, formula, ppo=NULL,
                            yprevfactor=TRUE,
                            groupContrast=NULL, cscov=FALSE,
                            timecriterion=NULL, coxzph=FALSE,
                            sstat=NULL, rdsample=NULL,
                            maxest=NULL, maxvest=NULL,
                            nsim=1, progress=FALSE, pfile='') {
  ## Save and restore the caller's datadist option; it is overwritten below
  olddd <- getOption('datadist')
  on.exit(options(datadist=olddd))
  isppo <- length(ppo) > 0
  if(isppo) {
    if(! inherits(ppo, 'formula')) stop('ppo must be a formula')
    if(! requireNamespace('VGAM'))
      stop('ppo specified and VGAM package not available')
    # VGAM wants you to declare FALSE to indicate non-PO
    vglm <- VGAM::vglm
    ppo <- formula(paste('FALSE ~', as.character(ppo)[-1]))
  } else if (!requireNamespace("rms", quietly = TRUE))
    stop('ppo not specified and rms package not available')
  if(isppo && cscov) stop('may not specify cscov=TRUE with ppo')
  nas <- setdiff(y, absorb) # non-absorbing states
  if(length(initial) != length(nas))
    stop('length of initial must be number of non-absorbing values of y')
  if(! all(sort(names(initial)) == sort(as.character(nas))))
    stop('names of elements in initial are incorrect')
  if(coxzph && ! length(timecriterion))
    stop('must specify timecriterion when coxzph=TRUE')
  looks <- sort(looks)
  nlook <- length(looks)
  N     <- max(looks)   # final sample size; data simulated once at this size
  np    <- length(parameter)
  nc    <- nsim * nlook * np   # total number of result rows
  ## Preallocated result columns
  parm <- est <- vest <- numeric(nc)
  look <- sim <- integer(nc)
  ndy <- length(y)
  Etimefreq <- NULL
  if(length(timecriterion)) {
    ## Frequency table of derived event times by sim x parameter x group x
    ## censored/event x time
    Etimefreq <-
      array(0, dim=c(nsim, np, 2, 2, length(times)),
            dimnames=list(paste('sim', 1 : nsim),
                          as.character(parameter),
                          c('1', '2'),
                          c('censored', 'event'),
                          as.character(times)))
    loghr <- lrchisq <- rep(NA, nc)
    if(coxzph) phchisq <- rep(NA, nc)
  }
  if(length(sstat))
    Sstat <- array(0L, dim=c(nsim, np, N, 2),
                   dimnames=list(paste('sim', 1 : nsim),
                                 as.character(parameter),
                                 paste('id', 1 : N),
                                 c('group', 'y')))
  ## Does the user-supplied contrast function want sample sizes and the
  ## true parameter passed to it?
  groupContrastUsesN <-
    length(groupContrast) &&
    all(c('n1', 'n2', 'parameter') %in% names(formals(groupContrast)))
  ## For each simulation and each parameter value, simulate data for the
  ## whole study
  is <- 0
  pname <- if(isppo) 'group2' else 'group=2'
  ## Derive (event time, event indicator) for one subject's time-ordered y
  h <- function(time, y) {
    u <- timecriterion(y)
    if(! is.logical(u))
      return(list(etime=as.numeric(u[1]), event=as.integer(u[2])))
    # Note that if there are any absorbing events, the time vector
    # would already have been truncated at the first of such events
    if(any(u)) list(etime=as.numeric(min(time[u])), event=1L)
    else
      list(etime=as.numeric(max(time)), event=0L)
  }
  lrmcoef <- NULL
  co.na <- NULL # template of coef vector to be all NAs
  coefprev <- list() # to hold first working fit at last look for each parameter
  ## coefprev speeds up vglm (last look = maximum sample size)
  failures <- character(0)
  for(isim in 1 : nsim) {
    if(progress && (isim %% 10 == 0))
      cat('Simulation', isim, '\n', file=pfile)
    for(param in parameter) {
      cparam <- as.character(param)
      ## Sample N initial states
      initials <- sample(names(initial), N, replace=TRUE, prob=initial)
      if(is.numeric(y)) initials <- as.numeric(initials)
      ## Sample treatment groups 1 and 2
      X <- matrix(sample(1 : 2, N, replace=TRUE), ncol=1,
                  dimnames=list(NULL, 'group'))
      ## For simMarkovOrd X must be a matrix if it varies
      sdata <- simMarkovOrd(n=N, y, times, initials, X=X, absorb=absorb,
                            intercepts=intercepts, g=g, parameter=param,
                            rdsample=rdsample)
      ## BUG FIX: attribute name must match the one simMarkovOrd sets
      ## ('times.saved.per.subject'; previously read as
      ## 'time.saved.per.subject', so this message could never appear)
      tsps <- attr(sdata, 'times.saved.per.subject')
      if(length(tsps))
        cat('Average number of measurement times saved per subject by response-dependent sampling:', round(tsps, 1), '\n')
      ## sdata is a data frame containing id, time, yprev, y, ...
      sdata$group <- as.factor(sdata$group)
      if(yprevfactor) sdata$yprev <- as.factor(sdata$yprev)
      if(isim == 1 && ! isppo) {
        .dd. <- rms::datadist(sdata)
        options(datadist=.dd.) # requires rms 6.1-1
      }
      ## For each look compute the parameter estimate and its variance
      ## If a contrast is specified (say when treatment interacts with time)
      ## use that instead of a simple treatment effect
      ## For vglm speed up by taking as starting values the estimates
      ## from the last successful run
      for(l in looks) {
        ## Subjects are numbered consecutively with id=1,2,3,... and
        ## these correspond to sequential data looks when accumulated
        dat <- subset(sdata, id <= l)
        luy <- length(unique(dat$y))
        if(luy != ndy) {
          f <- paste('Simulated data for simulation with sample size', l,
                     'has', luy, 'distinct y values instead of the required',
                     ndy)
          fail <- TRUE
        } else {
          if(isppo) {
            cprev <- coefprev[[cparam]]
            ## Could not get system to find cprev when regular call inside try()
            ff <- call('vglm', formula,
                       VGAM::cumulative(parallel=ppo, reverse=TRUE),
                       coefstart=cprev, data=dat)
            f <- try(eval(ff), silent=TRUE)
          } else
            f <- try(rms::lrm(formula, data=dat, x=cscov, y=cscov), silent=TRUE)
          fail <- inherits(f, 'try-error')
        }
        if(fail) failures <- c(failures, as.character(f))
        else {
          ## Cache the first successful full-sample vglm fit as starting
          ## values for later fits under the same parameter
          if(isppo && l == max(looks) && ! length(coefprev[[cparam]]))
            coefprev[[cparam]] <- coef(f)
          if(! length(co.na)) { # save template to insert for failures
            co.na <- coef(f)
            co.na[] <- NA
          }
        }
        ## Cluster sandwich covariance adjustment for within-subject
        ## correlation (non-ppo path only)
        if(cscov && ! fail) f <- rms::robcov(f, dat$id)
        is <- is + 1
        sim [is] <- isim
        parm[is] <- param
        look[is] <- l
        if(fail) {
          est [is] <- NA
          vest[is] <- NA
        } else {
          if(length(groupContrast)) {
            fc <- if(isppo) (if(groupContrastUsesN)
                               groupContrast(f, n1=sum(dat$group == '1'),
                                             n2=sum(dat$group == '2'),
                                             parameter=param)
                             else
                               groupContrast(f))
                  else
                    rms::contrast(f, groupContrast[[2]], groupContrast[[1]])
            est [is] <- fc$Contrast
            vest[is] <- (fc$SE) ^ 2
          }
          else {
            est [is] <- coef(f)[pname]
            vest[is] <- vcov(f)[pname, pname]
          }
          ## Screen out wild estimates/variances if limits were given
          if(length(maxest) && abs(est[is]) > maxest) {
            failures <- c(failures, paste0('|contrast|>', maxest))
            est[is] <- vest[is] <- NA
            fail <- TRUE
          } else if(length(maxvest) && vest[is] > maxvest) {
            failures <- c(failures, paste0('variance>', maxvest))
            est[is] <- vest[is] <- NA
            fail <- TRUE
          }
        } # end else if not fail
      } # end looks
      ## Save last-look coefficients (all NA when the last fit failed)
      co <- if(fail) co.na else coef(f)
      if(! length(lrmcoef))
        lrmcoef <- array(0., dim=c(length(parameter), nsim, length(co)),
                         dimnames=list(as.character(parameter),
                                       paste('sim', 1 : nsim),
                                       names(co)))
      ww <- try(lrmcoef[as.character(param), isim, ] <- co)
      if(inherits(ww, 'try-error')) {
        wf <- 'estSeqMarkovOrd.err'
        prn(dimnames(lrmcoef), file=wf)
        prn(as.character(param), file=wf)
        prn(isim, file=wf)
        prn(co, file=wf)
        stop('non-conformable coefficients in estSeqMarkovOrd. See file estSeqMarkovOrd.err in current working directory.')
      }
      if(length(timecriterion)) {
        # Separately for each subject compute the time until the
        # criterion is satisfied. Right censor at last observed time if it
        # doesn't occur
        setDT(sdata, key=c('group', 'id', 'time'))
        d <- sdata[, h(time, y), by=.(group, id)]
        fit <- survival::coxph(survival::Surv(etime, event) ~ group, data=d)
        loghr [is] <- fit$coef
        lrchisq[is] <- 2. * diff(fit$loglik)
        if(coxzph)
          phchisq[is] <- survival::cox.zph(fit, transform='identity',
                                           global=FALSE)$table[, 'chisq']
        for(gr in c('1', '2')) {
          for(ev in 0 : 1) {
            utimes <- with(subset(d, group == gr & event == ev),
                           as.character(etime))
            utimes <- factor(utimes, as.character(times))
            tab <- table(utimes)
            Etimefreq[isim, as.character(param), gr, ev + 1, ] <-
              Etimefreq[isim, as.character(param), gr, ev + 1, ] + tab
          } # end censored vs event
        } # end group
      } # end timecriterion
      if(length(sstat)) {
        ## Separately for each subject compute the summary statistic.
        ## BUG FIX: := requires a data.table, but setDT was previously only
        ## run inside the timecriterion branch, so sstat alone failed
        if(! length(timecriterion)) setDT(sdata, key=c('group', 'id', 'time'))
        sds <- sdata[, ys := sstat(time, y), by=.(group, id)]
        Sstat[isim, as.character(param), sds$id, ] <-
          cbind(sds$group, sds$ys)
      } # end sstat
    } # end param
  } # end sim
  res <- data.frame(sim=sim, parameter=parm, look=look,
                    est=est, vest=vest)
  if(length(timecriterion)) {
    res$loghr <- loghr
    res$lrchisq <- lrchisq
    if(coxzph) res$phchisq <- phchisq
    attr(res, 'etimefreq') <- Etimefreq
  }
  if(length(sstat)) attr(res, 'sstat') <- Sstat
  attr(res, 'lrmcoef') <- lrmcoef
  ## Catalog reasons for unsuccessful fits
  failures <- if(length(failures))
    as.data.frame(table(failure=failures))
  else
    data.frame(failure='', Freq=0)
  attr(res, 'failures') <- failures
  res
}
#' Compute Parameters for Proportional Odds Markov Model
#'
#' Given a vector `intercepts` of initial guesses at the intercepts in a Markov proportional odds model, and a vector `extra` if there are other parameters, solves for the `intercepts` and `extra` vectors that yields a set of occupancy probabilities at time `t` that equal, as closely as possible, a vector of target values.
#' @title intMarkovOrd
#' @inheritParams simMarkovOrd
#' @param intercepts vector of initial guesses for the intercepts
#' @param extra an optional vector of intial guesses for other parameters passed to `g` such as regression coefficients for previous states and for general time trends. Name the elements of `extra` for more informative output.
#' @param target vector of target state occupancy probabilities at time `t`. If `extra` is specified, `target` must be a matrix where row names are character versions of `t` and columns represent occupancy probabilities corresponding to values of `y` at the time given in the row.
#' @param t target times. Can have more than one element only if `extra` is given.
#' @param ftarget an optional function defining constraints that relate to transition probabilities. The function returns a penalty which is a sum of absolute differences in probabilities from target probabilities over possibly multiple targets. The `ftarget` function must have two arguments: `intercepts` and `extra`.
#' @param onlycrit set to `TRUE` to only return the achieved objective criterion and not print anything
#' @param constraints a function of two arguments: the vector of current intercept values and the vector of `extra` parameters, returning `TRUE` if that vector meets the constraints and `FALSE` otherwise
#' @param printsop set to `TRUE` to print solved-for state occupancy probabilities for groups 1 and 2 and log odds ratios corresponding to them
#' @param ... optional arguments to pass to [stats::nlm()]. If this is specified, the arguments that `intMarkovOrd` normally sends to `nlm` are not used.
#'
#' @return list containing two vectors named `intercepts` and `extra` unless `onlycrit=TRUE` in which case the best achieved sum of absolute errors is returned
#' @author Frank Harrell
#' @export
#' @md
#' @seealso [simMarkovOrd()], [soprobMarkovOrd()]
## Solve for the intercepts (and optional extra parameters) of a
## proportional odds Markov model so that model-based state occupancy
## probabilities at time(s) t match `target` as closely as possible.
## Minimizes the sum of absolute differences using stats::nlm().
## Argument details are in the roxygen comments above.
intMarkovOrd <- function(y, times, initial, absorb=NULL,
                         intercepts, extra=NULL, g, target, t, ftarget=NULL,
                         onlycrit=FALSE, constraints=NULL,
                         printsop=FALSE, ...) {
  ## Proportional odds intercepts must be in decreasing order
  if(any(diff(intercepts) > 0)) stop('initial intercepts are out of order')

  ## Target times index rows of `target` by their character form
  t <- as.character(t)
  if(length(t) > 1 && (! is.matrix(target) || nrow(target) != length(t)))
    stop('target must be a matrix with # rows = length of t')
  if(length(t) == 1) target <- matrix(target, nrow=1, dimnames=list(t, NULL))
  ## Each row of target must be a probability distribution over the states
  for(ti in t)
    if(abs(sum(target[ti, ]) - 1.) > 1e-5)
      stop('each row of target must sum to 1')

  ## Objective function for nlm(); the parameter vector a = c(intercepts, extra)
  h <- function(a) {
    ## Compute state occupancy probabilities at time t for current
    ## vector of intercept values and extra
    ints <- a[1 : (length(a) - length(extra))]
    ## Heavy penalty for out-of-order intercepts
    if(any(diff(ints) > 0.)) return(1000.)
    ## `extra` is the closure value on entry; replace with trial values
    if(length(extra)) extra <- a[-(1 : length(ints))]
    ## Heavy penalty for violating user-specified constraints
    if(length(constraints) && ! constraints(ints, extra)) return(1000.)
    s <- soprobMarkovOrd(y, times, initial=initial, absorb=absorb,
                         intercepts=ints, g=g, X=1, extra=extra)[t,, drop=FALSE ]
    # Objective function to minimize: sum of absolute differences with targets
    # with restriction that intercepts be in descending order
    crit <- 0. # if(any(diff(ints) > 0.)) 1000. else 0.
    for(tim in rownames(s)) crit <- crit + sum(abs(s[tim, ] - target[tim, ]))
    ## Optional additional penalty relating to transition probabilities
    if(length(ftarget)) crit <- crit + ftarget(intercepts=ints, extra=extra)
    crit
  }

  ## If the user supplied nlm() options use them verbatim; otherwise defaults
  if(length(list(...)))
    u <- nlm(h, c(intercepts, extra), ...)
  else
    u <- nlm(h, c(intercepts, extra), iterlim=300)
  if(onlycrit) return(u$minimum)

  cat('\nIterations:', u$iterations, '\n')
  cat('Sum of absolute errors:', u$minimum, '\n')
  ## Split the solution vector back into intercepts and extra
  ints <- u$estimate[1 : (length(u$estimate) - length(extra))]
  if(length(extra)) extra <- structure(u$estimate[-(1 : length(ints))],
                                       names=names(extra))
  cat('Intercepts:', round(ints, 3), '\n')
  if(length(extra)) {
    cat('\nExtra parameters:\n\n')
    print(round(extra, 4))
  }

  ## Occupancy probabilities at the solution for group (X=) 1
  s1 <- soprobMarkovOrd(y, times, initial=initial, absorb=absorb,
                        intercepts=ints, g=g, X=1, extra=extra)
  if(printsop) {
    cat('\nOccupancy probabilities for group 1:\n\n')
    print(round(s1, 3))
  }
  # Show occupancy probabilities for group 2
  s2 <- soprobMarkovOrd(y, times, initial=initial, absorb=absorb,
                        intercepts=ints, g=g, X=2, extra=extra)
  if(printsop) {
    cat('\nOccupancy probabilities for group 2:\n\n')
    print(round(s2, 3))
  }

  ## Compute log odds ratios at day t
  if(printsop) for(ti in t) {
    ## Get cumulative probabilities from right to left except for the first
    s1t <- rev(cumsum(rev(s1[ti, -1])))
    s2t <- rev(cumsum(rev(s2[ti, -1])))
    lor <- round(qlogis(s2t) - qlogis(s1t), 3)
    cat('\nLog odds ratios at', paste0('t=', ti),
        'from occupancy probabilities:' , lor, '\n')
  }
  list(intercepts=ints, extra=extra)
}
#' State Occupancy Probabilities for First-Order Markov Ordinal Model from a Model Fit
#'
#' Computes state occupancy probabilities for a single setting of baseline covariates. If the model fit was from `rms::blrm()`, these probabilities are from all the posterior draws of the basic model parameters. Otherwise they are maximum likelihood point estimates.
#'
#' @title soprobMarkovOrdm
#' @param object a fit object created by `blrm`, `lrm`, `orm`, `VGAM::vglm()`, or `VGAM::vgam()`
#' @param data a single observation list or data frame with covariate settings, including the initial state for Y
#' @param times vector of measurement times
#' @param ylevels a vector of ordered levels of the outcome variable (numeric or character)
#' @param absorb vector of absorbing states, a subset of `ylevels`. The default is no absorbing states. (numeric, character, factor)
#' @param tvarname name of time variable, defaulting to `time`
#' @param pvarname name of previous state variable, defaulting to `yprev`
#' @param gap name of time gap variable, defaults assuming that gap time is not in the model
#'
#' @return if `object` was not a Bayesian model, a matrix with rows corresponding to times and columns corresponding to states, with values equal to exact state occupancy probabilities. If `object` was created by `blrm`, the result is a 3-dimensional array with the posterior draws as the first dimension.
#' @export
#' @author Frank Harrell
#' @seealso [soprobMarkovOrd()]
#' @md
## Compute state occupancy probabilities over `times` for a single
## covariate setting from a fitted first-order Markov ordinal model.
## For Bayesian fits (rmsb::blrm with posterior draws) the result is a
## draws x times x states array; otherwise a times x states matrix.
## Argument details are in the roxygen comments above.
soprobMarkovOrdm <- function(object, data, times, ylevels, absorb=NULL,
                             tvarname='time', pvarname='yprev',
                             gap=NULL) {
  cl <- class(object)[1]
  ## Map fitting function -> package family, to select the predict call
  ftypes <- c(lrm='rms', orm='rms', blrm='rmsb', vglm='vgam', vgam='vgam')
  ftype <- ftypes[cl]
  if(is.na(ftype)) stop(paste('object must be a fit from one of:',
                              paste(ftypes, collapse=' ')))
  ## For VGAM objects, predict() did not find the right function when
  ## inside the rms package
  prd <-
    switch(ftype,
           rms =function(obj, data) predict(obj, data, type='fitted.ind'),
           vgam=function(obj, data) VGAM::predict(obj, data, type='response'),
           rmsb=function(obj, data) predict(obj, data, type='fitted.ind',
                                            posterior.summary='all'))
  if(pvarname %nin% names(data))
    stop(paste(pvarname, 'is not in data'))
  if(length(absorb) && (pvarname %in% absorb))
    stop('initial state cannot be an absorbing state')
  ## nd = number of posterior draws; 0 signals a frequentist fit
  nd <- if(ftype == 'rmsb' && length(object$draws)) nrow(object$draws) else 0
  if((nd == 0) != (ftype != 'rmsb'))
    stop('model fit inconsistent with having posterior draws')
  k <- length(ylevels)
  s <- length(times)
  ## Result container: times x states, or draws x times x states
  P <- if(nd == 0)
    array(NA, c(s, k),
          dimnames=list(as.character(times),
                        as.character(ylevels))) else
    array(NA, c(nd, s, k),
          dimnames=list(paste('draw', 1 : nd), as.character(times),
                        as.character(ylevels)))
  # Never uncondition on initial state
  data[[tvarname]] <- times[1]
  if(length(gap)) data[[gap]] <- times[1]
  data <- as.data.frame(data)
  ## Occupancy probabilities at the first time come straight from predict()
  p <- prd(object, data)
  if(nd == 0) P[1, ] <- p else P[, 1, ] <- p
  # cp: matrix of conditional probabilities of Y conditioning on previous time Y
  # Columns = k conditional probabilities conditional on a single previous state
  # Rows = all possible previous states
  # This is for a single posterior draw (or for a frequentist fit)
  rnameprev <- paste('t-1', ylevels)
  rnameprevna <- paste('t-1', setdiff(ylevels, absorb))
  if(length(absorb)) {
    rnamepreva <- paste('t-1', absorb)
    cnamea <- paste('t', absorb)
  }
  cp <- matrix(0., nrow=k, ncol=k,
               dimnames=list(rnameprev, paste('t', ylevels)))
  ## cp is initialized to zero, which will remain the values for
  ## probabilities of moving out of absorbing (row) states
  ## Set probabilities of staying in absorbing states to 1
  if(length(absorb)) cp[cbind(rnamepreva, cnamea)] <- 1.
  data <- as.list(data)
  yna <- setdiff(ylevels, absorb) # non-absorbing states
  data[[pvarname]] <- yna # don't request estimates for absorbing states
  edata <- expand.grid(data)
  ## Propagate occupancy probabilities forward one time step at a time
  for(it in 2 : s) {
    edata[[tvarname]] <- times[it]
    if(length(gap)) edata[[gap]] <- times[it] - times[it - 1]
    pp <- prd(object, edata)
    if(nd == 0) {
      ## If there are absorbing states, make a bigger version of
      ## the cell probability matrix that includes them
      ## Rows representing absorbing states have P(staying in that state)=1
      cp[rnameprevna, ] <- pp
      ## Compute unconditional probabilities of being in all possible states
      ## at current time t
      P[it, ] <- t(cp) %*% P[it - 1, ]
    }
    else {
      ## Bayesian fit: apply the same recursion separately per posterior draw
      for(idraw in 1 : nd) {
        cp[rnameprevna, ] <- pp[idraw, ,]
        P[idraw, it, ] <- t(cp) %*% P[idraw, it - 1, ]
      }
    }
  }
  P
}
## Declare non-standard-evaluation symbols (data.table/by= variables used
## above) so R CMD check does not flag them as undefined globals
utils::globalVariables(c('id', 'group', 'event', ':=', 'ys'))
Hmisc/R/sas.get.s 0000644 0001762 0000144 00000105105 14112734310 013244 0 ustar ligges users ## $Id$
## Read a SAS dataset member from a SAS data library (a directory) by
## generating a SAS program (from sas.get.macro), running the SAS system
## executable, and reading the resulting ASCII dictionary/data files back
## into R.  Optionally converts SAS date/time variables, applies PROC
## FORMAT value labels (recode), and records special missing values.
## Returns a data frame (or, with pos= specified, assigns each variable
## individually and returns the position).  Requires a working SAS
## executable named by `sasprog`.
sas.get <-
  function(libraryName,
           member,
           variables = character(0),
           ifs = character(0),
           format.library = libraryName,
           id,
           dates. = c("sas","yymmdd","yearfrac","yearfrac2"),
           keep.log = TRUE,
           log.file = "_temp_.log",
           macro = sas.get.macro,
           data.frame.out = existsFunction("data.frame"),
           clean.up = FALSE,
           quiet = FALSE,
           temp = tempfile("SaS"),
           formats=TRUE,
           recode=formats,
           special.miss=FALSE,
           sasprog="sas",
           as.is=.5,
           check.unique.id=TRUE,
           force.single=FALSE,
           pos,
           uncompress=FALSE,
           defaultencoding="latin1",
           var.case="lower")
{
  if(force.single) stop('force.single does not work under R')
  dates. <- match.arg(dates.)

  ## file.exists() wrapper that also records, in attribute 'which',
  ## which of the candidate file names actually exist
  fexists <- function(name) {
    w <- file.exists(name)
    attr(w, 'which') <- name[w]
    w
  }

  file.is.dir <- function(name) {
    isdir <- file.info(name)$isdir
    isdir && !is.na(isdir)
  }

  file.is.readable <- function(name) file.access(name,4)==0

  fileShow <- function(x) file.show(x)

  ## Recoding to factor levels requires the format catalog
  if(recode) formats <- TRUE

  if(missing(formats) || formats) {
    ## Check that a format catalog exists in format.library; if not,
    ## silently (or with a warning if formats/recode explicitly requested)
    ## disable format processing
    ## ***** Next line begins mod from Mike Kattan edits 11 Sep 97
    ## Redone FEH 22Oct00
    no.format <- all(!fexists(file.path(format.library,
                                        c('formats.sc2','formats.sct','formats.sct01','formats.sas7bcat'))))
    if(no.format) {
      if((!missing(formats) && formats) || (!missing(recode) && recode))
        warning(paste(paste(format.library,
                            "/formats.sc? or formats.sas7bcat",sep = ""),
                      " not found. Formatting ignored. \n"))
      formats <- recode <- FALSE
    }
    ## ***** End Mike Kattan edits 11 Sep 97
  }

  ## Temporary file names: the generated SAS program and its four outputs
  ## (dictionary, data, format definitions, special missing locations)
  ## 5 Changes here from Claudie Berger 19feb00
  ## Allows work on sas v7.
  sasin <- paste(temp, ".3.sas", sep = "")
  sasout1 <- paste(temp, ".1.sas", sep = "")
  sasout2 <- paste(temp, ".2.sas", sep = "")
  sasout3 <- paste(temp, ".4.sas", sep = "")
  sasout4 <- paste(temp, ".5.sas", sep = "")
  nvariables <- length(variables)
  if(nvariables>0) {
    if(any(jdup <- duplicated(variables)))
      stop(paste("duplicate variables requested: ", variables[jdup]))
  }

  ## Macro arguments: requested variable list and subsetting IF clauses
  varstring <- paste(variables, collapse = "\n ")
  ifs <- paste("'",paste(ifs, collapse = ";\n "),"'",sep="")
  if(length(sasin) != 1)
    stop("Illegal temporary file name")

  temp.files <- c(sasin, sasout1, sasout2, sasout3, sasout4)
  if(!keep.log)
    temp.files <- c(temp.files, log.file)
  if(clean.up)
    on.exit(unlink(temp.files))
  ##on.exit(sys(paste("rm -f", paste(temp.files, collapse = " "))))
  ## 4oct03

  if(missing(member))
    stop("SAS member name is required")
  if(missing(libraryName))
    stop("SAS library name is required")

  ## Write the SAS program: default encoding then the macro source
  ## Encoding added by Reinhold Koch 24Jan14
  cat("%LET DEFAULTE=", defaultencoding, ";\n", sep="", file=sasin)
  cat(macro, sep="\n", file=sasin, append=TRUE)

  ## Recognized SAS dataset file extensions (v6 through v7+)
  sasds.suffix <- c('sd2','sd7','ssd01','ssd02','ssd03','ssd04','sas7bdat')
  ## 22Oct00
  if(libraryName == "") libraryName <- "."
  if(!file.is.dir(libraryName))
    stop(paste(sep = "", "library, \"", libraryName,
               "\", is not a directory"))

  ## Locate the dataset file, optionally uncompressing .gz/.Z versions first
  unix.file <- file.path(libraryName, paste(member, sasds.suffix, sep="."))
  if(uncompress) {
    if(any(fe <- fexists(paste(unix.file,".gz",sep=""))))
      system(paste("gunzip ", attr(fe,'which'),'.gz',sep=''))
    else if(any(fe <- fexists(paste(unix.file,".Z",sep=""))))
      system(paste("uncompress ",attr(fe,'which'),'.Z',sep=''))
  }
  if(!any(fe <- fexists(unix.file))) {
    stop(paste(sep = "", "Unix file, \"",
               paste(unix.file,collapse=' '),
               "\", does not exist"))
  } else {
    file.name <- attr(fe,'which')
    if(!file.is.readable(file.name)) {
      stop(paste(sep = "",
                 "You do not have read permission for Unix file, \"",
                 file.name, "\"")) # 22Oct00
    }
  }

  cat("libname temp '", libraryName, "';\n", file = sasin, append = TRUE,
      sep = "")
  ## format.library should contain formats.sct containing user defined
  ## formats used by this dataset. It must be present.
  cat("libname library '", format.library, "';\n", file = sasin,
      append = TRUE, sep = "")

  ## Invoke the %sas_get macro with output file names and options
  cat("%sas_get(temp.", member, ",\n",
      " ", sasout1, ",\n",
      " ", sasout2, ",\n",
      " ", sasout3, ",\n",
      " ", sasout4, ",\n",
      " dates=", dates., ",\n",
      " vars=", varstring, ",\n",
      " ifs=", ifs, ",\n",
      " formats=", as.integer(formats), "\n,",
      " specmiss=", as.integer(special.miss), ");\n",
      file = sasin, append = TRUE, sep = "")

  ## Run the SAS job; show the log on failure unless quiet
  status <- system(paste(shQuote(sasprog), shQuote(sasin), "-log",
                         shQuote(log.file)), intern=FALSE)
  ## 24nov03 added output=F
  if(status != 0) {
    if(!quiet && fexists(log.file)) fileShow(log.file) ## 4oct03
    stop(paste("SAS job failed with status", status))
  }

  ##
  ## Read in the variable information
  ##
  if(!(fexists(sasout1) && fexists(sasout2))) {
    if(!quiet)
      fileShow(log.file) ## 4oct03
    stop("SAS output files not found")
  }

  ## Data dictionary: one record per variable, "\022"-delimited
  vars <-
    scan(sasout1, list(name = "", type = 0, length = 0,
                       format = "", label = "", n = 0),
         multi.line = FALSE, sep = "\022",
         flush=TRUE, comment.char='', quote='')
  ## Thanks Don MacQueen for scan fix for R

  nvar <- length(vars$name)
  if(nvar == 0) {
    if(!quiet)
      fileShow(log.file) ## 4oct03
    stop("First SAS output is empty")
  }

  nrow <- vars$n[1] #n is the same for each variable

  ## Read the data in
  ## We try to be clever about the variable type. If SAS is character
  ## use char of course. If is numeric and length >4, use double. If
  ## numeric and length <4, use single. We could also use the format to
  ## choose further, if it consists of a number followed by a "."
  ## can we safely assume integer.
  ##
  type <- ifelse(vars$type == 2, "character(nrow)",
                 ifelse(force.single, ##28Mar01
                        "single(nrow)", "double(nrow)"))

  ##BILL: I corrected the macro so the following isn't needed:
  ## get rid of trailing blank on names
  ## vars$name <- unix("sed 's/ $//'", vars$name)
  ## Build a list() template of pre-sized columns for scan()
  inlist <- paste("\"", vars$name, "\"=", type,
                  sep = "", collapse = ", ")
  inlist <- parse(text = paste("list(", inlist, ")"))
  ## Inlist would now be the size of the final data structure, if I had
  ## evaluated it.
  ## Read the data
  ds <-
    scan(sasout2, eval(inlist), sep = "\022", multi.line = FALSE,
         flush=TRUE, comment.char='', quote='')
  if(length(ds) < nvariables) {
    m <- variables[is.na(match(variables, names(ds)))]
    if(length(m) > 0) {
      warning(paste(length(m),
                    "requested variables did not exist:",
                    paste("\"", m, "\"", sep = "", collapse = " "),
                    "\n\t(use sas.contents())"))
    }
  }

  format <- vars$format
  format[format=='$'] <- ' ' # 1Mar00
  label <- vars$label
  name <- vars$name

  ## Read the PROC FORMAT value/label dump (in R dget() syntax) if present
  esasout3 <- formats && fexists(sasout3) #added formats && 1/20/93
  if(recode && !esasout3) recode <- FALSE
  FORMATS <- NULL
  if(formats && esasout3) {
    FORMATS <- dget(sasout3)
    if(length(FORMATS)==0) {
      FORMATS <- NULL;
      recode <- FALSE
    }
  }

  ## Read the special missing value locations, if requested and present
  smiss <- NULL
  if(special.miss && fexists(sasout4))
    smiss <-
      scan(sasout4, list(name="", code="", obs=integer(1)),
           multi.line=FALSE, flush=TRUE, sep="\022",
           comment.char='', quote='')

  ## Lookup tables mapping SAS date/time display formats to output forms
  sasdateform <- c("date","mmddyy","yymmdd","ddmmyy","yyq","monyy",
                   "julian","qtr","weekdate","weekdatx","weekday","month")
  dateform <-
    list(as.name("ddmmmyy"),"m/d/y","y/m/d","d/m/y",as.name("ddmmmyy"),
         "mon year",as.name("ddmmmyy"),"mon",as.name("ddmmmyy"),
         as.name("ddmmmyy"), as.name("ddmmmyy"),"m")
  sastimeform <- c("hhmm","hour","mmss","time")
  timeform <- c("h:m","h","m:s","h:m:s")
  sasdatetimeform <- c("datetime","tod")
  datetimeform <- list(list(as.name("ddmmmyy"),"h:m:s"), c("m/d/y"," "))
  z <- "%02d%b%Y"
  dateform4 <-
    c(z,"%02m/%02d/%Y","%Y/%02m/%02d","%02d/%02m/%Y", z,"%02m %Y",
      z,"%02m", z, z, z,"%02m")
  timeform4 <- c("%02H:%02M","%02H","%02M:%02S","%02H:%02M:%02S")
  datetimeform4 <- c("%02d%b%Y %02h:%02m:%02s","%02m/%02d/%Y")

  ## Don MacQueen
  days.to.adj <- as.numeric(difftime(ISOdate(1970,1,1,0,0,0) ,
                                     ISOdate(1960,1,1,0,0,0), 'days'))
  secs.to.adj <- days.to.adj*24*60*60

  ## Per-variable post-processing: date/time conversion, format recoding,
  ## labels, factor conversion, and special missing value attributes
  for(i in 1:nvar) {
    atr <- list()
    dsi <- ds[[i]]
    fname <- format[i]
    rec <- FALSE
    if(fname!=" ") {
      ff <- fname
      if(dates.=="sas" & (m <- match(fname,sasdateform,0)) >0) {
        ##look for partial dates
        dd <- dsi-floor(dsi)
        ddn <- !is.na(dd)
        if(any(ddn) && any(dd[ddn]!=0)) {
          ll <- 1:length(dd)
          atr$partial.date <-
            list(month=ll[dd==.5],day=ll[dd==.25],both=ll[dd==.75])
          atr$imputed <- ll[dd!=0]
          dsi <- floor(dsi)
        }
        dsi <- importConvertDateTime(dsi, 'date', 'sas',
                                     form=dateform[m])
        if(length(atr$imputed))
          attr(dsi,'class') <- c("impute",attr(dsi,'class'))
        ff <- NULL
      } else {
        if((m <- match(fname,sastimeform,0)) >0) {
          dsi <- importConvertDateTime(dsi, 'time', 'sas',
                                       form=timeform[m])
          ff <- NULL
        } else if((m <- match(fname,sasdatetimeform,0))>0) {
          dsi <- importConvertDateTime(dsi, 'datetime', 'sas',
                                       form=datetimeform[m])
          ff <- NULL
        }
      }
      atr$format <- ff
      ## Recode using PROC FORMAT value labels when available
      if(recode & length(g <- FORMATS[[fname]])) {
        labs <- g$labels
        if(!is.logical(recode)) {
          labs <- if(recode==1) paste(g$values,":",labs,sep="")
          else paste(labs,"(",g$values,")",sep="")
        }
        dsi <- factor(dsi, g$values, labs)
        atr$sas.codes <- g$values
        rec <- TRUE
      }
    }
    ## Convert character variables to factors per the as.is policy
    if(data.frame.out && !rec && vars$type[i]==2 &&
       ((is.logical(as.is) && !as.is) ||
        (is.numeric(as.is) && length(unique(dsi)) < as.is*length(dsi))))
      dsi <- factor(dsi, exclude="") #exclude added 5Mar93

    ## For data frames, char. var usually factors
    if(label[i]!=" ")
      label(dsi) <- label[i] #atr$label <- label[i]

    if(length(smiss$name)) {
      j <- smiss$name==name[i]
      if(any(j)) {
        atr$special.miss <-
          list(codes=smiss$code[j],obs=smiss$obs[j])
        attr(dsi,'class') <- c("special.miss",attr(dsi,'class'))
      }
    }

    if(!is.null(atr))
      attributes(dsi) <- c(attributes(dsi),atr)

    if(missing(pos))
      ds[[i]] <- dsi
    else
      assign(name[i], dsi, pos=pos)
  }

  ## With pos= specified, variables were assigned individually above
  if(!missing(pos))
    return(structure(pos, class="pos"))

  atr <- list()
  ## Row names: default 1..n, or built from the id variable(s)
  if(missing(id)) {
    if(data.frame.out)
      atr$row.names <- as.character(1:nrow)
  } else {
    idname <- id
    jj <- match(idname, names(ds), 0)
    if(any(jj==0))
      stop(paste("id variable(s) not in dataset:",
                 paste(idname[jj==0],collapse=" ")))
    if(length(idname)==1) {
      id <- ds[[idname]] #Need since not use data.frame
    } else {
      ## Multiple id variables: paste them together, space-separated
      id <- as.character(ds[[idname[1]]])
      for(jj in 2:length(idname))
        id <- paste(id, as.character(ds[[idname[jj]]]))
    }

    if(check.unique.id) {
      dup <- duplicated(id)
      if(any(dup))
        warning(paste("duplicate IDs:",
                      paste(id[dup], collapse=" ")))
    }

    if(data.frame.out)
      atr$row.names <- as.character(id)
    else atr$id <- id
  }

  ## Normalize variable-name case
  if(var.case=="lower"){
    names(ds)=tolower(names(ds))
  }
  if(var.case=="upper"){
    names(ds)=toupper(names(ds))
  }

  if(!is.null(FORMATS))
    atr$formats <- FORMATS
  if(data.frame.out)
    atr$class <- "data.frame"

  attributes(ds) <- c(attributes(ds),atr)
  ds
}
## Convert numeric date/time values imported from SAS, SPSS, or DATALOAD
## to R date/time classes.
##
## x     : numeric vector (days, or seconds for SPSS dates and SAS
##         times/datetimes)
## type  : kind of variable being converted; only 'date' is supported
##         for spss and dataload input
## input : source system, determining the numeric origin
## form  : display format hint passed by sas.get(); currently unused here
##         but retained for interface compatibility
##
## Returns a Date, a "timePOSIXt" POSIXct (time of day), or a POSIXct.
importConvertDateTime <-
  function(x, type=c('date','time','datetime'),
           input=c('sas','spss','dataload'), form) {
  type  <- match.arg(type)
  input <- match.arg(input)
  if(input != 'sas' && type != 'date')
    stop('only date variables are supported for spss, dataload')

  ## Days from each system's date origin to R's 1970-01-01:
  ## 1960-01-01 (sas), 1582-10-14 (spss), 1600-03-01 (dataload)
  adjdays <- c(sas=3653, spss=141428, dataload=135080)[input]
  origin <- c(sas='1960-01-01', spss='1582-10-14', dataload='1600-03-01')[input]

  if(input=='spss') x <- x/86400  # SPSS stores dates in seconds; want days

  switch(type,
         date = structure(x - adjdays, class='Date'),
         time = {
           ## Seconds since midnight -> clock time; round-trip through a
           ## GMT format so the clock reading survives the local timezone
           ## (Don MacQueen 3Apr02)
           z <- structure(x, class=c('POSIXt','POSIXct'))
           z <- as.POSIXct(format(z, tz='GMT'), tz='')
           structure(z, class=c('timePOSIXt','POSIXt','POSIXct'))
         },
         datetime = as.POSIXct(x, origin=origin, tz='GMT'))
}
## Don MacQueen 3Apr02
## Format method for "timePOSIXt" (time-of-day) values: a slightly
## modified copy of format.POSIXct() from R base that strips the leading
## "timePOSIXt" class before delegating to the POSIXlt formatter.
format.timePOSIXt <- function (x, format = "%H:%M:%S", tz = "",
                               usetz = FALSE, ...) {
  if (!inherits(x, c("timePOSIXt","POSIXct"))) stop("wrong class")
  ## Drop "timePOSIXt" so base POSIX machinery handles the value
  class(x) <- class(x)[-1]
  formatted <- format.POSIXlt(as.POSIXlt(x, tz), format, usetz, ...)
  structure(formatted, names = names(x))
}
## Print method for "timePOSIXt": print the formatted clock time
print.timePOSIXt <- function(x, ...) print(format(x, ...))
##if(!.R.) {
## Output format routine needed by chron for usual SAS date format
## Output-format routine used by chron for the usual SAS date style,
## e.g. 21Jan97.  Years in 1900-1999 are printed with two digits;
## all other years are printed in full.  Requires the 'chron' package.
ddmmmyy <- function(x)
{
  if (!requireNamespace("chron", quietly = TRUE))
    stop("This function requires the 'chron' package.")
  parts <- chron::month.day.year(trunc(unclass(x)), attr(x, "origin"))
  yr  <- parts$year
  mon <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct",
           "Nov","Dec")[parts$month]
  ## Two-digit year only within the 20th century
  ifelse(yr >= 1900 & yr < 2000,
         paste(parts$day, mon, yr - 1900, sep = ""),
         paste(parts$day, mon, yr, sep = ""))
}
## Functions to handle special.miss class
## Which elements of x carry a SAS special missing value?
## Returns a logical vector the length of x.  With `code` supplied, only
## observations recorded with that particular special missing code are
## flagged.  Vectors without a "special.miss" attribute yield all FALSE.
is.special.miss <- function(x, code)
{
  info <- attr(x, "special.miss")
  flagged <- rep(FALSE, length(x))
  if(!length(info))
    return(flagged)
  hits <- if(missing(code)) info$obs
          else info$obs[info$codes == code]
  flagged[hits] <- TRUE
  flagged
}
## Subscript method for "special.miss" vectors.  Subsets the underlying
## data and rebuilds the special.miss attribute so that its observation
## indexes refer to positions within the result.  When no special missing
## values survive the subset, the attribute and class are removed.
"[.special.miss" <- function(x, ..., drop=FALSE)
{
  ats <- attributes(x)
  ## These attributes must not be copied wholesale onto the subset
  ats$dimnames <- NULL
  ats$dim <- NULL
  ats$names <- NULL
  attr(x,'class') <- NULL
  y <- x[..., drop = drop]
  if(length(y) == 0)
    return(y)
  ## k records the original positions selected (same ... subscript)
  k <- seq(along=x)
  names(k) <- names(x)
  k <- k[...]
  attributes(y) <- c(attributes(y), ats)
  smiss <- attr(y, "special.miss")
  ## Expand codes to full length; "ZZ" marks "not a special missing"
  codes <- rep("ZZ",length(x))
  codes[smiss$obs] <- smiss$codes
  codes <- codes[...]
  which <- codes!="ZZ"
  if(sum(which)) attr(y,"special.miss") <-
    list(obs=seq(along=k)[codes!="ZZ"],codes=codes[codes!="ZZ"])
  else {
    ## No special missings remain: drop the attribute and the class
    attr(y,"special.miss") <- NULL
    attr(y,'class') <- attr(y,'class')[attr(y,'class') != "special.miss"]
    if(length(attr(y,'class'))==0)
      attr(y,'class') <- NULL
  }
  y
}
## Format method for "special.miss" vectors: format the underlying values
## as character, then substitute each special missing code in place of its
## observation's formatted value.  Attribute baggage is stripped from the
## result.
format.special.miss <- function(x, ...)
{
  if(is.factor(x)) {
    formatted <- as.character(x)
  } else {
    ## Temporarily hide the special.miss class so the value's remaining
    ## class (if any) picks its own format method
    classes <- attr(x, 'class')
    classes <- classes[classes != "special.miss"]
    if(length(classes)) {
      attr(x, 'class') <- classes
      formatted <- format(x, ...)
    } else {
      formatted <- format.default(x, ...)
    }
  }
  sm <- attr(x, "special.miss")
  names(formatted) <- names(x)
  if(!length(sm))
    return(formatted)
  ## Overwrite formatted values with the recorded special missing codes
  formatted[sm$obs] <- sm$codes
  attr(formatted, "label") <- NULL
  attr(formatted, "special.miss") <- NULL
  attr(formatted, "class") <- NULL
  formatted
}
## Print method for "special.miss" vectors.  Values with special missing
## codes are shown via format.special.miss(); vectors without the
## attribute fall back to default printing.  Returns invisibly.
print.special.miss <- function(x, ...)
{
  if(!length(attr(x, "special.miss"))) {
    print.default(x)
    return(invisible())
  }
  print.default(format.special.miss(x), quote = FALSE)
  invisible()
}
## Retrieve the original SAS format code values that sas.get(recode=)
## stored in the 'sas.codes' attribute when recoding to a factor
sas.codes <- function(object) attr(object, "sas.codes")
## Prefix each factor level with its stored SAS code ("code:level").
## No-op when the object carries no 'sas.codes' attribute.
code.levels <- function(object) {
  codes <- attr(object, "sas.codes")
  if(length(codes))
    levels(object) <- paste(codes, ":", levels(object), sep = "")
  object
}
## Convert a special.miss vector into a one-column data frame, mirroring
## base as.data.frame for vectors: row names come from names(x) when they
## are complete and unique, otherwise "" (optional=TRUE) or 1..n.
as.data.frame.special.miss <- function(x, row.names = NULL, optional = FALSE, ...)
{
  nrows <- length(x)
  if(is.null(row.names)) {
    nm <- names(x)
    ## Element names serve as row names when present and free of duplicates
    if(length(nm) == nrows && !anyDuplicated(nm))
      row.names <- nm
    else if(optional)
      row.names <- character(nrows)
    else
      row.names <- as.character(1:nrows)
  }
  value <- list(x)
  ## Column name is the deparsed calling expression, as for base vectors
  if(!optional)
    names(value) <- deparse(substitute(x))[[1]]
  structure(value, row.names = row.names, class = 'data.frame')
}
## val{nval}=compress(value)||"" was =value 23mar04
## sas.get.macro: SAS macro source (one string per line) that sas.get()
## writes to a temporary .sas program file and runs with the SAS system.
## The %sas_get macro writes four files for R to read back: (1) the data
## dictionary, (2) an ASCII copy of the data, (3) optional PROC FORMAT
## value/label definitions in R dget() syntax, and (4) optional special
## missing value locations.
sas.get.macro <-
  c("/* Macro sas_get (modified by F. Harrell 30Jan90, Bill Dunlap Dec90, FH Mar92,",
    "\t\t\tFH Apr95 (extend LENGTH smiss))",
    " Sets up for conversion of SAS dataset to S dataset.",
    " Arguments:", "\tdataset - name of SAS dataset",
    "\ttemp1\t- Name of temporary dataset to contain data dictionar (unquoted)",
    "\t\t default=/tmp/file.1",
    "\ttemp2\t- Name of temporary dataset to contain ASCII version of SAS",
    "\t\t dataset (unquoted)", "\t\t default=/tmp/file.2",
    "\ttemp3 - Name of temporary dataset to contain ASCII file with S",
    "\t\t program to store format values and labels",
    "\ttemp4 - Name of temporary dataset to contain ASCII file with",
    "\t\t locations of special missing values",
    "\tdates\t- SAS to store date variables in SAS format ( # days from 1/1/60)",
    "\t\t (default)",
    "\t\t- YEARFRAC to store as days from 1/1/1900, divided by 365.25",
    "\t\t- YEARFRAC2 to store as year + fraction of current year",
    "\t\t- YYMMDD to store as numeric YYMMDD",
    "\tvars - list of variable in dataset that you want returned to S",
    " (unquoted, separate variable names with spaces) If empty,",
    " then return all variables.",
    " ifs - sequence of SAS subsetting if statements, (unquoted,",
    " separated by semicolons).",
    "\tformats - 0 (default) - do not create file on temp3 containing S",
    "\t\t statements to store format values and labels, 1 do create",
    "\tspecmiss- 0 (default). Set to 1 to write a data file on temp4 with",
    "\t\t the fields: variable name, special missing value code,",
    "\t\t observation number",
    "\tdefencod - default encoding of dataset if it does not specify",
    " */",
    "%macro sas_get(dataset, temp1, temp2, temp3, temp4, dates=SAS, vars=, ifs=, ",
    "\tformats=0, specmiss=0, defencod=&DEFAULTE);",
    "OPTIONS NOFMTERR;",
    "%LET DSID=%SYSFUNC(open(&dataset,i));",
    "%LET ENCODE=%SCAN(%SYSFUNC(ATTRC(&DSID,ENCODING)),1);",
    "%IF &ENCODE=Default %THEN %LET dataset=&dataset(encoding=&defencod);",
    "%IF %QUOTE(&temp1)= %THEN %LET temp1=/tmp/file.1;",
    "%IF %QUOTE(&temp2)= %THEN %LET temp2=/tmp/file.2;",
    "%IF %QUOTE(&temp3)= %THEN %LET temp3=/tmp/file.3;",
    "%IF %QUOTE(&temp4)= %THEN %LET temp4=/tmp/file.4;",
    ## Next line had %QUOTE(&ifs),1,\"'\" 31oct02
    "%LET dates=%UPCASE(&dates);", "%LET ifs=%SCAN(%QUOTE(&ifs),1,'');",
    "%LET _s_=_sav_;",
    "/* BILL: Can these 2 subsets be combined into one pass of the data? -Frank*/",
    "/* Subset by observation first */", "%IF %QUOTE(&ifs)^= %THEN %DO;",
    " data _osub_ ;", " set &dataset ;", " &ifs ;",
    " %LET dataset=_osub_ ;", " %END;", "/* Then subset by variable */",
    "%IF &vars^= %THEN %DO;", " data _vsub_ ;", " set &dataset ;",
    " keep &vars ;", " %LET dataset=_vsub_ ;", " %END;",
    "proc contents data=&dataset out=&_s_(KEEP=name type length label format nobs ",
    " varnum) noprint; ", "%IF &formats=1 %THEN %DO;",
    " PROC FORMAT LIBRARY=LIBRARY CNTLOUT=f(KEEP=fmtname type start end label);",
    " DATA f; SET f; RETAIN n 0; n+1; IF type=\"C\" THEN fmtname=\"$\"||fmtname;",
    " PROC SORT DATA=f OUT=f(DROP=n); BY fmtname n; ",
    " *Sort by n instead of start for numerics so 13 sorts after 2;",
    " *Dont consider formats containing ANY range of values;",
    " *Dont consider formats that dont have at least one non-missing (if",
    " numeric) starting value. This gets rid of formats that are used",
    " only to label special missing values;",
    " DATA f2; SET f; BY fmtname; RETAIN anyrange 0 anynmiss 0;",
    " IF FIRST.fmtname THEN DO;anyrange=0;anynmiss=0;END;",
    " IF start^=end THEN anyrange=1;",
    " IF TYPE=\"C\" THEN anynmiss=1; ",
    " ELSE IF (start+0)>. THEN anynmiss=1;",
    " IF LAST.fmtname & anynmiss & ^anyrange THEN OUTPUT; KEEP fmtname;",
    " DATA f; MERGE f f2(IN=in2); BY fmtname; IF in2;",
    " IF TYPE=\"N\" THEN DO; IF (start+0)>.; *S cannot handle special missings;",
    " END;", " RENAME fmtname=format start=value; DROP end;",
    " PROC SORT DATA=&_s_(KEEP=format) OUT=sform; BY format;",
    " DATA sform; SET sform; BY format; IF LAST.format;",
    " DATA f; MERGE sform(IN=in1) f(IN=in2); BY format; ",
    " IF in1 & in2;",
    " *This keeps formats ever used by any variable;",
    " DATA _NULL_; SET f END=_eof_; BY format;",
    " ARRAY val{*} $ 16 val1-val500; ARRAY lab{*} $ 40 lab1-lab500; ",
    " RETAIN done 0 nform 0 nval 0 val1-val500 \" \" lab1-lab500 \" \" bk -1; ",
    " FILE \"&temp3\" LRECL=4096;", " IF FIRST.format THEN DO;",
    " IF ^done THEN PUT 'list(' @@; done=1;",
    " nform=nform+1; nval=0;",
    " format=TRANSLATE(format,\".abcdefghijklmnopqrstuvwxyz\",",
    " \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");",
    " IF nform=1 THEN PUT '\"' format +bk '\"=list(' @@;",
    " ELSE PUT ', \"' format +bk '\"=list(' @@;", " END;",
    " nval=nval+1; ",
    " IF nval>500 THEN DO; ERROR \">500 format values not allowed\";ABORT ABEND;",
    " END;", ' val{nval}=compress(value)||""; lab{nval}=label; ',
    " IF LAST.format THEN DO;", " PUT \"values=c(\" @@; ",
    " DO i=1 TO nval; IF i>1 THEN PUT \",\" @@;",
    " IF type=\"N\" THEN PUT val{i} +bk @@;",
    " ELSE PUT '\"' val{i} +bk '\"' @@;", " END;",
    " PUT \"),labels=c(\" @@;",
    " DO i=1 TO nval; IF i>1 THEN PUT \",\" @@;",
    " PUT '\"' lab{i} +bk '\"' @@;", " END;",
    " PUT \"))\";", " END;",
    " IF _eof_ THEN PUT \")\";", " %END;",
    "PROC SORT DATA=&_s_;BY varnum;", "data _null_;", " set &_s_ end=eof;",
    " FILE \"&temp1\"; RETAIN _bk_ -1;", " if _n_ = 1 then do;",
    "%IF &specmiss=0 %THEN %LET ofile=_NULL_; ",
    "%ELSE %LET ofile=smiss(KEEP=vname val obs);",
    " put \"data &ofile; set &dataset end=eof;\";",
    " put ' file \"&temp2\" RECFM=D LRECL=4096;';",
    " put \" retain __delim 18 _bk_ -1 obs 0; LENGTH _xx_ $ 20 obs 5;obs+1; \";",
    "%IF &specmiss=1 %THEN %DO;",
    " put \"LENGTH vname $ 8 val $ 1;\"; %END;", " end;",
    " IF type=2 THEN DO;", " PUT 'FORMAT ' name ';' @;",
    " PUT 'IF ' name '=\" \" THEN PUT __delim IB1. @;';",
    "/* $char added F.H. 24Mar92, dropped +_bk_ before __delim */",
    "/* $CHAR. removed FEH 2Aug92, added null FORMAT above, added back +_bk_ */",
    " PUT 'ELSE PUT ' name '+_bk_ __delim IB1. @;';", " END;",
    " ELSE DO; ", " PUT 'IF ' name '<=.Z THEN _xx_=\"NA\";' @;",
    " PUT 'ELSE _xx_=LEFT(PUT(' @;", " format=UPCASE(format);",
    " IF format=\"DATE\"|format=\"MMDDYY\"|format=\"YYMMDD\"|",
    "format=\"DDMMYY\"|format=\"YYQ\"|format=\"MONYY\"|format=\"JULIAN\" THEN DO;",
    " %IF &dates=SAS %THEN", " PUT name \",BEST18.)\";",
    " %ELSE %IF &dates=YYMMDD %THEN", " PUT name \",YYMMDD6.)\";",
    " %ELSE %IF &dates=YEARFRAC %THEN",
    " PUT \"(\" name \"-MDY(1,1,1900))/365.25,7.3)\";",
    " %ELSE %IF &dates=YEARFRAC2 %THEN %DO;",
    " PUT \"YEAR(\" name \")-1900+(\" name \"-MDY(1,1,YEAR(\" name \")))/\" @;",
    " PUT \"(MDY(12,31,YEAR(\" name \"))-MDY(1,1,YEAR(\" name \"))+1),7.3)\";",
    " %END;", " ;", " END;\t",
    " ELSE DO;PUT name \",BEST18.)\" @;END;",
    " PUT '); PUT _xx_ +_bk_ __delim IB1. @;'; *Added +_bk_ 2Aug92;",
    "%IF &specmiss=1 %THEN %DO;",
    " put 'IF .A<=' name '<=.Z THEN DO;",
    " vname=\"' name +_bk_ '\"; val=put(' name ',1.); OUTPUT; END;';",
    " %END;", " END;", "if eof then PUT 'PUT; RUN;';", "run;",
    "%include \"&temp1\";", "data _null_; set &_s_;",
    " retain __delim 18 _bk_ -1; ", " file \"&temp1\" LRECL=4096;",
    " format=TRANSLATE(format,\".abcdefghijklmnopqrstuvwxyz\",",
    " \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");",
    " put name +_bk_ __delim IB1. type +_bk_ __delim IB1. length +_bk_ __delim IB1.",
    " format +_bk_ __delim IB1. label +_bk_ __delim IB1. nobs +_bk_ __delim IB1.;",
    "run;", "%IF &specmiss=1 %THEN %DO;",
    " PROC SORT DATA=smiss OUT=smiss;BY vname val obs;",
    " DATA _NULL_; SET smiss;FILE \"&temp4\" RECFM=D LRECL=30;",
    " RETAIN _bk_ -1 __delim 18;",
    " vname=TRANSLATE(vname,\".abcdefghijklmnopqrstuvwxyz\",",
    "\t\t \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");",
    " PUT vname +_bk_ __delim IB1. val +_bk_ __delim IB1. obs +_bk_ __delim IB1.;",
    " RUN;", " %END;", "%mend sas_get;")
## Import all SAS datasets from a SAS transport (XPORT) file or from a
## directory of SAS csv exports, converting variables according to their
## SAS formats: date/time/datetime formats become R date-time objects and
## value labels from a PROC FORMAT CNTLOUT= dataset become factor levels.
##
## Arguments:
##   file         : path or http(s) URL of the transport file (for
##                  method='csv', the directory holding the csv exports)
##   lowernames   : convert variable names to lower case
##   force.single : store integer-valued doubles as integer
##   method       : 'read.xport' (foreign package), 'dataload' (external
##                  program), or 'csv'
##   formats      : optional data frame of PROC FORMAT CNTLOUT output,
##                  overriding any format dataset found in the file
##   allow        : passed to makeNames (characters to allow in names)
##   out          : for method='csv' only: directory in which each dataset
##                  is saved as a .rda file instead of being returned
##   keep, drop   : dataset names to keep / drop
##   as.is        : controls character <-> factor conversion; a number is
##                  a threshold on the fraction of unique values
##   FUN          : optional function applied to each dataset when out is used
## Returns: a single data frame (one dataset), a named list of data
## frames, or -- when out is given -- the dataset info, invisibly.
sasxport.get <- function(file, lowernames=TRUE, force.single=TRUE,
                         method=c('read.xport','dataload','csv'),
                         formats=NULL, allow=NULL, out=NULL,
                         keep=NULL, drop=NULL, as.is=0.5, FUN=NULL) {
  method <- match.arg(method)
  if(length(out) && method != 'csv')
    stop('out only applies to method="csv"')

  rootsoftware <- if(method == 'dataload') 'dataload' else 'sas'

  ## SAS display formats that signal date / time / datetime variables
  sasdateform <-
    toupper(c("date","mmddyy","yymmdd","ddmmyy","yyq","monyy",
              "julian","qtr","weekdate","weekdatx","weekday","month"))
  sastimeform     <- toupper(c("hhmm","hour","mmss","time"))
  sasdatetimeform <- toupper(c("datetime","tod"))

  ## Note: unlike read.spss, read.dta, SAS xport reading functions do not
  ## support URLs; download to a temporary file first.
  ## Thanks to Kurt Hornik for https
  if(grepl("^https?://", tolower(file))) {
    tf <- tempfile()
    download.file(file, tf, mode='wb', quiet=TRUE)
    file <- tf
  }

  dsinfo <-
    if(method == 'csv') lookupSASContents(file)
    else lookup.xport(file)

  whichds <-
    if(length(keep)) keep
    else setdiff(names(dsinfo), c(drop, '_CONTENTS_', '_contents_'))

  ds <- switch(method,
               read.xport = read.xport(file),
               dataload   = read.xportDataload(file, whichds),
               csv        = if(!length(out))
                              readSAScsv(file, dsinfo, whichds))

  ## was: length(keep) | length(drop) -- scalar condition, use ||
  if(method == 'read.xport' && (length(keep) || length(drop)))
    ds <- ds[whichds]

  ## PROC FORMAT CNTLOUT= dataset present?
  fds <- NULL
  if(!length(formats)) {
    fds <- sapply(dsinfo, function(x)
      all(c('FMTNAME','START','END','MIN','MAX','FUZZ') %in% x$name))
    fds <- names(fds)[fds]
    if(length(fds) > 1) {
      warning('transport file contains more than one PROC FORMAT CNTLOUT= dataset; using only the first')
      fds <- fds[1]
    }
  }

  ## Build finfo: a list, per format name, of value/label pairs
  finfo <- NULL
  if(length(formats) || length(fds)) {
    finfo <-
      if(length(formats)) formats
      else if(length(out)) readSAScsv(file, dsinfo, fds)
      else ds[[fds]]

    ## Remove leading $ from char format names
    ## fmtname <- sub('^\\$','',as.character(finfo$FMTNAME))
    fmtname <- as.character(finfo$FMTNAME)
    finfo <- split(finfo[c('START','END','LABEL')], fmtname)
    finfo <- lapply(finfo,
                    function(f)
                    {
                      rb <- function(a)
                      { # remove leading + trailing blanks
                        a <- sub('[[:space:]]+$', '', as.character(a))
                        sub('^[[:space:]]+', '', a)
                      }
                      st  <- rb(f$START)
                      en  <- rb(f$END)
                      lab <- rb(f$LABEL)
                      j <- is.na(st) | is.na(en) | st == '' | en == ''
                      if(any(j)) {
                        warning('NA in code in FORMAT definition; removed')
                        st <- st[!j]; en <- en[!j]; lab <- lab[!j]
                      }
                      ## A range format (START != END) cannot be used as
                      ## a discrete value-label mapping
                      if(!all(st == en))
                        return(NULL)
                      list(value = all.is.numeric(st, 'vector'),
                           label = lab)
                    })
  }

  ## Number of non-format datasets
  nods <- length(whichds)
  nds  <- nods - (length(formats) == 0 && length(finfo) > 0)
  which.regular <- setdiff(whichds, fds)
  dsn <- tolower(which.regular)

  if((nds > 1) && !length(out)) {
    res <- vector('list', nds)
    names(res) <- gsub('_', '.', dsn)
  }
  if(length(FUN)) {
    funout <- vector('list', length(dsn))
    names(funout) <- gsub('_', '.', dsn)
  }

  possiblyConvertChar <-
    if(method == 'read.xport')
      (is.logical(as.is) && as.is) ||
        (is.numeric(as.is) && as.is < 1)
    else
      (is.logical(as.is) && !as.is) ||
        (is.numeric(as.is) && as.is > 0)
  ## reverse logic because read.xport always converts characters to factors

  j <- 0
  for(k in which.regular) {
    j <- j + 1
    cat('Processing SAS dataset', k, '\t ')
    w <-
      if(length(out)) readSAScsv(file, dsinfo, k)
      else if(nods == 1) ds
      else ds[[k]]
    cat('.')
    if(!length(w)) {
      cat('Empty dataset', k, 'ignored\n')
      next
    }

    chcase <- if(lowernames) tolower else function(x) x
    nam <- chcase(makeNames(names(w), allow=allow))
    names(w) <- nam
    dinfo  <- dsinfo[[k]]
    fmt    <- sub('^\\$', '', dinfo$format)
    lab    <- dinfo$label
    ndinfo <- chcase(makeNames(dinfo$name, allow=allow))
    names(lab) <- names(fmt) <- ndinfo

    for(i in seq_along(w)) {   # was 1:length(w); w known non-empty here
      changed <- FALSE
      x  <- w[[i]]
      fi <- fmt[nam[i]]; names(fi) <- NULL
      ## Apply value labels from the PROC FORMAT dataset, if any
      if(fi != '' && length(finfo) && (fi %in% names(finfo))) {
        f <- finfo[[fi]]
        if(length(f)) { ## may be NULL because had a range in format
          x <- factor(x, f$value, f$label)
          attr(x, 'format') <- fi
          changed <- TRUE
        }
      }
      if(is.numeric(x)) {
        if(fi %in% sasdateform) {
          x <- importConvertDateTime(x, 'date', rootsoftware)
          changed <- TRUE
        } else if(fi %in% sastimeform) {
          x <- importConvertDateTime(x, 'time', rootsoftware)
          changed <- TRUE
        } else if(fi %in% sasdatetimeform) {
          x <- importConvertDateTime(x, 'datetime', rootsoftware)
          changed <- TRUE
        } else if(force.single) {
          ## Demote doubles holding only whole numbers (or only NAs)
          ## to integer storage to save space
          if(all(is.na(x))) {
            storage.mode(x) <- 'integer'
            changed <- TRUE
          } else if(max(abs(x), na.rm=TRUE) <= (2^31 - 1) &&
                    all(floor(x) == x, na.rm=TRUE)) {
            storage.mode(x) <- 'integer'
            changed <- TRUE
          }
        }
      } else if(method == 'read.xport' && possiblyConvertChar && is.factor(x)) {
        if((is.logical(as.is) && as.is) ||
           (is.numeric(as.is) && length(unique(x)) >= as.is * length(x))) {
          x <- as.character(x)
          changed <- TRUE
        }
      } else if(possiblyConvertChar && is.character(x)) {
        if((is.logical(as.is) && !as.is) ||
           (is.numeric(as.is) && length(unique(x)) < as.is * length(x))) {
          x <- factor(x, exclude='')
          changed <- TRUE
        }
      }
      lz <- lab[nam[i]]
      if(lz != '') {
        names(lz) <- NULL
        label(x) <- lz
        changed <- TRUE
      }
      if(changed)
        w[[i]] <- x
    }
    cat('.\n')

    if(length(out)) {
      nam <- gsub('_', '.', dsn[j])
      assign(nam, w)
      ## was: eval(parse(text=paste('save(',nam,...))) -- save(list=)
      ## passes the object's name directly without parsing code
      save(list=nam, file=file.path(out, paste0(nam, '.rda')),
           compress=TRUE)
      if(length(FUN) && length(w))
        funout[[nam]] <- FUN(w)
      ## was: remove(nam), which deleted the character variable 'nam'
      ## itself and left the large dataset object in memory
      remove(list=nam)
    } else if(nds > 1)
      res[[j]] <- w
  }

  if(length(out)) {
    names(dsinfo) <- gsub('_', '.', tolower(names(dsinfo)))
    if(length(FUN))
      attr(dsinfo, 'FUN') <- funout
    invisible(dsinfo)
  } else if(nds > 1) res
  else w
}
## Use the external 'dataload' program to extract each requested SAS
## dataset from a transport file, returning a named list of data frames
## shaped like read.xport() output.  Datasets that dataload fails to
## extract are left as NULL in the result.
read.xportDataload <- function(file, dsnames) {
  ## Copy the transport file into the session temp directory and work
  ## there; restore the previous working directory on exit
  xptname <- substring(tempfile(tmpdir=''), 2)
  file.copy(file, paste(tempdir(), xptname, sep='/'))
  oldwd <- getwd()
  on.exit(setwd(oldwd))
  setwd(tempdir())

  datasets <- vector('list', length(dsnames))
  names(datasets) <- dsnames
  for(dsname in dsnames) {
    ## dataload writes the extracted dataset to zzzz.rda as object 'zzzz'
    rc <- system(paste('dataload', xptname, 'zzzz.rda', dsname),
                 intern=FALSE)
    if(rc == 0) {
      load('zzzz.rda')   # creates 'zzzz' in this frame
      names(zzzz) <- makeNames(names(zzzz))
      datasets[[dsname]] <- zzzz
    }
  }
  datasets
}
utils::globalVariables(c("NOBS", "memname", "memlabel"))

## Read _contents_.csv (a PROC CONTENTS export) from sasdir and return a
## named list of per-dataset variable tables, shaped like the output of
## lookup.xport().  Datasets reported with zero observations are dropped
## (with a console message).
lookupSASContents <- function(sasdir) {
  contents <- read.csv(paste(sasdir, '_contents_.csv', sep='/'), as.is=TRUE)

  ## First NOBS value seen for each dataset (MEMNAME)
  nobs.per.ds <- tapply(contents$NOBS, contents$MEMNAME, function(x) x[1])
  if(any(nobs.per.ds == 0)) {
    cat('\nDatasets with 0 observations ignored:\n')
    print(names(nobs.per.ds)[nobs.per.ds == 0], quote=FALSE)
    contents <- subset(contents, NOBS > 0)
  }

  ## SAS codes variable type numerically: 1 = numeric, otherwise character
  contents$TYPE <- ifelse(contents$TYPE == 1, 'numeric', 'character')
  names(contents) <- tolower(names(contents))
  unclass(split(subset(contents, select=-c(memname, memlabel)),
                contents$memname))
}
## Read the SAS csv export files for the requested datasets from sasdir.
## A mismatch between the rows read and the NOBS recorded in dsinfo is
## reported on the console.  Returns a single data frame when exactly one
## dataset is requested, otherwise a named list of data frames.
readSAScsv <- function(sasdir, dsinfo, dsnames=names(dsinfo)) {
  sasnobs <- sapply(dsinfo, function(x) x$nobs[1])
  several <- length(dsnames) > 1
  if(several) {
    result <- vector('list', length(dsnames))
    names(result) <- dsnames
  }
  for(dsname in dsnames) {
    imported <- read.csv(paste(sasdir, '/', dsname, '.csv', sep=''),
                         as.is=TRUE, blank.lines.skip=FALSE,
                         comment.char="")
    importedLength <- length(imported[[1]])
    ## Sanity check against the row count SAS reported
    if(importedLength != sasnobs[dsname])
      cat('\nError: NOBS reported by SAS (', sasnobs[dsname],
          ') for dataset ', dsname,
          ' is not the same as imported length (', importedLength,
          ')\n', sep='')
    if(several)
      result[[dsname]] <- imported
  }
  if(several)
    result
  else imported
}
## Extract dataset labels from a SAS PROC CONTENTS text listing.  Each
## line matching 'Data Set Name:' contributes one entry: the value is the
## non-blank line just above it (or two above if that line is blank), and
## the name is the lower-cased token after the last '.' on the matching
## line.  Returns NULL when no matching lines are found.
sasdsLabels <- function(file)
{
  txt <- scan(file, sep='\n', what='', quiet=TRUE)
  hits <- grep('Data Set Name:', txt)
  if(!length(hits))
    return(NULL)

  ## Dataset name: text between the last '.' and the following whitespace
  dsnames <- tolower(sub('.*\\.([A-Z0-9\\_]*)[[:space:]]+.*', '\\1',
                         txt[hits]))
  txt <- gsub('\t', '', txt)
  ## Label is on the preceding line; fall back two lines when blank
  labs <- ifelse(nchar(txt[hits - 1]) == 0, txt[hits - 2], txt[hits - 1])
  names(labs) <- dsnames
  labs
}
## Design effect for cluster sampling.  Decomposes the total sum of
## squares of y into within-cluster and between-cluster parts to obtain
## the intracluster correlation rho and the variance inflation factor
## deff = 1 + (B - 1) * rho.  Pairs with NA in y or cluster are dropped.
## Returns c(n, clusters, rho, deff).
deff <- function(y, cluster)
{
  ## Corrected sum of squares of a vector
  sumsq <- function(v)
  {
    m <- sum(v) / length(v)
    sum((v - m)^2)
  }

  if(!is.factor(cluster)) cluster <- as.factor(cluster)
  cluster <- unclass(cluster)
  ok <- !is.na(cluster + y)      # keep pairs complete on both variables
  y <- y[ok]
  cluster <- as.integer(cluster[ok])

  n <- length(y)
  total.ss  <- sumsq(y)
  within.ss <- tapply(y, cluster, sumsq)
  k <- length(within.ss)

  ## One-way ANOVA: R2 is the between-cluster fraction of variation
  R2 <- 1 - sum(within.ss) / total.ss
  Fstat <- R2 * (n - k) / (1 - R2) / k
  g <- (Fstat - 1.) * k / n
  rho <- if(R2 == 1.) 1. else g / (1. + g)

  sizes <- table(cluster)
  B <- sum(sizes^2) / n          # effective average cluster size
  c(n=n, clusters=k, rho=rho, deff=1 + (B - 1) * rho)
}
## Summarize the response variable by missingness of each predictor in a
## model frame.  Only active when options(na.detail.response=TRUE) is set.
##
## mf: model frame whose first column is the response.
## Returns a matrix with one row per summary statistic (by default N and
## Mean of the non-missing response) and one column per predictor
## missing / non-missing subset, plus "Any NA" / "No NA" columns when
## there is more than one predictor; NULL when the option is unset or no
## response is present.
na.detail.response <- function(mf)
{
  ## Opt-in only: bail out unless options(na.detail.response=TRUE)
  if(is.null(z <- .Options$na.detail.response) || !z)
    return(NULL)

  response <- model.extract(mf, response)
  if(is.null(response))
    return(NULL)

  if(!is.matrix(response))
    response <- as.matrix(response)

  ## User-supplied summary function name via options(na.fun.response=);
  ## the default reports N and mean of the (last column of the) response
  GFUN <- options()$na.fun.response
  if(is.null(GFUN))
    GFUN <- function(x, ...)
      {
        if(is.matrix(x)) x <- x[,ncol(x)]
        x <- x[!is.na(x)]
        c(N=length(x),Mean=mean(x))
      }
  else GFUN <- eval.parent(as.name(GFUN))

  w <- NULL; nam <- names(mf); wnam <- NULL
  N <- nrow(mf)
  p <- ncol(mf)
  omit <- rep(FALSE, N)   # accumulates "any predictor missing" per row

  ## Columns 2..p are the predictors (column 1 is the response)
  for(i in 2:p) {
    x <- mf[,i]
    if(is.matrix(x))
      x <- x[,1]

    isna <- is.na(x)
    omit <- omit | isna
    nmiss <- sum(isna)
    ## Summary of the response among rows where predictor i is missing
    if(nmiss) {
      w <- cbind(w, GFUN(response[isna,]))
      wnam <- c(wnam, paste(nam[i],"=NA",sep=""))
    }

    ## ... and among rows where predictor i is present
    n <- N-nmiss
    if(n) {
      w <- cbind(w, GFUN(response[!isna,]))
      wnam <- c(wnam, paste(nam[i],"!=NA",sep=""))
    }
  }

  ## summarize response for ANY x missing (only meaningful with > 1 predictor)
  if(p>2) {
    nmiss <- sum(omit)
    if(nmiss) {
      w <- cbind(w, GFUN(response[omit,]))
      wnam <- c(wnam, "Any NA")
    }

    if(N-nmiss) {
      w <- cbind(w, GFUN(response[!omit,]))
      wnam <- c(wnam, "No NA")
    }
  }

  dimnames(w)[[2]] <- wnam
  w
}
### These are functions designed for compatibility with S-plus
### for R internationalization.  They are named with a prefix of
### "Splus".
###
### These functions contain representations of sprintf, gettext,
### gettextf, and ngettext
## NOTE(review): the body of this fallback sprintf appears to have been
## mangled in this copy -- the regexpr() pattern on the line containing
## "(? 0){" is truncated, and the definitions of text/vars/output inside
## the while loop are missing (presumably text between '<' and '>' was
## stripped by markup processing).  Compare against the upstream Hmisc
## gettext.s before relying on it.  It is only defined when base R lacks
## sprintf, i.e. never in any modern R, so it is effectively dead code.
if(!exists("sprintf")) sprintf <- function(fmt, ...) {
  ldots <- list(...)
  text <- vector("character")
  vars <- vector("character")
  i <- 1; j <- 1;
  temp <- fmt
  while (nchar(temp)) {
    ne <- regexpr('(? 0){
      arg <- sub('^(\\d+)\\$.*$', '\\1', vars[i], perl=TRUE)
      if(arg > 0 && arg < length(ldots)) {
        val <- as.integer(arg)
      }
      else
        stop("Error")
    }
    else {
      val <- j
      j <- j + 1
    }
    output <- paste(output, ldots[[val]], sep='')
  }
  return(paste(output, text[length(text)], sep=''))
}
## Fallback gettext(): no translation lookup, simply return the supplied
## messages unchanged as a character vector.  Defined only when the host
## R lacks gettext().
if(!exists("gettext")) gettext <- function(..., domain=NULL) {
  unlist(list(...))
}
## Fallback gettextf(): format with sprintf, skipping any translation
## lookup.  Defined only when the host R lacks gettextf().
if(!exists("gettextf")) gettextf <- function(fmt, ..., domain=NULL) {
  sprintf(fmt, ...)
}
## Fallback ngettext(): English-style singular/plural choice with no
## translation lookup.  Defined only when the host R lacks ngettext().
if(!exists("ngettext")) ngettext <- function(n, msg1, msg2, domain = NULL) {
  if(n == 1) msg1 else msg2
}
## Plot method for describe() objects.  Produces a dot chart of category
## proportions for categorical/binary variables and spike histograms for
## continuous variables, either as static ggplot2 graphics or as
## interactive plotly graphics (when options(grType='plotly') and the
## plotly package are available).
##
## x        : an object created by describe()
## which    : which variable types to plot
## what     : optional subset of variables (names or indexes) to plot
## sort     : ordering of category frequencies within each variable
## n.unique : minimum number of distinct values for a numeric variable to
##            be treated as continuous
## digits   : number of significant digits for hover-text values
## bvspace  : between-variable spacing relative to within-variable
##            spacing (must not be 1)
## Returns a ggplot/plotly object, or a list with Categorical and
## Continuous components when which='both' and both types are present.
plot.describe <- function(x, which=c('both', 'continuous', 'categorical'),
                          what=NULL, sort=c('ascending', 'descending', 'none'),
                          n.unique=10, digits=5, bvspace=2, ...) {
  which <- match.arg(which)
  ## Interactive plotly output only when requested AND plotly is installed
  pty <- grType() == 'plotly' && requireNamespace("plotly")
  auto <- .Options$plotlyauto
  auto <- length(auto) && auto
  if(length(what)) x <- x[what]
  sort <- match.arg(sort)
  ## Markup helpers differ between html (plotly hover text) and plain text
  specs <- if(pty) markupSpecs$html else markupSpecs$plain
  ## bvspace==1 would make between-variable gaps indistinguishable from
  ## within-variable row spacing (see the z$y == bvspace tests below)
  if(bvspace == 1) stop('bvspace may not be 1.0')

  ## Align describe() count names and values into fixed-width
  ## "name:value" lines for hover text
  format_counts <- function(s) {
    bl <- ' '
    na <- substring(paste(names(s), bl, sep=''), 1,
                    max(nchar(names(s))))
    va <- paste(substring(bl, 1,
                          max(nchar(s)) - nchar(s) + 1), s, sep='')
    zz <- paste(na, va, sep=':')
    gsub(' ', specs$space, zz)
  }

  ## Variable label (descript minus any leading "name:"), with units
  fmtlab <- function(x) {
    lab <- sub('^.*:', '', x$descript)
    if(length(x$units)) lab <- specs$varlabel(lab, x$units)
    lab
  }

  ## Gather component i across the per-variable list w
  ge <- function(i) unlist(lapply(w, function(x) x[[i]]))

  if(which != 'continuous') {
    ## ---- Categorical / binary variables ----
    ## Per variable: one row per category with proportion, hover text,
    ## row spacing y, and missing count
    f <- function(x) {
      s <- x$counts
      v <- x$values
      type <- if('Sum' %in% names(s)) 'binary'
      else
        if(length(v) && is.list(v) &&
           all(names(v) == c('value', 'frequency')) &&
           is.character(v$value) && length(v$value) <= 20) 'categorical'
      else 'none'
      if(type == 'none')
        return(data.frame(prop=numeric(0), text=character(0)))
      n <- as.numeric(s['n'])
      if(type == 'binary') {
        val <- ''
        freq <- as.numeric(s['Sum'])
      }
      else {
        val <- v$value
        freq <- v$frequency
      }
      category <- if(type == 'categorical') val else ''
      text <-
        paste(category, if(type == 'categorical') ' ',
              round(freq / n, 3), specs$frac(freq, n, size=90))
      ## Details about variable for minimum frequency category
      left <- which.min(freq)[1]
      text[left] <- paste(c(fmtlab(x), text[left], format_counts(s)),
                          collapse=specs$br)
      y <- if(type == 'binary') 0 else 1 : length(freq)
      ## NOTE(review): the assignment above is dead code -- y is
      ## immediately overwritten on the next line
      y <- c(bvspace, rep(1, length(freq) - 1))
      ## extra 1 for between-variable spacing
      j <- switch(sort,
                  ascending = order(freq),
                  descending= order(-freq),
                  none      = TRUE)
      data.frame(prop=freq[j] / n, y=y, text=I(text[j]),
                 category=I(category[j]),
                 missing=as.numeric(s['missing']), nlev=length(freq))
    }
    w <- lapply(x, f)
    nam <- names(w)
    for(na in nam) {
      l <- length(w[[na]]$prop)
      w[[na]]$xname <- if(l) rep(na, l) else character(0)
    }
    if(length(ge('xname')) == 0) {
      warning('no categorical variables found')
      pcat <- NULL
    } else {
      ## One long data frame, all variables stacked; cumy gives each row
      ## its vertical position (bvspace-sized gap starts a new variable)
      z <- data.frame(xname      = I(ge('xname')),
                      Proportion = ge('prop'),
                      y          = ge('y'),
                      text       = I(ge('text')),
                      category   = I(ge('category')),
                      Missing    = ge('missing'),
                      nlev       = ge('nlev'))
      un <- unique(z$xname)
      nv <- length(un)
      z$xnamef <- factor(z$xname, levels=un)
      z$xnamen <- as.integer(z$xnamef) - z$y / pmax(0.7 * nv, z$nlev)
      z$cumy <- cumsum(z$y)
      if(! pty) z$cumy <- - z$cumy
      ## Tick positions: first row of each variable
      tly <- z$cumy[z$y == bvspace]
      if(! pty) {
        ## Static ggplot2 dot chart; category labels rendered as text at
        ## the right margin (proplev)
        r <- range(z$Proportion)
        z$proplev <- r[2] + .2
        pcat <-
          if(any(z$Missing > 0))
            ggplot(z, aes(text=text)) +
            geom_point(aes(x=Proportion, y=cumy, color=Missing)) +
            geom_text(aes(x=proplev, y=cumy, label=category),
                      size=2.5, hjust=1) +
            scale_y_continuous(breaks=tly, labels=un) +
            scale_x_continuous(breaks=pretty(r)) +
            scale_color_gradientn(colors=viridis::viridis(10)) +
            ylab(NULL) +
            theme(panel.grid.minor.y = element_blank())
          else
            ggplot(z, aes(text=text)) + geom_point(aes(x=Proportion, y=cumy)) +
            geom_text(aes(x=proplev, y=cumy, label=category),
                      size=2.5, hjust=1) +
            scale_y_continuous(breaks=tly, labels=un) +
            scale_x_continuous(breaks=pretty(r)) +
            ylab(NULL) +
            theme(panel.grid.minor.y = element_blank())
      }
      else
        {
          ## Interactive plotly dot chart with hover text
          z$proplev <- 1.15
          pcat <- if(any(z$Missing > 0))
            plotly::plot_ly(z, x = ~ Proportion, y= ~ cumy, text= ~ text,
                            color=~ Missing, mode='markers',
                            hoverinfo='text',
                            type='scatter', name='',
                            height=if(! auto) plotlyParm$heightDotchart(nrow(z)))
          else
            plotly::plot_ly(z, x=~ Proportion, y=~ cumy, text=~ text,
                            mode='markers', hoverinfo='text',
                            type='scatter', name='',
                            height=if(! auto) plotlyParm$heightDotchart(nrow(z)))
          ## Category labels as a separate text trace left of the points
          pcat <-
            plotly::add_trace(pcat,
                              data=z, x=~ proplev, y=~ cumy, text=~ category,
                              mode='text', textposition='left',
                              textfont=list(size=9), hoverinfo='none',
                              name='Levels')
          tl <- seq(0, 1, by=0.05)
          ## X tick mark labels for multiples of 0.1
          tt <- ifelse(tl %% 0.1 == 0, as.character(tl), '')
          tly <- z$cumy[z$y == bvspace]
          pcat <- plotly::layout(pcat,
                                 xaxis=list(range=c(0,1.15), zeroline=TRUE,
                                            tickvals=tl, ticktext=tt,
                                            title='Proportion'),
                                 yaxis=list(title='', autorange='reversed',
                                            tickvals=tly, ticktext=un),
                                 margin=list(l=plotlyParm$lrmargin(un)))
        }
    }
  }

  if(which != 'categorical') {
    ## ---- Continuous variables: spike histograms ----
    f <- function(x) {
      s <- x$counts
      v <- x$values
      isn <- function(z) is.numeric(z) || testDateTime(z, 'either')
      ## Treated as continuous only with >= n.unique distinct numeric
      ## (or date/time) values tabulated by describe()
      if(! (as.numeric(s['distinct']) >= n.unique &&
            length(v) && is.list(v) &&
            all(names(v) == c('value', 'frequency')) &&
            isn(v$value)) )
        return(data.frame(X=numeric(0), count=numeric(0), text=character(0)))
      X <- v$value
      Y <- v$frequency
      Xn <- as.numeric(X)
      text <- paste(format(X, digits=digits), ' (n=', Y, ')', sep='')
      ## Rescale x to [0, 1] within each variable
      X <- (Xn - Xn[1]) / diff(range(Xn))
      zz <- format_counts(s)
      lab <- fmtlab(x)
      text[1] <- paste(c(lab, text[1], zz), collapse=' ')
      ## Note: plotly does not allow html tables as hover text
      m <- rep(as.numeric(s['missing']), length(X))
      list(X=X, prop=Y / sum(Y), text=I(text), missing=m)
    }
    w <- lapply(x, f)
    nam <- names(w)
    for(n in nam) {
      l <- length(w[[n]]$X)
      w[[n]]$xname <- if(l) rep(n, l) else character(0)
    }
    if(length(ge('xname')) == 0) {
      warning('no continuous variables found')
      pcon <- NULL
    } else {
      z <- data.frame(xname      = I(ge('xname')),
                      X          = ge('X'),
                      Proportion = round(ge('prop'), 4),
                      text       = I(ge('text')),
                      Missing    = ge('missing'))
      z <- z[nrow(z) : 1, ]   # so plotly will keep right ordering
      unam <- unique(z$xname)
      z$yy <- match(as.character(z$xname), unam)
      ## Scale Proportion so that max over all variables is 0.9
      z$Proportion <- 0.9 * z$Proportion / max(z$Proportion)
      ## Vertical spikes at each distinct value, height = rel. frequency
      g <- if(any(z$Missing > 0))
        ggplot(z, aes(text=text)) +
        geom_segment(aes(x=X, xend=X, y=yy, yend=yy + Proportion,
                         color=Missing)) +
        scale_y_continuous(breaks=1 : length(unam), labels=unam) +
        scale_color_gradientn(colors=viridis::viridis(10)) +
        xlab(NULL) + ylab(NULL) +
        theme(axis.text.x        = element_blank(),
              panel.grid.major.x = element_blank(),
              panel.grid.minor.x = element_blank(),
              panel.grid.minor.y = element_blank(),
              axis.ticks.x       = element_blank())
      else
        ggplot(z, aes(text=text)) +
        geom_segment(aes(x=X, xend=X, y=yy, yend=yy + Proportion)) +
        scale_y_continuous(breaks=1 : length(unam), labels=unam) +
        xlab(NULL) + ylab(NULL) +
        theme(axis.text.x        = element_blank(),
              panel.grid.major.x = element_blank(),
              panel.grid.minor.x = element_blank(),
              panel.grid.minor.y = element_blank(),
              axis.ticks.x       = element_blank())
      ## ggplotly would not hover text at x=0 when height < 350 px
      curtail <- function(x) min(1000, max(x, 350))
      pcon <- if(! pty) g
      else
        plotly::ggplotly(g, tooltip='text', width=if(! auto) 800,
                         height=if(! auto) curtail(60 + 25 * length(unam)))
      ## (Historical note: earlier direct plot_ly/histSpikep variants were
      ## abandoned -- plotly does not allow html tables or font changes in
      ## hover text, and hovering popped up all vertically-aligned points.)
    }
  }

  if(which == 'both') {
    ## When only one of the two plot types produced output, return it alone
    if(! length(pcat)) return(pcon)
    if(! length(pcon)) return(pcat)
  }
  ## knitr.in.progress not set to TRUE unless explicitly select menu
  ## choice knit to html.  Even when properly set, resulting html
  ## file was messed up as plotly widgets were duplicated
  switch(which,
         categorical = pcat,
         continuous  = pcon,
         both        = if(FALSE && pty &&
                          isTRUE(getOption('knitr.in.progress')))
                         htmltools::tagList(list(plotly::as.widget(pcat),
                                                 plotly::as.widget(pcon)))
                       else
                         list(Categorical=pcat, Continuous=pcon))
}

## Some of these are for dotchart3.s
utils::globalVariables(c('X', 'Proportion', 'xname', 'cumy', 'proplev',
                         'category', 'xb', 'Missing', 'yy'))
##' Number of Coincident Points
##'
##' Computes the number of x,y pairs that are likely to be obscured in a
##' regular scatterplot, in the sense of overlapping pairs after binning
##' into `bins` x `bins` squares where `bins` defaults to 400. `NA`s are
##' removed first.
##' @title nCoincident
##' @param x numeric vector
##' @param y numeric vector
##' @param bins number of bins in both directions
##' @return integer count
##' @author Frank Harrell
##' @md
##' @examples
##' nCoincident(c(1:5, 4:5), c(1:5, 4:5)/10)
nCoincident <- function(x, y, bins=400) {
  i <- ! is.na(x + y)  # exclude points missing on x or y
  x <- x[i]
  y <- y[i]
  rx <- range(x)
  ry <- range(y)
  ## Bug fix: grid size was hard-coded at 300, silently ignoring the
  ## bins argument (and contradicting the documented default of 400)
  x <- round((x - rx[1]) / diff(rx) * bins)
  y <- round((y - ry[1]) / diff(ry) * bins)
  ## Points mapping to the same grid cell count as coincident
  z <- paste(x, y)
  length(z) - length(unique(z))
}
## Build a '\tex[lref][psref][scale][srt]{string}' annotation string
## (for LaTeX post-processing of graphics, psfrag-style): lref and psref
## are reference-point codes, scale a magnification, srt a rotation
## angle in degrees.
tex <- function(string, lref='c', psref='c', scale=1, srt=0)
  paste0('\\tex[', lref, '][', psref, '][',
         format(scale), '][', format(srt), ']{', string, '}')
#' @title Step function confidence intervals for ggplot2
#' @description Produces a step function confidence interval for survival curves. This function is taken from
#' the \code{utile.visuals} package by Eric Finnesgard. That package is not used because of its
#' strong dependencies.
#' @param mapping Aesthetic mappings with aes() function. Like geom_ribbon(), you must provide
#' columns for x, ymin (lower limit), ymax (upper limit).
#' @param data The data to be displayed in this layer. Can inherit from ggplot parent.
#' @param stat The statistical transformation to use on the data for this layer, as a string.
#' Defaults to 'identity'.
#' @param position Position adjustment, either as a string, or the result of a call to a
#' position adjustment function.
#' @param na.rm If FALSE, the default, missing values are removed with a warning. If TRUE,
#' missing values are silently removed.
#' @param ... Optional. Any other ggplot geom_ribbon() arguments.
#' @note Originally adapted from the survminer package.
#' @examples
#' require(survival)
#' require(ggplot2)
#'
#' f <- survfit(Surv(time, status) ~ trt, data = diabetic)
#' d <- with(f, data.frame(time, surv, lower, upper, trt=rep(names(f$strata), f$strata)))
#' ggplot(d, aes(x = time, y=surv)) +
#'   geom_step(aes(color = trt)) +
#'   geom_stepconfint(aes(ymin = lower, ymax = upper, fill = trt), alpha = 0.3) +
#'   coord_cartesian(c(0, 50)) +
#'   scale_x_continuous(expand = c(0.02,0)) +
#'   labs(x = 'Time', y = 'Freedom From Event') +
#'   scale_color_manual(
#'     values = c('#d83641', '#1A45A7'),
#'     name = 'Treatment',
#'     labels = c('None', 'Laser'),
#'     aesthetics = c('colour', 'fill'))
#' @export
#' @author Eric Finnesgard
geom_stepconfint <- function (
  mapping = NULL, data = NULL, stat = "identity",
  position = "identity", na.rm = FALSE, ...) {
  ggplot2::layer(
    mapping = mapping,
    data = data,
    stat = stat,
    ## Anonymous ggproto subclass of GeomRibbon that converts the ribbon
    ## limits into a stairstep outline before drawing
    geom = ggplot2::ggproto(
      `_class` = 'GeomConfint',
      `_inherit` = ggplot2::GeomRibbon,
      required_aes = c("x", "ymin", "ymax"),
      draw_group = function (self, data, panel_scales, coord, na.rm = FALSE) {
        ## NOTE(review): complete.cases() is applied to the character
        ## vector self$required_aes, not to those columns of `data`, so
        ## this line filters nothing meaningful.  Probably intended:
        ## complete.cases(data[, self$required_aes]) -- confirm against
        ## upstream utile.visuals before changing; behavior kept as-is.
        if (na.rm) data <- data[stats::complete.cases(self$required_aes), ]
        data <- data[order(data$group, data$x), ]
        data <- self$stairstep_confint(data)
        ggplot2::GeomRibbon$draw_group(data, panel_scales, coord, na.rm = FALSE)
      },
      ## Expand n points into the 2n-1 vertices of a step outline: x
      ## values are doubled at each breakpoint while ymin/ymax are
      ## carried forward from the previous point
      stairstep_confint = function (data) {
        data <- as.data.frame(data)[order(data$x), ]
        n <- nrow(data)
        ys <- rep(1:n, each = 2)[-2 * n]
        xs <- c(1, rep(2:n, each = 2))
        data.frame(
          x = data$x[xs],
          ymin = data$ymin[ys],
          ymax = data$ymax[ys],
          data[xs, setdiff(names(data), c("x", "ymin", "ymax"))]
        )
      }
    ),
    position = position,
    params = list(na.rm = na.rm, ...)
  )
}
## Elementwise test for "present" values: non-empty strings for character
## input, non-NA values for everything else.  (For character input, NA
## elements yield NA, matching the original x != "" semantics.)
is.present <- function(x)
{
  if(is.character(x)) x != "" else !is.na(x)
}
Hmisc/R/samplesize.bin.s 0000644 0001762 0000144 00000001551 12243661443 014634 0 ustar ligges users ## Rick Chappell <> Asst. Professor, Depts. of Statistics and Human Oncology
## <> University of Wisconsin at Madison <> chappell@stat.wisc.edu
## (608) 263-5572 / 262-2733 <> take logs
samplesize.bin <- function(alpha, beta, pit, pic, rho=.5)
{
## alpha is the scalar ONE-SIDED test size, or two-sided size/2
## beta is a scalar or vector of powers
## pit is the hypothesized treatment probability of success
## pic is the hypothesized control probability of success
## returns required TOTAL sample size, using arcsin transformation
## rho is the proportion of the sample devoted to treated group (0