mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Add surv Verb to Estimate a Survival Curve (#1788)
Add a surv verb to estimate a survival curve using Kaplan-Meier. It requires duration and status (event or censored) columns, and outputs each distinct duration and corresponding probability of survival.
This commit is contained in:
parent
35c7eeb977
commit
df73ad8ec0
9 changed files with 216 additions and 4 deletions
11
go.mod
11
go.mod
|
|
@ -14,13 +14,16 @@ module github.com/johnkerl/miller/v6
|
|||
// Local development:
|
||||
// replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin
|
||||
|
||||
go 1.21
|
||||
go 1.23.0
|
||||
|
||||
toolchain go1.24.2
|
||||
|
||||
require (
|
||||
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb
|
||||
github.com/johnkerl/lumin v1.0.0
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
|
||||
github.com/klauspost/compress v1.17.11
|
||||
github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1
|
||||
github.com/lestrrat-go/strftime v1.1.0
|
||||
github.com/mattn/go-isatty v0.0.20
|
||||
github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
|
||||
|
|
@ -28,13 +31,17 @@ require (
|
|||
github.com/stretchr/testify v1.10.0
|
||||
golang.org/x/sys v0.30.0
|
||||
golang.org/x/term v0.29.0
|
||||
golang.org/x/text v0.22.0
|
||||
golang.org/x/text v0.23.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/felixge/fgprof v0.9.3 // indirect
|
||||
github.com/golang/snappy v1.0.0 // indirect
|
||||
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
|
||||
github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
golang.org/x/tools v0.26.0 // indirect
|
||||
gonum.org/v1/gonum v0.16.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
|
|
|||
14
go.sum
14
go.sum
|
|
@ -8,6 +8,8 @@ github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb h1:IT4JYU7k4ikYg1S
|
|||
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb/go.mod h1:bH6Xx7IW64qjjJq8M2u4dxNaBiDfKK+z/3eGDpXEQhc=
|
||||
github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g=
|
||||
github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
|
||||
github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
|
||||
github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y=
|
||||
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
|
||||
|
|
@ -17,6 +19,10 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNU
|
|||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
|
||||
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
|
||||
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
|
||||
github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb h1:Z5BVHFk/DLOIUAd2NycF0mLtKfhl7ynm4Uy5+AFhT48=
|
||||
github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb/go.mod h1:+U+6yzfITr4/teU2YhxWhdyw6YzednT/16/UBMjlDrU=
|
||||
github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 h1:UyIQ1VTQq/0CS/wLYjf3DV6uRKTd1xcsng3BccM4XCY=
|
||||
github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1/go.mod h1:uvVFnikBpVz7S1pdsyUI+BBRlz64vmU6Q+kviiB+fpU=
|
||||
github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8=
|
||||
github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
|
||||
github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg=
|
||||
|
|
@ -41,8 +47,12 @@ golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
|||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU=
|
||||
golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s=
|
||||
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
||||
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
|
||||
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
|
||||
golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
|
||||
golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
|
||||
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
|
||||
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
|
|||
StepSetup,
|
||||
SubSetup,
|
||||
SummarySetup,
|
||||
SurvSetup,
|
||||
TacSetup,
|
||||
TailSetup,
|
||||
TeeSetup,
|
||||
|
|
|
|||
173
pkg/transformers/surv.go
Normal file
173
pkg/transformers/surv.go
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
package transformers
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/johnkerl/miller/v6/pkg/cli"
|
||||
"github.com/johnkerl/miller/v6/pkg/mlrval"
|
||||
"github.com/johnkerl/miller/v6/pkg/types"
|
||||
"github.com/kshedden/statmodel/duration"
|
||||
"github.com/kshedden/statmodel/statmodel"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameSurv = "surv"
|
||||
|
||||
// SurvSetup defines the surv verb: Kaplan-Meier survival curve.
|
||||
var SurvSetup = TransformerSetup{
|
||||
Verb: verbNameSurv,
|
||||
UsageFunc: transformerSurvUsage,
|
||||
ParseCLIFunc: transformerSurvParseCLI,
|
||||
IgnoresInput: false,
|
||||
}
|
||||
|
||||
// transformerSurvUsage writes the help text for the surv verb to o: a
// one-line usage synopsis followed by a short description and the list of
// supported options (-d, -s, -h/--help).
func transformerSurvUsage(o *os.File) {
|
||||
fmt.Fprintf(o, "Usage: %s %s -d {duration-field} -s {status-field}\n", "mlr", verbNameSurv)
|
||||
// The raw string literal opens with a newline, so a blank line separates the
// synopsis above from the description below.
fmt.Fprint(o, `
|
||||
Estimate Kaplan-Meier survival curve (right-censored).
|
||||
Options:
|
||||
-d {field} Name of duration field (time-to-event or censoring).
|
||||
-s {field} Name of status field (0=censored, 1=event).
|
||||
-h, --help Show this message.
|
||||
`)
|
||||
}
|
||||
|
||||
func transformerSurvParseCLI(
|
||||
pargi *int,
|
||||
argc int,
|
||||
args []string,
|
||||
_ *cli.TOptions,
|
||||
doConstruct bool,
|
||||
) IRecordTransformer {
|
||||
argi := *pargi
|
||||
verb := args[argi]
|
||||
argi++
|
||||
|
||||
var durationField, statusField string
|
||||
|
||||
for argi < argc {
|
||||
opt := args[argi]
|
||||
if !strings.HasPrefix(opt, "-") {
|
||||
break
|
||||
}
|
||||
if opt == "-h" || opt == "--help" {
|
||||
transformerSurvUsage(os.Stdout)
|
||||
os.Exit(0)
|
||||
} else if opt == "-d" {
|
||||
if argi+1 >= argc {
|
||||
fmt.Fprintf(os.Stderr, "mlr %s: %s requires an argument\n", verb, opt)
|
||||
os.Exit(1)
|
||||
}
|
||||
argi++
|
||||
durationField = args[argi]
|
||||
argi++
|
||||
} else if opt == "-s" {
|
||||
if argi+1 >= argc {
|
||||
fmt.Fprintf(os.Stderr, "mlr %s: %s requires an argument\n", verb, opt)
|
||||
os.Exit(1)
|
||||
}
|
||||
argi++
|
||||
statusField = args[argi]
|
||||
argi++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
*pargi = argi
|
||||
if !doConstruct {
|
||||
return nil
|
||||
}
|
||||
if durationField == "" {
|
||||
fmt.Fprintf(os.Stderr, "mlr %s: -d option is required.\n", verbNameSurv)
|
||||
fmt.Fprintf(os.Stderr, "Please see 'mlr %s --help' for more information.\n", verbNameSurv)
|
||||
os.Exit(1)
|
||||
}
|
||||
if statusField == "" {
|
||||
fmt.Fprintf(os.Stderr, "mlr %s: -s option is required.\n", verbNameSurv)
|
||||
fmt.Fprintf(os.Stderr, "Please see 'mlr %s --help' for more information.\n", verbNameSurv)
|
||||
os.Exit(1)
|
||||
}
|
||||
return NewTransformerSurv(durationField, statusField)
|
||||
}
|
||||
|
||||
// TransformerSurv is the state of the surv verb. It remembers which input
// fields to read and accumulates one observation per input record until end
// of stream, when the survival curve is computed.
type TransformerSurv struct {
	// Names of the input fields carrying the duration and the status flag.
	durationField, statusField string

	// Parallel slices, one entry per ingested record: times[i] is the
	// record's duration and events[i] is true when its status was non-zero.
	times  []float64
	events []bool
}
|
||||
|
||||
// NewTransformerSurv constructs a new surv transformer.
|
||||
func NewTransformerSurv(durationField, statusField string) IRecordTransformer {
|
||||
return &TransformerSurv{
|
||||
durationField: durationField,
|
||||
statusField: statusField,
|
||||
times: make([]float64, 0),
|
||||
events: make([]bool, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// Transform processes each record or emits results at end-of-stream.
|
||||
func (tr *TransformerSurv) Transform(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputRecordsAndContexts *list.List,
|
||||
inputDownstreamDoneChannel <-chan bool,
|
||||
outputDownstreamDoneChannel chan<- bool,
|
||||
) {
|
||||
HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
|
||||
if !inrecAndContext.EndOfStream {
|
||||
rec := inrecAndContext.Record
|
||||
mvDur := rec.Get(tr.durationField)
|
||||
if mvDur == nil {
|
||||
fmt.Fprintf(os.Stderr, "mlr surv: duration field '%s' not found\n", tr.durationField)
|
||||
os.Exit(1)
|
||||
}
|
||||
duration := mvDur.GetNumericToFloatValueOrDie()
|
||||
mvStat := rec.Get(tr.statusField)
|
||||
if mvStat == nil {
|
||||
fmt.Fprintf(os.Stderr, "mlr surv: status field '%s' not found\n", tr.statusField)
|
||||
os.Exit(1)
|
||||
}
|
||||
status := mvStat.GetNumericToFloatValueOrDie() != 0
|
||||
tr.times = append(tr.times, duration)
|
||||
tr.events = append(tr.events, status)
|
||||
} else {
|
||||
// Compute survival using kshedden/statmodel
|
||||
n := len(tr.times)
|
||||
if n == 0 {
|
||||
outputRecordsAndContexts.PushBack(inrecAndContext)
|
||||
return
|
||||
}
|
||||
durations := tr.times
|
||||
statuses := make([]float64, n)
|
||||
for i, ev := range tr.events {
|
||||
if ev {
|
||||
statuses[i] = 1.0
|
||||
} else {
|
||||
statuses[i] = 0.0
|
||||
}
|
||||
}
|
||||
dataCols := [][]float64{durations, statuses}
|
||||
names := []string{tr.durationField, tr.statusField}
|
||||
ds := statmodel.NewDataset(dataCols, names)
|
||||
sf, err := duration.NewSurvfuncRight(ds, tr.durationField, tr.statusField, &duration.SurvfuncRightConfig{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "mlr surv: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
sf.Fit()
|
||||
times := sf.Time()
|
||||
survProbs := sf.SurvProb()
|
||||
for i, t := range times {
|
||||
newrec := mlrval.NewMlrmapAsRecord()
|
||||
newrec.PutCopy("time", mlrval.FromFloat(t))
|
||||
newrec.PutCopy("survival", mlrval.FromFloat(survProbs[i]))
|
||||
outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context))
|
||||
}
|
||||
outputRecordsAndContexts.PushBack(inrecAndContext)
|
||||
}
|
||||
}
|
||||
|
|
@ -1261,6 +1261,16 @@ Options:
|
|||
--transpose Show output with field names as column names.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
surv
|
||||
Usage: mlr surv -d {duration-field} -s {status-field}
|
||||
|
||||
Estimate Kaplan-Meier survival curve (right-censored).
|
||||
Options:
|
||||
-d {field} Name of duration field (time-to-event or censoring).
|
||||
-s {field} Name of status field (0=censored, 1=event).
|
||||
-h, --help Show this message.
|
||||
|
||||
================================================================
|
||||
tac
|
||||
Usage: mlr tac [options]
|
||||
|
|
|
|||
1
test/cases/verb-surv/0001/cmd
Normal file
1
test/cases/verb-surv/0001/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv --from test/input/surv.csv surv -d duration -s status
|
||||
0
test/cases/verb-surv/0001/experr
Normal file
0
test/cases/verb-surv/0001/experr
Normal file
4
test/cases/verb-surv/0001/expout
Normal file
4
test/cases/verb-surv/0001/expout
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
time,survival
|
||||
1.00000000,0.80000000
|
||||
3.00000000,0.53333333
|
||||
5.00000000,0.00000000
|
||||
6
test/input/surv.csv
Normal file
6
test/input/surv.csv
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
duration,status
|
||||
1,1
|
||||
2,0
|
||||
3,1
|
||||
4,0
|
||||
5,1
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue