mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 10:15:36 +00:00
500 lines
23 KiB
HTML
500 lines
23 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
<html lang="en">
|
|
|
|
<!-- PAGE GENERATED FROM template.html and content-for-data-examples.html BY poki. -->
|
|
<!-- PLEASE MAKE CHANGES THERE AND THEN RE-RUN poki. -->
|
|
<head>
|
|
<meta http-equiv="Content-type" content="text/html;charset=UTF-8"/>
|
|
<meta name="description" content="Miller documentation"/>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"/> <!-- mobile-friendly -->
|
|
<meta name="keywords"
|
|
content="John Kerl, Kerl, Miller, miller, mlr, OLAP, data analysis software, regression, correlation, variance, data tools, " />
|
|
|
|
<title> Data examples </title>
|
|
<link rel="stylesheet" type="text/css" href="css/miller.css"/>
|
|
<link rel="stylesheet" type="text/css" href="css/poki-callbacks.css"/>
|
|
</head>
|
|
|
|
<!-- ================================================================ -->
|
|
<script type="text/javascript">
|
|
// Legacy Google Analytics (ga.js) loader.
// Choose the analytics host prefix from the protocol the page was served over:
// "https://ssl." for HTTPS pages, "http://www." for everything else.
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
// Write a script element for ga.js into the document while it is being parsed.
// The tag is spelled percent-encoded and decoded with unescape(), presumably so
// that no literal closing script tag appears inside this inline script.
// NOTE(review): unescape() and document.write() are both legacy APIs; they are
// kept as-is here because the tracker snippet that follows relies on ga.js
// having been requested by this point.
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
|
</script>
|
|
<script type="text/javascript">
|
|
// Record a page view with the legacy Google Analytics tracker.
// `_gat` is not defined in this file; it is expected to be provided by the
// ga.js script requested by the preceding snippet.
try {
  var pageTracker = _gat._getTracker("UA-15651652-1");
  pageTracker._trackPageview();
} catch(err) {
  // Deliberately ignored: analytics is best-effort, so a missing or failing
  // tracker (e.g. ga.js not loaded) must not break the page.
}
|
|
</script>
|
|
|
|
<script type="text/javascript">
|
|
/**
 * Toggle the inline `display` style of an element between "block" and "none".
 *
 * Only the element's inline style is consulted: if it is currently "block" the
 * element is set to "none"; for any other value (including an unset inline
 * style) it is set to "block".
 *
 * @param {string} divName  The `id` of the element to toggle. If no element
 *                          with that id exists, the call does nothing.
 */
function toggle(divName) {
  var target = document.getElementById(divName);
  // Nothing to do when the id does not resolve to an element.
  if (target == null) {
    return;
  }
  target.style.display = (target.style.display == "block") ? "none" : "block";
}
|
|
</script>
|
|
|
|
<!--
|
|
The background image is from a screenshot of a Google search for "data analysis
|
|
tools", lightened and sepia-toned. Over this was placed a Mac Terminal app with
|
|
very light-grey font and translucent background, in which a few statistical
|
|
Miller commands were run with pretty-print-tabular output format.
|
|
-->
|
|
<body background="pix/sepia-overlay.jpg">
|
|
|
|
<!-- ================================================================ -->
|
|
<table width="100%">
|
|
<tr>
|
|
|
|
<!-- navbar -->
|
|
<td width="15%">
|
|
<!--
|
|
<img src="pix/mlr.jpg" />
|
|
<img style="border-width:1px; color:black;" src="pix/mlr.jpg" />
|
|
-->
|
|
|
|
<div class="pokinav">
|
|
<center><titleinbody>Miller</titleinbody></center>
|
|
|
|
<!-- PAGE LIST GENERATED FROM template.html BY poki -->
|
|
<br/>User info:
|
|
<br/>• <a href="index.html">About Miller</a>
|
|
<br/>• <a href="file-formats.html">File formats</a>
|
|
<br/>• <a href="feature-comparison.html">Miller features in the context of the Unix toolkit</a>
|
|
<br/>• <a href="record-heterogeneity.html">Record-heterogeneity</a>
|
|
<br/>• <a href="performance.html">Performance</a>
|
|
<br/>• <a href="etymology.html">Why call it Miller?</a>
|
|
<br/>• <a href="originality.html">How original is Miller?</a>
|
|
<br/>• <a href="reference.html">Reference</a>
|
|
<br/>• <a href="data-examples.html"><b>Data examples</b></a>
|
|
<br/>• <a href="internationalization.html">Internationalization</a>
|
|
<br/>• <a href="to-do.html">Things to do</a>
|
|
<br/>Developer info:
|
|
<br/>• <a href="build.html">Compiling, portability, dependencies, and testing</a>
|
|
<br/>• <a href="whyc.html">Why C?</a>
|
|
<br/>• <a href="contact.html">Contact information</a>
|
|
<br/>• <a href="https://github.com/johnkerl/miller">GitHub repo</a>
|
|
<br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/>
|
|
<br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/>
|
|
<br/> <br/> <br/> <br/> <br/> <br/>
|
|
</div>
|
|
</td>
|
|
|
|
<!-- page body -->
|
|
<td>
|
|
<div style="overflow-y:scroll;height:1500px">
|
|
<center> <titleinbody> Data examples </titleinbody> </center>
|
|
<p/>
|
|
|
|
<!-- BODY COPIED FROM content-for-data-examples.html BY poki -->
|
|
<div class="pokitoc">
|
|
<center><b>Contents:</b></center>
|
|
• <a href="#flins_data">flins data</a><br/>
|
|
• <a href="#Color/shape_data">Color/shape data</a><br/>
|
|
• <a href="#Program_timing">Program timing</a><br/>
|
|
</div>
|
|
<p/>
|
|
<a id="flins_data"></a><h1>flins data</h1>
|
|
|
|
<p/> The <a href="data/flins.csv">flins.csv</a> file is some sample data
|
|
obtained from <a href="https://support.spatialkey.com/spatialkey-sample-csv-data">https://support.spatialkey.com/spatialkey-sample-csv-data</a>.
|
|
|
|
<p/>Vertical-tabular format is good for a quick look at CSV data layout — seeing what columns you have to work with:
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ head -n 2 data/flins.csv | mlr --icsv --oxtab cat
|
|
policyID 119736
|
|
statecode FL
|
|
county CLAY COUNTY
|
|
eq_site_limit 498960
|
|
hu_site_limit 498960
|
|
fl_site_limit 498960
|
|
fr_site_limit 498960
|
|
tiv_2011 498960
|
|
tiv_2012 792148.9
|
|
eq_site_deductible 0
|
|
hu_site_deductible 9979.2
|
|
fl_site_deductible 0
|
|
fr_site_deductible 0
|
|
point_latitude 30.102261
|
|
point_longitude -81.711777
|
|
line Residential
|
|
construction Masonry
|
|
point_granularity 1
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/> A few simple queries:
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ cat data/flins.csv | mlr --icsv --opprint count-distinct -f county | head
|
|
county count
|
|
CLAY COUNTY 363
|
|
SUWANNEE COUNTY 154
|
|
NASSAU COUNTY 135
|
|
COLUMBIA COUNTY 125
|
|
ST JOHNS COUNTY 657
|
|
BAKER COUNTY 70
|
|
BRADFORD COUNTY 31
|
|
HAMILTON COUNTY 35
|
|
UNION COUNTY 15
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ cat data/flins.csv | mlr --icsv --opprint count-distinct -f construction,line
|
|
construction line count
|
|
Masonry Residential 9257
|
|
Wood Residential 21581
|
|
Reinforced Concrete Commercial 1299
|
|
Reinforced Masonry Commercial 4225
|
|
Steel Frame Commercial 272
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/> Categorization of total insured value:
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ cat data/flins.csv | mlr --icsv --opprint stats1 -a min,mean,max -f tiv_2012
|
|
tiv_2012_min tiv_2012_mean tiv_2012_max
|
|
73.370000 2571004.097342 1701000000.000000
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ cat data/flins.csv | mlr --icsv --opprint stats1 -a min,mean,max -f tiv_2012 -g construction,line
|
|
construction line tiv_2012_min tiv_2012_mean tiv_2012_max
|
|
Masonry Residential 261168.070000 1041986.129217 3234970.920000
|
|
Wood Residential 73.370000 113493.017049 649046.120000
|
|
Reinforced Concrete Commercial 6416016.010000 20212428.681840 60570000.000000
|
|
Reinforced Masonry Commercial 1287817.340000 4621372.981117 16650000.000000
|
|
Steel Frame Commercial 29790000.000000 133492500.000000 1701000000.000000
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ cat data/flins.csv | mlr --icsv --oxtab stats1 -a p0,p10,p50,p90,p95,p99,p100 -f hu_site_deductible
|
|
hu_site_deductible_p0 0.000000
|
|
hu_site_deductible_p10 0.000000
|
|
hu_site_deductible_p50 0.000000
|
|
hu_site_deductible_p90 76.500000
|
|
hu_site_deductible_p95 6829.200000
|
|
hu_site_deductible_p99 126270.000000
|
|
hu_site_deductible_p100 7380000.000000
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ cat data/flins.csv | mlr --icsv --opprint stats1 -a p95,p99,p100 -f hu_site_deductible -g county then sort -f county | head
|
|
county hu_site_deductible_p95 hu_site_deductible_p99 hu_site_deductible_p100
|
|
ALACHUA COUNTY 30630.600000 107312.400000 1641375.000000
|
|
BAKER COUNTY 0.000000 0.000000 0.000000
|
|
BAY COUNTY 26131.500000 181912.500000 630000.000000
|
|
BRADFORD COUNTY 3355.200000 8163.000000 8163.000000
|
|
BREVARD COUNTY 5360.400000 78975.000000 1973461.500000
|
|
BROWARD COUNTY 0.000000 148500.000000 3258900.000000
|
|
CALHOUN COUNTY 0.000000 33339.600000 33339.600000
|
|
CHARLOTTE COUNTY 5400.000000 52650.000000 250994.700000
|
|
CITRUS COUNTY 1332.900000 79974.900000 483785.100000
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ cat data/flins.csv | mlr --icsv --oxtab stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
|
|
tiv_2011_tiv_2012_corr 0.973050
|
|
tiv_2011_tiv_2012_ols_m 0.983558
|
|
tiv_2011_tiv_2012_ols_b 433854.642897
|
|
tiv_2011_tiv_2012_ols_n 36634
|
|
tiv_2011_tiv_2012_r2 0.946826
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ cat data/flins.csv | mlr --icsv --opprint stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012 -g county
|
|
county tiv_2011_tiv_2012_corr tiv_2011_tiv_2012_ols_m tiv_2011_tiv_2012_ols_b tiv_2011_tiv_2012_ols_n tiv_2011_tiv_2012_r2
|
|
CLAY COUNTY 0.962716 1.090115 46450.531268 363 0.926822
|
|
SUWANNEE COUNTY 0.989208 1.074658 36253.003174 154 0.978533
|
|
NASSAU COUNTY 0.973135 1.296321 -45369.242673 135 0.946993
|
|
COLUMBIA COUNTY 0.999492 0.931447 117183.548383 125 0.998985
|
|
ST JOHNS COUNTY 0.966170 1.230056 -596.623856 657 0.933485
|
|
BAKER COUNTY 0.963515 0.942771 29063.065747 70 0.928360
|
|
BRADFORD COUNTY 0.999766 0.849029 69544.341944 31 0.999533
|
|
HAMILTON COUNTY 0.987026 1.224952 1045.052170 35 0.974220
|
|
UNION COUNTY 0.997745 1.432575 -56.125738 15 0.995495
|
|
MADISON COUNTY 0.985213 1.512114 -84278.028498 81 0.970645
|
|
LAFAYETTE COUNTY 0.967499 1.134289 9904.860798 68 0.936055
|
|
FLAGLER COUNTY 0.984854 1.007922 95340.508354 204 0.969937
|
|
DUVAL COUNTY 0.978815 1.245630 -60831.675023 1894 0.958079
|
|
LAKE COUNTY 0.999727 1.293864 -107695.848518 206 0.999455
|
|
VOLUSIA COUNTY 0.994636 1.202247 -36277.755477 1367 0.989300
|
|
PUTNAM COUNTY 0.961167 1.176294 6405.060826 268 0.923841
|
|
MARION COUNTY 0.975774 1.175642 20434.945602 1138 0.952136
|
|
SUMTER COUNTY 0.989760 1.372395 -62648.989750 158 0.979625
|
|
LEON COUNTY 0.978644 1.259681 -90816.033261 246 0.957743
|
|
FRANKLIN COUNTY 0.989430 1.048513 36026.508852 37 0.978972
|
|
LIBERTY COUNTY 0.995175 1.369834 -79755.544362 36 0.990373
|
|
GADSDEN COUNTY 0.997898 1.180585 7335.013009 196 0.995801
|
|
WAKULLA COUNTY 0.978267 1.192350 44607.922080 85 0.957006
|
|
JEFFERSON COUNTY 0.976543 0.976066 74884.170791 57 0.953637
|
|
TAYLOR COUNTY 0.981770 1.386188 -56856.945239 113 0.963873
|
|
BAY COUNTY 0.975404 1.004452 373000.300167 403 0.951412
|
|
WALTON COUNTY 0.985855 1.319583 -83273.091503 288 0.971909
|
|
JACKSON COUNTY 0.991195 1.171538 8128.438198 208 0.982468
|
|
CALHOUN COUNTY 0.967974 1.274077 -739.602262 68 0.936973
|
|
HOLMES COUNTY 0.997366 1.159384 42610.647058 40 0.994738
|
|
WASHINGTON COUNTY 0.982582 1.213413 -13125.214494 116 0.965468
|
|
GULF COUNTY 0.990367 1.135626 26094.474571 72 0.980826
|
|
ESCAMBIA COUNTY 0.986666 1.195336 46106.277408 494 0.973509
|
|
SANTA ROSA COUNTY 0.972696 1.013849 30496.045069 856 0.946138
|
|
OKALOOSA COUNTY 0.970781 1.462083 -116127.032201 1115 0.942416
|
|
ALACHUA COUNTY 0.982825 1.142748 52671.269211 973 0.965945
|
|
GILCHRIST COUNTY 0.977467 1.375740 -15309.425813 39 0.955442
|
|
LEVY COUNTY 0.956302 1.200506 265.391211 126 0.914513
|
|
DIXIE COUNTY 0.995780 1.640150 -98273.767115 40 0.991578
|
|
SEMINOLE COUNTY 0.985925 0.880108 427892.123991 1100 0.972048
|
|
ORANGE COUNTY 0.990658 0.872027 1298970.668186 1811 0.981403
|
|
BREVARD COUNTY 0.978015 1.271225 -19295.177646 872 0.956513
|
|
INDIAN RIVER COUNTY 0.985673 1.284620 -116579.613922 380 0.971550
|
|
MIAMI DADE COUNTY 0.987833 1.293106 -237168.505282 4315 0.975815
|
|
BROWARD COUNTY 0.983847 1.187689 81931.896276 3193 0.967954
|
|
MONROE COUNTY 0.982555 1.013142 455469.576218 152 0.965414
|
|
PALM BEACH COUNTY 0.982591 1.247594 -77252.429421 2791 0.965485
|
|
MARTIN COUNTY 0.975896 1.032873 8668.746202 109 0.952374
|
|
HENDRY COUNTY 0.971645 0.969699 208613.031856 74 0.944093
|
|
PASCO COUNTY 0.986556 1.288225 -152936.104164 790 0.973294
|
|
GLADES COUNTY 0.983518 0.982993 125666.627729 22 0.967308
|
|
HILLSBOROUGH COUNTY 0.985446 1.211620 214512.927989 1166 0.971103
|
|
HERNANDO COUNTY 0.974068 0.759748 701096.129434 120 0.948809
|
|
PINELLAS COUNTY 0.987215 1.154797 38609.763660 1774 0.974593
|
|
POLK COUNTY 0.979963 1.094848 153371.308143 1629 0.960327
|
|
North Fort Myers - nan nan 1 -
|
|
Orlando - nan nan 1 -
|
|
HIGHLANDS COUNTY 0.993054 1.528760 -300198.361569 369 0.986157
|
|
HARDEE COUNTY 0.977999 1.323440 -98513.434797 81 0.956482
|
|
MANATEE COUNTY 0.967526 1.068496 137190.708238 518 0.936106
|
|
OSCEOLA COUNTY - nan nan 1 -
|
|
LEE COUNTY 0.978945 1.252722 -16843.109269 678 0.958334
|
|
CHARLOTTE COUNTY 0.979024 1.013211 178461.328878 414 0.958488
|
|
COLLIER COUNTY 0.958031 1.169759 110270.385201 787 0.917824
|
|
SARASOTA COUNTY 0.984781 1.292514 -109939.723017 417 0.969793
|
|
DESOTO COUNTY 0.980130 1.286205 -9987.042982 108 0.960654
|
|
CITRUS COUNTY 0.989943 0.965940 138635.818880 384 0.979986
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
|
|
<a id="Color/shape_data"></a><h1>Color/shape data</h1>
|
|
|
|
<p/> The <a href="data/colored-shapes.dkvp">colored-shapes.dkvp</a> file is some sample data produced by the
|
|
<a href="https://github.com/johnkerl/miller/blob/master/doc/datagen/mkdat2">mkdat2</a> script. The idea is:
|
|
<ul>
|
|
<li> Produce some data with known distributions and correlations, and verify that Miller recovers those properties empirically.
|
|
<li> Each record is labeled with one of a few colors and one of a few shapes.
|
|
<li> The <tt>flag</tt> field is 0 or 1, with probability dependent on color.
|
|
<li> The <tt>u</tt> field is plain uniform on the unit interval.
|
|
<li> The <tt>v</tt> field is the same, except tightly correlated with <tt>u</tt> for red circles.
|
|
<li> The <tt>w</tt> field is autocorrelated for each color/shape pair.
|
|
<li> The <tt>x</tt> field is boring Gaussian with mean 5 and standard deviation about 1.2, with no dependence on color or shape.
|
|
</ul>
|
|
|
|
<p/> Peek at the data:
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ wc -l data/colored-shapes.dkvp
|
|
10078 data/colored-shapes.dkvp
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ head -n 6 data/colored-shapes.dkvp | mlr --opprint cat
|
|
color shape flag i u v w x
|
|
yellow triangle 1 11 0.6321695890307647 0.9887207810889004 0.4364983936735774 5.7981881667050565
|
|
red square 1 15 0.21966833570651523 0.001257332190235938 0.7927778364718627 2.944117399716207
|
|
red circle 1 16 0.20901671281497636 0.29005231936593445 0.13810280912907674 5.065034003400998
|
|
red square 0 48 0.9562743938458542 0.7467203085342884 0.7755423050923582 7.117831369597269
|
|
purple triangle 0 51 0.4355354501763202 0.8591292672156728 0.8122903963006748 5.753094629505863
|
|
red square 0 64 0.2015510269821953 0.9531098083420033 0.7719912015786777 5.612050466474166
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
|
|
<p/> Look at uncategorized stats (using <a href="https://github.com/johnkerl/scripts/blob/master/fundam/creach"><tt>creach</tt></a> for spacing).
|
|
Here it looks reasonable that <tt>u</tt> is unit-uniform; something’s up with <tt>v</tt> but we can't yet see what:
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ mlr --oxtab stats1 -a min,mean,max -f flag,u,v data/colored-shapes.dkvp | creach 3
|
|
flag_min 0.000000
|
|
flag_mean 0.398889
|
|
flag_max 1.000000
|
|
|
|
u_min 0.000044
|
|
u_mean 0.498326
|
|
u_max 0.999969
|
|
|
|
v_min -0.092709
|
|
v_mean 0.497787
|
|
v_max 1.072500
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>The histogram shows the different distribution of 0/1 flags:
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ mlr --opprint histogram -f flag,u,v --lo -0.1 --hi 1.1 --nbins 12 data/colored-shapes.dkvp
|
|
bin_lo bin_hi flag_count u_count v_count
|
|
-0.100000 0.000000 6058 0 36
|
|
0.000000 0.100000 0 1062 988
|
|
0.100000 0.200000 0 985 1003
|
|
0.200000 0.300000 0 1024 1014
|
|
0.300000 0.400000 0 1002 991
|
|
0.400000 0.500000 0 989 1041
|
|
0.500000 0.600000 0 1001 1016
|
|
0.600000 0.700000 0 972 962
|
|
0.700000 0.800000 0 1035 1070
|
|
0.800000 0.900000 0 995 993
|
|
0.900000 1.000000 4020 1013 939
|
|
1.000000 1.100000 0 0 25
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
|
|
<p/> Look at univariate stats by color and shape. In particular,
|
|
color-dependent flag probabilities pop out, aligning with their original
|
|
Bernoulli probabilities from the data-generator script:
|
|
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ mlr --opprint stats1 -a min,mean,max -f flag,u,v -g color then sort -f color data/colored-shapes.dkvp
|
|
color flag_min flag_mean flag_max u_min u_mean u_max v_min v_mean v_max
|
|
blue 0.000000 0.584354 1.000000 0.000044 0.517717 0.999969 0.001489 0.491056 0.999576
|
|
green 0.000000 0.209197 1.000000 0.000488 0.504861 0.999936 0.000501 0.499085 0.999676
|
|
orange 0.000000 0.521452 1.000000 0.001235 0.490532 0.998885 0.002449 0.487764 0.998475
|
|
purple 0.000000 0.090193 1.000000 0.000266 0.494005 0.999647 0.000364 0.497051 0.999975
|
|
red 0.000000 0.303167 1.000000 0.000671 0.492560 0.999882 -0.092709 0.496535 1.072500
|
|
yellow 0.000000 0.892427 1.000000 0.001300 0.497129 0.999923 0.000711 0.510627 0.999919
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ mlr --opprint stats1 -a min,mean,max -f flag,u,v -g shape then sort -f shape data/colored-shapes.dkvp
|
|
shape flag_min flag_mean flag_max u_min u_mean u_max v_min v_mean v_max
|
|
circle 0.000000 0.399846 1.000000 0.000044 0.498555 0.999923 -0.092709 0.495524 1.072500
|
|
square 0.000000 0.396112 1.000000 0.000188 0.499385 0.999969 0.000089 0.496538 0.999975
|
|
triangle 0.000000 0.401542 1.000000 0.000881 0.496859 0.999661 0.000717 0.501050 0.999995
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
|
|
<p/> Look at bivariate stats by color and shape. In particular, <tt>u,v</tt> pairwise correlation for red circles pops out:
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ mlr --opprint --right stats2 -a corr -f u,v,w,x data/colored-shapes.dkvp
|
|
u_v_corr w_x_corr
|
|
0.133418 -0.011320
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ mlr --opprint --right stats2 -a corr -f u,v,w,x -g color,shape then sort -nr u_v_corr data/colored-shapes.dkvp
|
|
color shape u_v_corr w_x_corr
|
|
red circle 0.980798 -0.018565
|
|
orange square 0.176858 -0.071044
|
|
green circle 0.057644 0.011795
|
|
red square 0.055745 -0.000680
|
|
yellow triangle 0.044573 0.024605
|
|
yellow square 0.043792 -0.044623
|
|
purple circle 0.035874 0.134112
|
|
blue square 0.032412 -0.053508
|
|
blue triangle 0.015356 -0.000608
|
|
orange circle 0.010519 -0.162795
|
|
red triangle 0.008098 0.012486
|
|
purple triangle 0.005155 -0.045058
|
|
purple square -0.025680 0.057694
|
|
green square -0.025776 -0.003265
|
|
orange triangle -0.030457 -0.131870
|
|
yellow circle -0.064773 0.073695
|
|
blue circle -0.102348 -0.030529
|
|
green triangle -0.109018 -0.048488
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
|
|
<a id="Program_timing"></a><h1>Program timing</h1>
|
|
|
|
This admittedly artificial example demonstrates using Miller time and stats
|
|
functions to introspectively acquire some information about Miller’s own
|
|
runtime. The <tt>delta</tt> function computes the difference between successive
|
|
timestamps.
|
|
|
|
<p/>
|
|
<div class="pokipanel">
|
|
<pre>
|
|
$ ruby -e '10000.times{|i|puts "i=#{i+1}"}' > lines.txt
|
|
|
|
$ head -n 5 lines.txt
|
|
i=1
|
|
i=2
|
|
i=3
|
|
i=4
|
|
i=5
|
|
|
|
$ mlr --ofmt '%.9le' --opprint put '$t=systime()' then step -a delta -f t lines.txt | head -n 7
|
|
i t t_delta
|
|
1 1430603027.018016 1.430603027e+09
|
|
2 1430603027.018043 2.694129944e-05
|
|
3 1430603027.018048 5.006790161e-06
|
|
4 1430603027.018052 4.053115845e-06
|
|
5 1430603027.018055 2.861022949e-06
|
|
6 1430603027.018058 3.099441528e-06
|
|
|
|
$ mlr --ofmt '%.9le' --oxtab \
|
|
put '$t=systime()' then \
|
|
step -a delta -f t then \
|
|
filter '$i>1' then \
|
|
stats1 -a min,mean,max -f t_delta \
|
|
lines.txt
|
|
t_delta_min 2.861022949e-06
|
|
t_delta_mean 4.077508505e-06
|
|
t_delta_max 5.388259888e-05
|
|
</pre>
|
|
</div>
|
|
<p/>
|
|
</div>
|
|
</td>
|
|
|
|
</table>
|
|
</body>
|
|
</html>
|