miller/docs6/docs/_build/html/dkvp-examples.html

289 lines
No EOL
12 KiB
HTML

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DKVP I/O examples &#8212; Miller 6.0.0-alpha documentation</title>
<link rel="stylesheet" href="_static/scrolls.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/print.css" type="text/css" />
<script id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/language_data.js"></script>
<script src="_static/theme_extras.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Programming-language examples" href="programming-examples.html" />
<link rel="prev" title="Two-pass algorithms" href="two-pass-algorithms.html" />
</head><body>
<div id="content">
<div class="header">
<h1 class="heading"><a href="index.html"
title="back to the documentation overview"><span>DKVP I/O examples</span></a></h1>
</div>
<div class="relnav" role="navigation" aria-label="related navigation">
<a href="two-pass-algorithms.html">&laquo; Two-pass algorithms</a> |
<a href="#">DKVP I/O examples</a>
| <a href="programming-examples.html">Programming-language examples &raquo;</a>
</div>
<div id="contentwrapper">
<div id="toc" role="navigation" aria-label="table of contents navigation">
<h3>Table of Contents</h3>
<ul>
<li><a class="reference internal" href="#">DKVP I/O examples</a><ul>
<li><a class="reference internal" href="#dkvp-i-o-in-python">DKVP I/O in Python</a></li>
<li><a class="reference internal" href="#dkvp-i-o-in-ruby">DKVP I/O in Ruby</a></li>
</ul>
</li>
</ul>
</div>
<div role="main">
<div class="section" id="dkvp-i-o-examples">
<h1>DKVP I/O examples<a class="headerlink" href="#dkvp-i-o-examples" title="Permalink to this headline"></a></h1>
<div class="section" id="dkvp-i-o-in-python">
<h2>DKVP I/O in Python<a class="headerlink" href="#dkvp-i-o-in-python" title="Permalink to this headline"></a></h2>
<p>Here are the I/O routines:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>#!/usr/bin/env python
# ================================================================
# Example of DKVP I/O using Python.
#
# Key point: Use Miller for what it&#39;s good at; pass data into/out of tools in
# other languages to do what they&#39;re good at.
#
# bash$ python -i dkvp_io.py
#
# # READ
# &gt;&gt;&gt; map = dkvpline2map(&#39;x=1,y=2&#39;, &#39;=&#39;, &#39;,&#39;)
# &gt;&gt;&gt; map
# OrderedDict([(&#39;x&#39;, 1), (&#39;y&#39;, 2)])
#
# # MODIFY
# &gt;&gt;&gt; map[&#39;z&#39;] = map[&#39;x&#39;] + map[&#39;y&#39;]
# &gt;&gt;&gt; map
# OrderedDict([(&#39;x&#39;, 1), (&#39;y&#39;, 2), (&#39;z&#39;, 3)])
#
# # WRITE
# &gt;&gt;&gt; line = map2dkvpline(map, &#39;=&#39;, &#39;,&#39;)
# &gt;&gt;&gt; line
# &#39;x=1,y=2,z=3&#39;
#
# ================================================================
import re
import collections
# ----------------------------------------------------------------
# ips and ifs (input pair separator and input field separator) are nominally &#39;=&#39; and &#39;,&#39;.
def dkvpline2map(line, ips, ifs):
pairs = re.split(ifs, line)
map = collections.OrderedDict()
for pair in pairs:
key, value = re.split(ips, pair, 1)
# Type inference:
try:
value = int(value)
except:
try:
value = float(value)
except:
pass
map[key] = value
return map
# ----------------------------------------------------------------
# ops and ofs (output pair separator and output field separator) are nominally &#39;=&#39; and &#39;,&#39;.
def map2dkvpline(map , ops, ofs):
line = &#39;&#39;
pairs = []
for key in map:
pairs.append(str(key) + ops + str(map[key]))
return str.join(ofs, pairs)
</pre></div>
</div>
<p>And here is an example using them:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> cat polyglot-dkvp-io/example.py
</span> #!/usr/bin/env python
import sys
import re
import copy
import dkvp_io
while True:
# Read the original record:
line = sys.stdin.readline().strip()
if line == &#39;&#39;:
break
map = dkvp_io.dkvpline2map(line, &#39;=&#39;, &#39;,&#39;)
# Drop a field:
map.pop(&#39;x&#39;)
# Compute some new fields:
map[&#39;ab&#39;] = map[&#39;a&#39;] + map[&#39;b&#39;]
map[&#39;iy&#39;] = map[&#39;i&#39;] + map[&#39;y&#39;]
# Add new fields which show type of each already-existing field:
omap = copy.copy(map) # since otherwise the for-loop will modify what it loops over
keys = omap.keys()
for key in keys:
# Convert &quot;&lt;type &#39;int&#39;&gt;&quot; to just &quot;int&quot;, etc.:
type_string = str(map[key].__class__)
type_string = re.sub(&quot;&lt;type &#39;&quot;, &quot;&quot;, type_string) # python2
type_string = re.sub(&quot;&lt;class &#39;&quot;, &quot;&quot;, type_string) # python3
type_string = re.sub(&quot;&#39;&gt;&quot;, &quot;&quot;, type_string)
map[&#39;t&#39;+key] = type_string
# Write the modified record:
print(dkvp_io.map2dkvpline(map, &#39;=&#39;, &#39;,&#39;))
</pre></div>
</div>
<p>Run as-is:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> python polyglot-dkvp-io/example.py &lt; data/small
</span> a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=eks,b=wye,i=4,y=0.13418874328430463,ab=ekswye,iy=4.134188743284304,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=wye,b=pan,i=5,y=0.8636244699032729,ab=wyepan,iy=5.863624469903273,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
</pre></div>
</div>
<p>Run as-is, then pipe to Miller for pretty-printing:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> python polyglot-dkvp-io/example.py &lt; data/small | mlr --opprint cat
</span> a b i y ab iy ta tb ti ty tab tiy
pan pan 1 0.7268028627434533 panpan 1.7268028627434533 str str int float str float
eks pan 2 0.5221511083334797 ekspan 2.5221511083334796 str str int float str float
wye wye 3 0.33831852551664776 wyewye 3.3383185255166477 str str int float str float
eks wye 4 0.13418874328430463 ekswye 4.134188743284304 str str int float str float
wye pan 5 0.8636244699032729 wyepan 5.863624469903273 str str int float str float
</pre></div>
</div>
</div>
<div class="section" id="dkvp-i-o-in-ruby">
<h2>DKVP I/O in Ruby<a class="headerlink" href="#dkvp-i-o-in-ruby" title="Permalink to this headline"></a></h2>
<p>Here are the I/O routines:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>#!/usr/bin/env ruby
# ================================================================
# Example of DKVP I/O using Ruby.
#
# Key point: Use Miller for what it&#39;s good at; pass data into/out of tools in
# other languages to do what they&#39;re good at.
#
# bash$ irb -I. -r dkvp_io.rb
#
# # READ
# irb(main):001:0&gt; map = dkvpline2map(&#39;x=1,y=2&#39;, &#39;=&#39;, &#39;,&#39;)
# =&gt; {&quot;x&quot;=&gt;1, &quot;y&quot;=&gt;2}
#
# # MODIFY
# irb(main):002:0&gt; map[&#39;z&#39;] = map[&#39;x&#39;] + map[&#39;y&#39;]
# =&gt; 3
#
# # WRITE
# irb(main):003:0&gt; line = map2dkvpline(map, &#39;=&#39;, &#39;,&#39;)
# =&gt; &quot;x=1,y=2,z=3&quot;
#
# ================================================================
# ----------------------------------------------------------------
# ips and ifs (input pair separator and input field separator) are nominally &#39;=&#39; and &#39;,&#39;.
def dkvpline2map(line, ips, ifs)
map = {}
line.split(ifs).each do |pair|
(k, v) = pair.split(ips, 2)
# Type inference:
begin
v = Integer(v)
rescue ArgumentError
begin
v = Float(v)
rescue ArgumentError
# Leave as string
end
end
map[k] = v
end
map
end
# ----------------------------------------------------------------
# ops and ofs (output pair separator and output field separator) are nominally &#39;=&#39; and &#39;,&#39;.
def map2dkvpline(map, ops, ofs)
map.collect{|k,v| k.to_s + ops + v.to_s}.join(ofs)
end
</pre></div>
</div>
<p>And here is an example using them:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> cat polyglot-dkvp-io/example.rb
</span> #!/usr/bin/env ruby
require &#39;dkvp_io&#39;
ARGF.each do |line|
# Read the original record:
map = dkvpline2map(line.chomp, &#39;=&#39;, &#39;,&#39;)
# Drop a field:
map.delete(&#39;x&#39;)
# Compute some new fields:
map[&#39;ab&#39;] = map[&#39;a&#39;] + map[&#39;b&#39;]
map[&#39;iy&#39;] = map[&#39;i&#39;] + map[&#39;y&#39;]
# Add new fields which show type of each already-existing field:
keys = map.keys
keys.each do |key|
map[&#39;t&#39;+key] = map[key].class
end
# Write the modified record:
puts map2dkvpline(map, &#39;=&#39;, &#39;,&#39;)
end
</pre></div>
</div>
<p>Run as-is:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small
</span> a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=eks,b=wye,i=4,y=0.13418874328430463,ab=ekswye,iy=4.134188743284304,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=wye,b=pan,i=5,y=0.8636244699032729,ab=wyepan,iy=5.863624469903273,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
</pre></div>
</div>
<p>Run as-is, then pipe to Miller for pretty-printing:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small | mlr --opprint cat
</span> a b i y ab iy ta tb ti ty tab tiy
pan pan 1 0.7268028627434533 panpan 1.7268028627434533 String String Integer Float String Float
eks pan 2 0.5221511083334797 ekspan 2.5221511083334796 String String Integer Float String Float
wye wye 3 0.33831852551664776 wyewye 3.3383185255166477 String String Integer Float String Float
eks wye 4 0.13418874328430463 ekswye 4.134188743284304 String String Integer Float String Float
wye pan 5 0.8636244699032729 wyepan 5.863624469903273 String String Integer Float String Float
</pre></div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="footer" role="contentinfo">
&#169; Copyright 2021, John Kerl.
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.2.1.
</div>
</body>
</html>