mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-24 02:36:15 +00:00
289 lines
No EOL
12 KiB
HTML
289 lines
No EOL
12 KiB
HTML
|
|
<!DOCTYPE html>
|
|
|
|
<html>
|
|
<head>
|
|
<meta charset="utf-8" />
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
<title>DKVP I/O examples — Miller 6.0.0-alpha documentation</title>
|
|
|
|
<link rel="stylesheet" href="_static/scrolls.css" type="text/css" />
|
|
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
|
<link rel="stylesheet" href="_static/print.css" type="text/css" />
|
|
|
|
<script id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
|
|
<script src="_static/jquery.js"></script>
|
|
<script src="_static/underscore.js"></script>
|
|
<script src="_static/doctools.js"></script>
|
|
<script src="_static/language_data.js"></script>
|
|
<script src="_static/theme_extras.js"></script>
|
|
<link rel="index" title="Index" href="genindex.html" />
|
|
<link rel="search" title="Search" href="search.html" />
|
|
<link rel="next" title="Programming-language examples" href="programming-examples.html" />
|
|
<link rel="prev" title="Two-pass algorithms" href="two-pass-algorithms.html" />
|
|
</head><body>
|
|
<div id="content">
|
|
<div class="header">
|
|
<h1 class="heading"><a href="index.html"
|
|
title="back to the documentation overview"><span>DKVP I/O examples</span></a></h1>
|
|
</div>
|
|
<div class="relnav" role="navigation" aria-label="related navigation">
|
|
<a href="two-pass-algorithms.html">« Two-pass algorithms</a> |
|
|
<a href="#">DKVP I/O examples</a>
|
|
| <a href="programming-examples.html">Programming-language examples »</a>
|
|
</div>
|
|
<div id="contentwrapper">
|
|
<div id="toc" role="navigation" aria-label="table of contents navigation">
|
|
<h3>Table of Contents</h3>
|
|
<ul>
|
|
<li><a class="reference internal" href="#">DKVP I/O examples</a><ul>
|
|
<li><a class="reference internal" href="#dkvp-i-o-in-python">DKVP I/O in Python</a></li>
|
|
<li><a class="reference internal" href="#dkvp-i-o-in-ruby">DKVP I/O in Ruby</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
|
|
</div>
|
|
<div role="main">
|
|
|
|
<div class="section" id="dkvp-i-o-examples">
|
|
<h1>DKVP I/O examples<a class="headerlink" href="#dkvp-i-o-examples" title="Permalink to this headline">¶</a></h1>
|
|
<div class="section" id="dkvp-i-o-in-python">
|
|
<h2>DKVP I/O in Python<a class="headerlink" href="#dkvp-i-o-in-python" title="Permalink to this headline">¶</a></h2>
|
|
<p>Here are the I/O routines:</p>
|
|
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>#!/usr/bin/env python
|
|
|
|
# ================================================================
|
|
# Example of DKVP I/O using Python.
|
|
#
|
|
# Key point: Use Miller for what it's good at; pass data into/out of tools in
|
|
# other languages to do what they're good at.
|
|
#
|
|
# bash$ python -i dkvp_io.py
|
|
#
|
|
# # READ
|
|
# >>> map = dkvpline2map('x=1,y=2', '=', ',')
|
|
# >>> map
|
|
# OrderedDict([('x', 1), ('y', 2)])
|
|
#
|
|
# # MODIFY
|
|
# >>> map['z'] = map['x'] + map['y']
|
|
# >>> map
|
|
# OrderedDict([('x', 1), ('y', 2), ('z', 3)])
|
|
#
|
|
# # WRITE
|
|
# >>> line = map2dkvpline(map, '=', ',')
|
|
# >>> line
|
|
# 'x=1,y=2,z=3'
|
|
#
|
|
# ================================================================
|
|
|
|
import re
|
|
import collections
|
|
|
|
# ----------------------------------------------------------------
|
|
# ips and ifs (input pair separator and input field separator) are nominally '=' and ','.
|
|
def dkvpline2map(line, ips, ifs):
    """Parse one DKVP-formatted line into an ordered map of fields.

    Args:
        line: Record text such as 'x=1,y=2', without a trailing newline.
        ips: Input pair separator, nominally '='. It is handed to re.split,
            so it is interpreted as a regular expression.
        ifs: Input field separator, nominally ','. It is also handed to
            re.split and interpreted as a regular expression.

    Returns:
        A collections.OrderedDict holding the fields in input order. A value
        that parses as an int is stored as an int; otherwise, a value that
        parses as a float is stored as a float; anything else stays a string.
        An empty line yields an empty map.

    Raises:
        ValueError: If a field does not contain the pair separator.
    """
    record = collections.OrderedDict()
    if line == '':
        # An empty line holds no fields. Without this guard the single empty
        # "pair" produced by re.split would fail to unpack below.
        return record

    for pair in re.split(ifs, line):
        # Split on the first separator only, so that a value may itself
        # contain the separator. maxsplit is passed by keyword because the
        # positional form is deprecated in newer Python versions.
        key, value = re.split(ips, pair, maxsplit=1)

        # Type inference: try int first, then float, else keep the string.
        # Only ValueError is caught so unrelated errors are not hidden.
        try:
            value = int(value)
        except ValueError:
            try:
                value = float(value)
            except ValueError:
                pass

        record[key] = value
    return record
|
|
|
|
# ----------------------------------------------------------------
|
|
# ops and ofs (output pair separator and output field separator) are nominally '=' and ','.
|
|
def map2dkvpline(map, ops, ofs):
    """Format a map of fields as a single DKVP line.

    Args:
        map: Mapping of field keys to values. Keys and values are converted
            with str(), and fields are emitted in the mapping's iteration
            order.
        ops: Output pair separator, nominally '='.
        ofs: Output field separator, nominally ','.

    Returns:
        The fields joined as 'key<ops>value' pairs separated by ofs, with no
        trailing newline. An empty mapping yields an empty string.
    """
    return ofs.join(str(key) + ops + str(value) for key, value in map.items())
|
|
</pre></div>
|
|
</div>
|
|
<p>And here is an example using them:</p>
|
|
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> cat polyglot-dkvp-io/example.py
|
|
</span> #!/usr/bin/env python
|
|
|
|
import sys
|
|
import re
|
|
import copy
|
|
import dkvp_io
|
|
|
|
# Transform DKVP records read from standard input, one line at a time.
while True:
    # Fetch the next record. A line that is empty after stripping (end of
    # input or a blank line) ends the loop.
    text = sys.stdin.readline().strip()
    if text == '':
        break
    record = dkvp_io.dkvpline2map(text, '=', ',')

    # Discard the 'x' field.
    record.pop('x')

    # Derive two new fields from existing ones.
    record['ab'] = record['a'] + record['b']
    record['iy'] = record['i'] + record['y']

    # For every field present so far, add a 't'-prefixed field naming its type.
    # Iterate over a shallow copy so that inserting into record is safe.
    for name in copy.copy(record).keys():
        # Reduce "<type 'int'>" (python2) or "<class 'int'>" (python3) to "int".
        type_name = str(record[name].__class__)
        for pattern in ("<type '", "<class '", "'>"):
            type_name = re.sub(pattern, "", type_name)
        record['t'+name] = type_name

    # Emit the transformed record.
    print(dkvp_io.map2dkvpline(record, '=', ','))
|
|
</pre></div>
|
|
</div>
|
|
<p>Run as-is:</p>
|
|
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> python polyglot-dkvp-io/example.py < data/small
|
|
</span> a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
|
|
a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
|
|
a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
|
|
a=eks,b=wye,i=4,y=0.13418874328430463,ab=ekswye,iy=4.134188743284304,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
|
|
a=wye,b=pan,i=5,y=0.8636244699032729,ab=wyepan,iy=5.863624469903273,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
|
|
</pre></div>
|
|
</div>
|
|
<p>Run as-is, then pipe to Miller for pretty-printing:</p>
|
|
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> python polyglot-dkvp-io/example.py < data/small | mlr --opprint cat
|
|
</span> a b i y ab iy ta tb ti ty tab tiy
|
|
pan pan 1 0.7268028627434533 panpan 1.7268028627434533 str str int float str float
|
|
eks pan 2 0.5221511083334797 ekspan 2.5221511083334796 str str int float str float
|
|
wye wye 3 0.33831852551664776 wyewye 3.3383185255166477 str str int float str float
|
|
eks wye 4 0.13418874328430463 ekswye 4.134188743284304 str str int float str float
|
|
wye pan 5 0.8636244699032729 wyepan 5.863624469903273 str str int float str float
|
|
</pre></div>
|
|
</div>
|
|
</div>
|
|
<div class="section" id="dkvp-i-o-in-ruby">
|
|
<h2>DKVP I/O in Ruby<a class="headerlink" href="#dkvp-i-o-in-ruby" title="Permalink to this headline">¶</a></h2>
|
|
<p>Here are the I/O routines:</p>
|
|
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>#!/usr/bin/env ruby
|
|
|
|
# ================================================================
|
|
# Example of DKVP I/O using Ruby.
|
|
#
|
|
# Key point: Use Miller for what it's good at; pass data into/out of tools in
|
|
# other languages to do what they're good at.
|
|
#
|
|
# bash$ irb -I. -r dkvp_io.rb
|
|
#
|
|
# # READ
|
|
# irb(main):001:0> map = dkvpline2map('x=1,y=2', '=', ',')
|
|
# => {"x"=>1, "y"=>2}
|
|
#
|
|
# # MODIFY
|
|
# irb(main):001:0> map['z'] = map['x'] + map['y']
|
|
# => 3
|
|
#
|
|
# # WRITE
|
|
# irb(main):002:0> line = map2dkvpline(map, '=', ',')
|
|
# => "x=1,y=2,z=3"
|
|
#
|
|
# ================================================================
|
|
|
|
# ----------------------------------------------------------------
|
|
# ips and ifs (input pair separator and input field separator) are nominally '=' and ','.
|
|
# Parses one DKVP line (e.g. 'x=1,y=2') into a Hash, keeping field order.
# ips is the pair separator and ifs the field separator. Each value is
# converted to an Integer if possible, else to a Float if possible, else it
# is left as the original String.
def dkvpline2map(line, ips, ifs)
  line.split(ifs).each_with_object({}) do |field, record|
    # Split on the first separator only, so a value may contain the separator.
    name, text = field.split(ips, 2)

    # Type inference: Integer first, then Float, otherwise keep the string.
    typed =
      begin
        Integer(text)
      rescue ArgumentError
        begin
          Float(text)
        rescue ArgumentError
          text
        end
      end

    record[name] = typed
  end
end
|
|
|
|
# ----------------------------------------------------------------
|
|
# ops and ofs (output pair separator and output field separator) are nominally '=' and ','.
|
|
# Formats a Hash of fields as one DKVP line. Each key and value is converted
# with to_s and joined by ops; the resulting pairs are joined by ofs.
def map2dkvpline(map, ops, ofs)
  fields = map.map do |name, value|
    name.to_s + ops + value.to_s
  end
  fields.join(ofs)
end
|
|
</pre></div>
|
|
</div>
|
|
<p>And here is an example using them:</p>
|
|
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> cat polyglot-dkvp-io/example.rb
|
|
</span> #!/usr/bin/env ruby
|
|
|
|
require 'dkvp_io'
|
|
|
|
# Transform each DKVP record supplied by ARGF, one line at a time.
ARGF.each_line do |text|
  # Parse the record, minus its trailing newline.
  record = dkvpline2map(text.chomp, '=', ',')

  # Discard the 'x' field.
  record.delete('x')

  # Derive two new fields from existing ones.
  record['ab'] = record['a'] + record['b']
  record['iy'] = record['i'] + record['y']

  # For every field present so far, add a 't'-prefixed field holding its class.
  # Hash#keys returns a new array, so adding entries while looping is safe.
  record.keys.each do |name|
    record['t' + name] = record[name].class
  end

  # Emit the transformed record.
  puts map2dkvpline(record, '=', ',')
end
|
|
</pre></div>
|
|
</div>
|
|
<p>Run as-is:</p>
|
|
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small
|
|
</span> a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
|
|
a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
|
|
a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
|
|
a=eks,b=wye,i=4,y=0.13418874328430463,ab=ekswye,iy=4.134188743284304,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
|
|
a=wye,b=pan,i=5,y=0.8636244699032729,ab=wyepan,iy=5.863624469903273,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
|
|
</pre></div>
|
|
</div>
|
|
<p>Run as-is, then pipe to Miller for pretty-printing:</p>
|
|
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span><span class="hll"> ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small | mlr --opprint cat
|
|
</span> a b i y ab iy ta tb ti ty tab tiy
|
|
pan pan 1 0.7268028627434533 panpan 1.7268028627434533 String String Integer Float String Float
|
|
eks pan 2 0.5221511083334797 ekspan 2.5221511083334796 String String Integer Float String Float
|
|
wye wye 3 0.33831852551664776 wyewye 3.3383185255166477 String String Integer Float String Float
|
|
eks wye 4 0.13418874328430463 ekswye 4.134188743284304 String String Integer Float String Float
|
|
wye pan 5 0.8636244699032729 wyepan 5.863624469903273 String String Integer Float String Float
|
|
</pre></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="footer" role="contentinfo">
|
|
© Copyright 2021, John Kerl.
|
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.2.1.
|
|
</div>
|
|
</body>
|
|
</html> |