diff --git a/.gitignore b/.gitignore
index 7e0744274..23591aa5c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,6 +40,7 @@ catc0
catm
gmon.out
*.o
+*.pyc
.swp
.swo
.*.swp
diff --git a/doc/10-min.html b/doc/10-min.html
index aa65f8bee..89cc952f7 100644
--- a/doc/10-min.html
+++ b/doc/10-min.html
@@ -113,6 +113,7 @@ Miller commands were run with pretty-print-tabular output format.
• Internationalization
Using Miller:
• FAQ
+
• Sharing data with other languages
• Cookbook part 1
• Cookbook part 2
• Cookbook part 3
diff --git a/doc/build.html b/doc/build.html
index 8922f900f..e5d838310 100644
--- a/doc/build.html
+++ b/doc/build.html
@@ -113,6 +113,7 @@ Miller commands were run with pretty-print-tabular output format.
• Internationalization
Using Miller:
• FAQ
+
• Sharing data with other languages
• Cookbook part 1
• Cookbook part 2
• Cookbook part 3
diff --git a/doc/contact.html b/doc/contact.html
index c980ca796..4074da6e5 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -113,6 +113,7 @@ Miller commands were run with pretty-print-tabular output format.
• Internationalization
Using Miller:
• FAQ
+
• Sharing data with other languages
• Cookbook part 1
• Cookbook part 2
• Cookbook part 3
diff --git a/doc/content-for-data-sharing.html b/doc/content-for-data-sharing.html
new file mode 100644
index 000000000..1776839c0
--- /dev/null
+++ b/doc/content-for-data-sharing.html
@@ -0,0 +1,58 @@
+POKI_PUT_TOC_HERE
+
+
|
+
+
+
+
+ Overview: + • About Miller + • Miller in 10 minutes + • File formats + • Miller features in the context of the Unix toolkit + • Record-heterogeneity + • Internationalization + Using Miller: + • FAQ + • Sharing data with other languages + • Cookbook part 1 + • Cookbook part 2 + • Cookbook part 3 + • Data-diving examples + • Manpage + • Reference + • Reference: Verbs + • Reference: DSL + • Documents by release + • Installation, portability, dependencies, and testing + Background: + • Why? + • Why C? + • Why call it Miller? + • How original is Miller? + • Performance + Repository: + • Things to do + • Contact information + • GitHub repo + + + + |
+
+
+
+
+
+
+
+
+DKVP I/O in Python
+
+
+Here are the I/O routines:
+
+
+
+
+
+
+
+And here is an example using them:
+
+
+
+#!/usr/bin/env python
+
+# ================================================================
+# Example of DKVP I/O using Python.
+#
+# Key point: Use Miller for what it's good at; pass data into/out of tools in
+# other languages to do what they're good at.
+#
+# bash$ python -i dkvp_io.py
+#
+# # READ
+# >>> map = dkvpline2map('x=1,y=2', '=', ',')
+# >>> map
+# OrderedDict([('x', 1), ('y', 2)])
+#
+# # MODIFY
+# >>> map['z'] = map['x'] + map['y']
+# >>> map
+# OrderedDict([('x', 1), ('y', 2), ('z', 3)])
+#
+# # WRITE
+# >>> line = map2dkvpline(map, '=', ',')
+# >>> line
+# 'x=1,y=2,z=3'
+#
+# ================================================================
+
+import re
+import collections
+
+# ----------------------------------------------------------------
+# ips and ifs (input pair separator and input field separator) are nominally '=' and ','.
+def dkvpline2map(line, ips, ifs):
+ pairs = re.split(ifs, line)
+ map = collections.OrderedDict()
+ for pair in pairs:
+ key, value = re.split(ips, pair, 1)
+
+ # Type inference:
+ try:
+ value = int(value)
+ except:
+ try:
+ value = float(value)
+ except:
+ pass
+
+ map[key] = value
+ return map
+
+# ----------------------------------------------------------------
+# ops and ofs (output pair separator and output field separator) are nominally '=' and ','.
+def map2dkvpline(map , ops, ofs):
+ line = ''
+ pairs = []
+ for key in map:
+ pairs.append(str(key) + ops + str(map[key]))
+ return str.join(ofs, pairs)
+
+
+
+
+
+Run as-is:
+
+
+
+$ cat polyglot-dkvp-io/example.py
+#!/usr/bin/env python
+
+import sys
+import re
+import dkvp_io
+
+while True:
+ # Read the original record:
+ line = sys.stdin.readline().strip()
+ if line == '':
+ break
+ map = dkvp_io.dkvpline2map(line, '=', ',')
+
+ # Drop a field:
+ map.pop('x')
+
+ # Compute some new fields:
+ map['ab'] = map['a'] + map['b']
+ map['iy'] = map['i'] + map['y']
+
+ # Add new fields which show type of each already-existing field:
+ keys = map.keys()
+ for key in keys:
+ # Convert "<type 'int'>" to just "int", etc.:
+ type_string = str(map[key].__class__)
+ type_string = re.sub("<type '", "", type_string)
+ type_string = re.sub("'>", "", type_string)
+ map['t'+key] = type_string
+
+ # Write the modified record:
+ print dkvp_io.map2dkvpline(map, '=', ',')
+
+
+
+
+
+Run as-is, then pipe to Miller for pretty-printing:
+
+
+$ python polyglot-dkvp-io/example.py < data/small
+a=pan,b=pan,i=1,y=0.726802862743,ab=panpan,iy=1.72680286274,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
+a=eks,b=pan,i=2,y=0.522151108333,ab=ekspan,iy=2.52215110833,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
+a=wye,b=wye,i=3,y=0.338318525517,ab=wyewye,iy=3.33831852552,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
+a=eks,b=wye,i=4,y=0.134188743284,ab=ekswye,iy=4.13418874328,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
+a=wye,b=pan,i=5,y=0.863624469903,ab=wyepan,iy=5.8636244699,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
+
+
+
+
+$ python polyglot-dkvp-io/example.py < data/small | mlr --opprint cat
+a   b   i y              ab     iy            ta  tb  ti  ty    tab tiy
+pan pan 1 0.726802862743 panpan 1.72680286274 str str int float str float
+eks pan 2 0.522151108333 ekspan 2.52215110833 str str int float str float
+wye wye 3 0.338318525517 wyewye 3.33831852552 str str int float str float
+eks wye 4 0.134188743284 ekswye 4.13418874328 str str int float str float
+wye pan 5 0.863624469903 wyepan 5.8636244699  str str int float str float
+
+DKVP I/O in Ruby
+
+
+
+Here are the I/O routines:
+
+
+
+
+
+
+
+And here is an example using them:
+
+
+
+#!/usr/bin/env ruby
+
+# ================================================================
+# Example of DKVP I/O using Ruby.
+#
+# Key point: Use Miller for what it's good at; pass data into/out of tools in
+# other languages to do what they're good at.
+#
+# bash$ irb -I. -r dkvp_io.rb
+#
+# # READ
+# irb(main):001:0> map = dkvpline2map('x=1,y=2', '=', ',')
+# => {"x"=>1, "y"=>2}
+#
+# # MODIFY
+# irb(main):002:0> map['z'] = map['x'] + map['y']
+# => 3
+#
+# # WRITE
+# irb(main):003:0> line = map2dkvpline(map, '=', ',')
+# => "x=1,y=2,z=3"
+#
+# ================================================================
+
+# ----------------------------------------------------------------
+# ips and ifs (input pair separator and input field separator) are nominally '=' and ','.
+def dkvpline2map(line, ips, ifs)
+ map = {}
+ line.split(ifs).each do |pair|
+ (k, v) = pair.split(ips, 2)
+
+ # Type inference:
+ begin
+ v = Integer(v)
+ rescue ArgumentError
+ begin
+ v = Float(v)
+ rescue ArgumentError
+ # Leave as string
+ end
+ end
+
+ map[k] = v
+ end
+ map
+end
+
+# ----------------------------------------------------------------
+# ops and ofs (output pair separator and output field separator) are nominally '=' and ','.
+def map2dkvpline(map, ops, ofs)
+ map.collect{|k,v| k.to_s + ops + v.to_s}.join(ofs)
+end
+
+
+
+
+
+Run as-is:
+
+
+
+$ cat polyglot-dkvp-io/example.rb
+#!/usr/bin/env ruby
+
+require 'dkvp_io'
+
+ARGF.each do |line|
+ # Read the original record:
+ map = dkvpline2map(line.chomp, '=', ',')
+
+ # Drop a field:
+ map.delete('x')
+
+ # Compute some new fields:
+ map['ab'] = map['a'] + map['b']
+ map['iy'] = map['i'] + map['y']
+
+ # Add new fields which show type of each already-existing field:
+ keys = map.keys
+ keys.each do |key|
+ map['t'+key] = map[key].class
+ end
+
+ # Write the modified record:
+ puts map2dkvpline(map, '=', ',')
+end
+
+
+
+
+
+Run as-is, then pipe to Miller for pretty-printing:
+
+
+$ ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small
+a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=String,tb=String,ti=Fixnum,ty=Float,tab=String,tiy=Float
+a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=String,tb=String,ti=Fixnum,ty=Float,tab=String,tiy=Float
+a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=String,tb=String,ti=Fixnum,ty=Float,tab=String,tiy=Float
+a=eks,b=wye,i=4,y=0.13418874328430463,ab=ekswye,iy=4.134188743284304,ta=String,tb=String,ti=Fixnum,ty=Float,tab=String,tiy=Float
+a=wye,b=pan,i=5,y=0.8636244699032729,ab=wyepan,iy=5.863624469903273,ta=String,tb=String,ti=Fixnum,ty=Float,tab=String,tiy=Float
+
+
+
+
+$ ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small | mlr --opprint cat
+a   b   i y                   ab     iy                 ta     tb     ti     ty    tab    tiy
+pan pan 1 0.7268028627434533  panpan 1.7268028627434533 String String Fixnum Float String Float
+eks pan 2 0.5221511083334797  ekspan 2.5221511083334796 String String Fixnum Float String Float
+wye wye 3 0.33831852551664776 wyewye 3.3383185255166477 String String Fixnum Float String Float
+eks wye 4 0.13418874328430463 ekswye 4.134188743284304  String String Fixnum Float String Float
+wye pan 5 0.8636244699032729  wyepan 5.863624469903273  String String Fixnum Float String Float
+
+ |
+
+