Compare commits

..

No commits in common. "main" and "v5.9.1" have entirely different histories.
main ... v5.9.1

19631 changed files with 371661 additions and 1531280 deletions

View file

@ -1,397 +0,0 @@
{
"projectName": "miller",
"projectOwner": "johnkerl",
"repoType": "github",
"repoHost": "https://github.com",
"files": [
"README.md"
],
"imageSize": 50,
"commit": true,
"commitConvention": "none",
"contributors": [
{
"login": "aborruso",
"name": "Andrea Borruso",
"avatar_url": "https://avatars.githubusercontent.com/u/30607?v=4",
"profile": "https://github.com/aborruso",
"contributions": [
"ideas",
"design"
]
},
{
"login": "sjackman",
"name": "Shaun Jackman",
"avatar_url": "https://avatars.githubusercontent.com/u/291551?v=4",
"profile": "https://sjackman.ca/",
"contributions": [
"ideas"
]
},
{
"login": "ftrotter",
"name": "Fred Trotter",
"avatar_url": "https://avatars.githubusercontent.com/u/83133?v=4",
"profile": "http://www.fredtrotter.com/",
"contributions": [
"ideas",
"design"
]
},
{
"login": "Komosa",
"name": "komosa",
"avatar_url": "https://avatars.githubusercontent.com/u/10688154?v=4",
"profile": "https://github.com/Komosa",
"contributions": [
"ideas"
]
},
{
"login": "jungle-boogie",
"name": "jungle-boogie",
"avatar_url": "https://avatars.githubusercontent.com/u/1111743?v=4",
"profile": "https://github.com/jungle-boogie",
"contributions": [
"ideas"
]
},
{
"login": "0-wiz-0",
"name": "Thomas Klausner",
"avatar_url": "https://avatars.githubusercontent.com/u/2221844?v=4",
"profile": "https://github.com/0-wiz-0",
"contributions": [
"infra"
]
},
{
"login": "skitt",
"name": "Stephen Kitt",
"avatar_url": "https://avatars.githubusercontent.com/u/2128935?v=4",
"profile": "https://github.com/skitt",
"contributions": [
"platform"
]
},
{
"login": "leahneukirchen",
"name": "Leah Neukirchen",
"avatar_url": "https://avatars.githubusercontent.com/u/139?v=4",
"profile": "http://leahneukirchen.org/",
"contributions": [
"ideas"
]
},
{
"login": "lgbaldoni",
"name": "Luigi Baldoni",
"avatar_url": "https://avatars.githubusercontent.com/u/1450716?v=4",
"profile": "https://github.com/lgbaldoni",
"contributions": [
"platform"
]
},
{
"login": "yutannihilation",
"name": "Hiroaki Yutani",
"avatar_url": "https://avatars.githubusercontent.com/u/1978793?v=4",
"profile": "https://yutani.rbind.io/",
"contributions": [
"ideas"
]
},
{
"login": "dmd",
"name": "Daniel M. Drucker",
"avatar_url": "https://avatars.githubusercontent.com/u/41439?v=4",
"profile": "https://3e.org/",
"contributions": [
"ideas"
]
},
{
"login": "NikosAlexandris",
"name": "Nikos Alexandris",
"avatar_url": "https://avatars.githubusercontent.com/u/7046639?v=4",
"profile": "https://github.com/NikosAlexandris",
"contributions": [
"ideas"
]
},
{
"login": "kundeng",
"name": "kundeng",
"avatar_url": "https://avatars.githubusercontent.com/u/89032?v=4",
"profile": "https://github.com/kundeng",
"contributions": [
"platform"
]
},
{
"login": "singalen",
"name": "Victor Sergienko",
"avatar_url": "https://avatars.githubusercontent.com/u/151199?v=4",
"profile": "http://victorsergienko.com/",
"contributions": [
"platform"
]
},
{
"login": "gromgit",
"name": "Adrian Ho",
"avatar_url": "https://avatars.githubusercontent.com/u/215702?v=4",
"profile": "https://github.com/gromgit",
"contributions": [
"design"
]
},
{
"login": "Zachp",
"name": "zachp",
"avatar_url": "https://avatars.githubusercontent.com/u/1316442?v=4",
"profile": "https://github.com/Zachp",
"contributions": [
"platform"
]
},
{
"login": "davidselassie",
"name": "David Selassie",
"avatar_url": "https://avatars.githubusercontent.com/u/921669?v=4",
"profile": "https://dsel.net/",
"contributions": [
"ideas"
]
},
{
"login": "joelparkerhenderson",
"name": "Joel Parker Henderson",
"avatar_url": "https://avatars.githubusercontent.com/u/27145?v=4",
"profile": "http://www.joelparkerhenderson.com/",
"contributions": [
"ideas"
]
},
{
"login": "divtiply",
"name": "Michel Ace",
"avatar_url": "https://avatars.githubusercontent.com/u/5359679?v=4",
"profile": "https://github.com/divtiply",
"contributions": [
"ideas"
]
},
{
"login": "Fuco1",
"name": "Matus Goljer",
"avatar_url": "https://avatars.githubusercontent.com/u/2664959?v=4",
"profile": "http://fuco1.github.io/sitemap.html",
"contributions": [
"ideas"
]
},
{
"login": "terorie",
"name": "Richard Patel",
"avatar_url": "https://avatars.githubusercontent.com/u/21371810?v=4",
"profile": "https://github.com/terorie",
"contributions": [
"platform"
]
},
{
"login": "kub1x",
"name": "Jakub Podlaha",
"avatar_url": "https://avatars.githubusercontent.com/u/1833840?v=4",
"profile": "https://blog.kub1x.org/",
"contributions": [
"design"
]
},
{
"login": "majkinetor",
"name": "Miodrag Milić",
"avatar_url": "https://avatars.githubusercontent.com/u/85767?v=4",
"profile": "https://goo.gl/ZGZynx",
"contributions": [
"platform"
]
},
{
"login": "derekmahar",
"name": "Derek Mahar",
"avatar_url": "https://avatars.githubusercontent.com/u/6047?v=4",
"profile": "https://github.com/derekmahar",
"contributions": [
"ideas"
]
},
{
"login": "spmundi",
"name": "spmundi",
"avatar_url": "https://avatars.githubusercontent.com/u/38196185?v=4",
"profile": "https://github.com/spmundi",
"contributions": [
"ideas"
]
},
{
"login": "koernepr",
"name": "Peter Körner",
"avatar_url": "https://avatars.githubusercontent.com/u/24551942?v=4",
"profile": "https://github.com/koernepr",
"contributions": [
"security"
]
},
{
"login": "rubyFeedback",
"name": "rubyFeedback",
"avatar_url": "https://avatars.githubusercontent.com/u/46686565?v=4",
"profile": "https://github.com/rubyFeedback",
"contributions": [
"ideas"
]
},
{
"login": "rbolsius",
"name": "rbolsius",
"avatar_url": "https://avatars.githubusercontent.com/u/2106964?v=4",
"profile": "https://github.com/rbolsius",
"contributions": [
"platform"
]
},
{
"login": "awildturtok",
"name": "awildturtok",
"avatar_url": "https://avatars.githubusercontent.com/u/1553491?v=4",
"profile": "https://github.com/awildturtok",
"contributions": [
"ideas"
]
},
{
"login": "agguser",
"name": "agguser",
"avatar_url": "https://avatars.githubusercontent.com/u/1206106?v=4",
"profile": "https://github.com/agguser",
"contributions": [
"ideas"
]
},
{
"login": "jganong",
"name": "jganong",
"avatar_url": "https://avatars.githubusercontent.com/u/2783890?v=4",
"profile": "https://github.com/jganong",
"contributions": [
"ideas"
]
},
{
"login": "trantor",
"name": "Fulvio Scapin",
"avatar_url": "https://avatars.githubusercontent.com/u/69568?v=4",
"profile": "https://www.linkedin.com/in/fulvio-scapin",
"contributions": [
"ideas"
]
},
{
"login": "torbiak",
"name": "Jordan Torbiak",
"avatar_url": "https://avatars.githubusercontent.com/u/109347?v=4",
"profile": "https://github.com/torbiak",
"contributions": [
"ideas"
]
},
{
"login": "Andy1978",
"name": "Andreas Weber",
"avatar_url": "https://avatars.githubusercontent.com/u/240064?v=4",
"profile": "https://github.com/Andy1978",
"contributions": [
"ideas"
]
},
{
"login": "vapniks",
"name": "vapniks",
"avatar_url": "https://avatars.githubusercontent.com/u/174330?v=4",
"profile": "https://github.com/vapniks",
"contributions": [
"platform"
]
},
{
"login": "89z",
"name": "Zombo",
"avatar_url": "https://avatars.githubusercontent.com/u/73562167?v=4",
"profile": "https://github.com/89z",
"contributions": [
"platform"
]
},
{
"login": "BEFH",
"name": "Brian Fulton-Howard",
"avatar_url": "https://avatars.githubusercontent.com/u/3386600?v=4",
"profile": "https://github.com/BEFH",
"contributions": [
"platform"
]
},
{
"login": "ChCyrill",
"name": "ChCyrill",
"avatar_url": "https://avatars.githubusercontent.com/u/2165604?v=4",
"profile": "https://github.com/ChCyrill",
"contributions": [
"ideas"
]
},
{
"login": "jauderho",
"name": "Jauder Ho",
"avatar_url": "https://avatars.githubusercontent.com/u/13562?v=4",
"profile": "https://github.com/jauderho",
"contributions": [
"code"
]
},
{
"login": "psacawa",
"name": "Paweł Sacawa",
"avatar_url": "https://avatars.githubusercontent.com/u/21274063?v=4",
"profile": "https://github.com/psacawa",
"contributions": [
"bug"
]
},
{
"login": "schragge",
"name": "schragge",
"avatar_url": "https://avatars.githubusercontent.com/u/4294278?v=4",
"profile": "https://github.com/schragge",
"contributions": [
"doc"
]
},
{
"login": "Poshi",
"name": "Jordi",
"avatar_url": "https://avatars.githubusercontent.com/u/1780510?v=4",
"profile": "https://github.com/Poshi",
"contributions": [
"doc",
"ideas"
]
}
],
"contributorsPerLine": 7,
"skipCi": true
}

View file

@ -1,12 +0,0 @@
denom
inTerm
inout
iput
nd
nin
numer
Wit
te
wee
RO
falsy

View file

@ -1,18 +0,0 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
# Maintain dependencies for GitHub Actions
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
# Maintain dependencies for Go
- package-ecosystem: "gomod"
directory: /
schedule:
interval: "daily"

View file

@ -1,68 +0,0 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"
on:
push:
branches: [ main ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ main ]
schedule:
- cron: '22 1 * * 0'
workflow_dispatch:
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
language: [ 'go' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
# Learn more:
# https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
steps:
- name: Checkout repository
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@cdefb33c0f6224e58673d9004f47f7cb3e328b89
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@cdefb33c0f6224e58673d9004f47f7cb3e328b89
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
#- run: |
# make bootstrap
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@cdefb33c0f6224e58673d9004f47f7cb3e328b89

View file

@ -1,45 +0,0 @@
name: Codespell
# Documentation:
# https://help.github.com/en/articles/workflow-syntax-for-github-actions
# Start the job on all pushes
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
# Set the Job
jobs:
build:
name: Codespell
# Set the agent to run on
runs-on: ubuntu-latest
# Load all steps
steps:
# Check out the code base
- name: Check out code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
with:
# Full git history is needed to get a proper list of changed files within `super-linter`
fetch-depth: 0
# Run linter against code base
# https://github.com/codespell-project/codespell
- name: Codespell
uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579
with:
check_filenames: true
ignore_words_file: .codespellignore
skip: "*.csv,*.dkvp,*.txt,*.js,*.html,*.map,*.z,./tags,./test/cases,./docs/src/shapes-of-data.md.in,./docs/src/shapes-of-data.md,test/input/latin1.xtab"
# As of August 2023 or so, Codespell started exiting with status 1 just _examining_ the
# latin1.xtab file which is (intentionally) not UTF-8. Before, it said
#
# Warning: WARNING: Cannot decode file using encoding "utf-8": ./test/input/latin1.xtab
# WARNING: Trying next encoding "iso-8859-1"
#
# but would exit 0. After, it started exiting with a 1. This is annoying as it makes
# every PR red in CI. So we have to use warning mode now.
only_warn: 1

View file

@ -1,47 +0,0 @@
name: Miller-Go
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
- name: Set up Go
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5
with:
go-version: 1.24
- name: Build
run: make build
- name: Unit tests
run: make unit-test
- name: Regression tests
# We run these with a convoluted path to ensure the tests don't
# rely on a specific invocation
run: test/../mlr regtest -S
- name: PrepareArtifactNonWindows
if: matrix.os != 'windows-latest'
run: mkdir -p bin/${{matrix.os}} && cp mlr bin/${{matrix.os}}
- name: PrepareArtifactWindows
if: matrix.os == 'windows-latest'
run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: mlr-${{matrix.os}}
path: bin/${{matrix.os}}/*

View file

@ -1,29 +0,0 @@
name: Release for Snap
on:
push:
tags:
- v*
workflow_dispatch:
jobs:
snap:
strategy:
matrix:
os: [ubuntu-latest, ubuntu-24.04-arm]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v6
- name: Build snap
uses: snapcore/action-build@v1
id: build
- name: Publish to Snap Store
uses: snapcore/action-publish@v1
env:
SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.SNAPCRAFT_TOKEN }}
with:
snap: ${{ steps.build.outputs.snap }}
# release: stable # or edge, beta, candidate
release: stable

View file

@ -1,49 +0,0 @@
name: Release for GitHub
on:
push:
tags:
- v*
workflow_dispatch:
env:
GO_VERSION: 1.24.5
jobs:
release:
name: Release
strategy:
matrix:
platform: [ubuntu-latest]
runs-on: ${{ matrix.platform }}
steps:
- name: Set up Go
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5
with:
go-version: ${{ env.GO_VERSION }}
id: go
- name: Check out code into the Go module directory
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
with:
fetch-depth: 0
# https://github.com/marketplace/actions/cache
- name: Cache Go modules
uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7
with:
path: |
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
# https://goreleaser.com/ci/actions/
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@e435ccd777264be153ace6237001ef4d979d3a7a
#if: startsWith(github.ref, 'refs/tags/v')
with:
version: latest
args: release -f .goreleaser.yml --clean
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View file

@ -1,28 +0,0 @@
name: 🧪 Snap Builds
on:
push:
branches: '*'
pull_request:
branches: '*'
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [20.x]
steps:
- uses: actions/checkout@v6
- uses: snapcore/action-build@v1
id: build
- uses: diddlesnaps/snapcraft-review-action@v1
with:
snap: ${{ steps.build.outputs.snap }}
isClassic: 'false'
# Plugs and Slots declarations to override default denial (requires store assertion to publish)
# plugs: ./plug-declaration.json
# slots: ./slot-declaration.json

92
.gitignore vendored
View file

@ -1,16 +1,84 @@
/mlr
.sw?
.*.sw?
autom4te.cache
mlr
mlrd
mlrg
mlrp
test-argparse
test-byte-readers
test-line-readers
test-dsl-stack
test-join-bucket-keeper
test-json-parser
test-lrec
test-rval-evaluators
test-mlhmmv
test-mlrregex
test-mlrutil
test-multiple-containers
test-parse-trie
test-peek-file-reader
test-string-builder
test_rval_evaluators
test_argparse
test_byte_readers
test_line_readers
test_join_bucket_keeper
test_json_parser
test_lrec
test_mlhmmv
test_mlrregex
test_mlrutil
test_multiple_containers
test_parse_trie
test_peek_file_reader
test_string_builder
output-regtest
termcvt
a.out
*.dSYM
catc
catc0
catm
gmon.out
*.o
*.pyc
.swp
.swo
.*.swp
.*.swo
c/parsing/lemon
c/parsing/mlr_dsl_lexer.c
c/parsing/mlr_dsl_lexer.h
c/parsing/mlr_dsl_parse.c
c/parsing/mlr_dsl_parse.h
c/parsing/mlr_dsl_parse.out
c/reg_test/output
c/output/out
tags
*.la
*.lo
*.log
*.trs
*~
.deps/
.libs/
Makefile
config.h
config.log
config.status
libtool
stamp-h1
ap
getl
lemon_prepared
lrim
mlr-[0-9.]*.tar.*
c/jkopush
c/run_mlr
c/mlr_expect_fail
c/run_mlr_for_auxents
c/push2
push2
data/.gitignore
man/man1
data/big.*
data/nmc?.*
docs/src/polyglot-dkvp-io/__pycache__
docs/site/
doc/jkopush
doc/push2

View file

@ -1,93 +0,0 @@
# .goreleaser.yml
release:
github:
owner: johnkerl
name: miller
snapshot:
name_template: SNAPSHOT-{{ .Commit }}
#gomod:
# proxy: true
before:
hooks:
- go mod tidy
builds:
- env:
- CGO_ENABLED=0
goos:
- linux
- windows
- darwin
- freebsd
- aix
goarch:
- 386
- amd64
- arm
- arm64
- s390x
- ppc64le
- ppc64
- riscv64
goarm:
- 6
- 7
ignore:
- goos: linux
goarch: ppc64
- goos: darwin
goarch: arm
- goos: darwin
goarch: ppc64le
- goos: darwin
goarch: s390x
- goos: windows
goarch: arm64
- goos: windows
goarch: arm
- goos: windows
goarch: ppc64le
- goos: windows
goarch: s390x
- goos: freebsd
goarch: arm
- goos: freebsd
goarch: s390x
dir: .
main: ./cmd/mlr
binary: mlr
flags:
- -trimpath
#ldflags:
# - -s -w
archives:
- format: tar.gz
wrap_in_directory: true
format_overrides:
- goos: windows
format: zip
name_template: '{{ .ProjectName }}-{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}'
files:
- LICENSE.txt
- README.md
nfpms:
-
id: miller-nfpms
package_name: miller
file_name_template: "{{ .ProjectName }}-{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
homepage: https://github.com/johnkerl/miller
maintainer: "John Kerl <johnkerl@users.noreply.github.com>"
description: Miller is like awk, sed, cut, join, and sort for data formats such as CSV, TSV, tabular JSON etc
license: BSD
formats:
- deb
- rpm
checksum:
algorithm: sha256
name_template: '{{ .ProjectName }}-{{ .Version }}-checksums.txt'

View file

@ -1,21 +0,0 @@
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-20.04
tools:
python: "3.8"
python:
install:
- requirements: docs/requirements.txt
mkdocs:
configuration: docs/mkdocs.yml
formats: all

28
.travis.yml Normal file
View file

@ -0,0 +1,28 @@
language: c
# Use newer travis container-based infrastructure
sudo: false
# Install flex dependency
addons:
apt:
packages:
- flex
- autoconf
- libtool
# Build using gcc and clang since it is supported
# Note: 'clang -fsanitize=address' is not the right way to get an ASAN build.
# This needs work.
compiler:
- gcc
os:
- linux
- osx
before_script: df -h && autoreconf -fiv
script: ./configure && make && make check && make distcheck && make CC=$CC -C c -f Makefile.no-autoconfig
after_failure: wc -l c/reg_test/test-suite.log && cat c/reg_test/test-suite.log

5
.vimrc
View file

@ -1,5 +0,0 @@
map \d :w<C-m>:!clear;echo Building ...; echo; make mlr<C-m>
map \f :w<C-m>:!clear;echo Building ...; echo; make ut<C-m>
"map \r :w<C-m>:!clear;echo Building ...; echo; make ut-scan ut-mlv<C-m>
map \r :w<C-m>:!clear;echo Building ...; echo; make ut-lib<C-m>
map \t :w<C-m>:!clear;go test github.com/johnkerl/miller/internal/pkg/transformers/...<C-m>

View file

@ -25,4 +25,4 @@ of this software, even if advised of the possibility of such damage.
I am providing code in this repository to you under an open-source license.
Because this is my personal repository, the license you receive to my code is
from me and not from my employer.
from me and not from my employer (Facebook).

127
Makefile
View file

@ -1,127 +0,0 @@
PREFIX=/usr/local
INSTALLDIR=$(PREFIX)/bin
# ================================================================
# General-use targets
# This must remain the first target in this file, which is what 'make' with no
# arguments will run.
build:
go build github.com/johnkerl/miller/v6/cmd/mlr
@echo "Build complete. The Miller executable is ./mlr (or .\mlr.exe on Windows)."
@echo "You can use 'make check' to run tests".
quiet:
@go build github.com/johnkerl/miller/v6/cmd/mlr
# For interactive use, 'mlr regtest' offers more options and transparency.
check: unit-test regression-test
@echo "Tests complete. You can use 'make install' if you like, optionally preceded"
@echo "by './configure --prefix=/your/install/path' if you wish to install to"
@echo "somewhere other than /usr/local/bin -- the default prefix is /usr/local."
# Install the mlr executable and the man page.
#
# DESTDIR is for package installs; nominally blank when this is run interactively.
# See also https://www.gnu.org/prep/standards/html_node/DESTDIR.html
#
# DESTDIR is prepended directly, with no joining '/', per the GNU convention:
# INSTALLDIR is absolute by default (PREFIX=/usr/local), and with a blank
# DESTDIR an extra slash would produce a path starting with '//', whose
# meaning POSIX leaves implementation-defined.
#
# $(MAKE) is used rather than a literal 'make' so that the sub-make in man/
# is run by the same make program and inherits its flags.
install: build
	mkdir -p $(DESTDIR)$(INSTALLDIR)
	cp mlr $(DESTDIR)$(INSTALLDIR)/
	$(MAKE) -C man install
# ================================================================
# Dev targets
# ----------------------------------------------------------------
# Unit tests (small number)
unit-test ut: build
go test github.com/johnkerl/miller/v6/pkg/...
# Unit tests for the pkg/lib package tree only. The '/...' suffix matches
# pkg/lib itself plus its subpackages, in line with the other ut-* targets;
# without the slash the wildcard would also match any package whose import
# path merely begins with "pkg/lib".
ut-lib:build
	go test github.com/johnkerl/miller/v6/pkg/lib/...
ut-scan:build
go test github.com/johnkerl/miller/v6/pkg/scan/...
ut-mlv:build
go test github.com/johnkerl/miller/v6/pkg/mlrval/...
ut-bifs:build
go test github.com/johnkerl/miller/v6/pkg/bifs/...
ut-input:build
go test github.com/johnkerl/miller/v6/pkg/input/...
bench:build
go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/...
bench-mlv:build
go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/mlrval/...
bench-input:build
go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/input/...
# ----------------------------------------------------------------
# Regression tests (large number)
#
# See ./regression_test.go for information on how to get more details
# for debugging. TL;DR is for CI jobs, we have 'go test -v'; for
# interactive use, instead of 'go test -v' simply use 'mlr regtest
# -vvv' or 'mlr regtest -s 20'. See also pkg/terminals/regtest.
regression-test: build
go test -v regression_test.go
# ----------------------------------------------------------------
# Formatting
# go fmt ./... finds experimental C files which we want to ignore.
fmt format:
-go fmt ./cmd/...
-go fmt ./pkg/...
-go fmt ./regression_test.go
# ----------------------------------------------------------------
# Static analysis
# Needs first: go install honnef.co/go/tools/cmd/staticcheck@latest
# See also: https://staticcheck.io
staticcheck:
staticcheck ./...
# ----------------------------------------------------------------
# For developers before pushing to GitHub.
#
# These steps are done in a particular order:
# go:
# * builds the mlr executable
# man:
# * creates manpage mlr.1 and manpage.txt using mlr from the $PATH
# * copies the latter to docs/src
# docs:
# * turns *.md.in into *.md (live code samples), using mlr from the $PATH
# * note the man/manpage.txt becomes some of the HTML content
# * turns *.md into docs/site HTML and CSS files
# Full pre-push sequence: format, build, test, then regenerate the man page
# and the docs. The leading '-' on the fmt step tells make to keep going if
# formatting reports an error. Sub-makes are invoked via $(MAKE) rather than
# a literal 'make' so they are run by the same make program and inherit its
# command-line flags.
dev:
	-$(MAKE) fmt
	$(MAKE) build
	$(MAKE) check
	$(MAKE) -C man build
	$(MAKE) -C docs/src forcebuild
	$(MAKE) -C docs
	@echo DONE
# Rebuild only the documentation, after making sure mlr itself is built.
docs: build
	$(MAKE) -C docs/src forcebuild
	$(MAKE) -C docs
# ----------------------------------------------------------------
# Keystroke-savers
sure: build check
it: build check
so: install
mlr:
go build github.com/johnkerl/miller/v6/cmd/mlr
# ----------------------------------------------------------------
# Please see comments in ./create-release-tarball as well as
# https://miller.readthedocs.io/en/latest/build/#creating-a-new-release-for-developers
release_tarball: build check
./create-release-tarball
# ================================================================
# Go does its own dependency management, outside of make.
# The names listed here must match the rule names exactly: the test rules are
# spelled with hyphens (unit-test, regression-test), not underscores.
.PHONY: build mlr check unit-test regression-test bench fmt staticcheck dev docs

6
Makefile.am Normal file
View file

@ -0,0 +1,6 @@
# not GPL, thus no COPYING file
AUTOMAKE_OPTIONS=foreign
EXTRA_DIST= LICENSE.txt README.md
SUBDIRS=c doc

823
Makefile.in Normal file
View file

@ -0,0 +1,823 @@
# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
$(am__configure_deps) $(am__DIST_COMMON)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
SOURCES =
DIST_SOURCES =
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
cscope distdir dist dist-all distcheck
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \
$(LISP)config.h.in
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
CSCOPE = cscope
DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in \
$(top_srcdir)/autotools/ar-lib $(top_srcdir)/autotools/compile \
$(top_srcdir)/autotools/config.guess \
$(top_srcdir)/autotools/config.sub \
$(top_srcdir)/autotools/install-sh \
$(top_srcdir)/autotools/ltmain.sh \
$(top_srcdir)/autotools/missing autotools/ar-lib \
autotools/compile autotools/config.guess autotools/config.sub \
autotools/install-sh autotools/ltmain.sh autotools/missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
am__remove_distdir = \
if test -d "$(distdir)"; then \
find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
&& rm -rf "$(distdir)" \
|| { sleep 5 && rm -rf "$(distdir)"; }; \
else :; fi
am__post_remove_distdir = $(am__remove_distdir)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
DIST_ARCHIVES = $(distdir).tar.gz
GZIP_ENV = --best
DIST_TARGETS = dist-gzip
distuninstallcheck_listfiles = find . -type f -print
am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
| sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
distcleancheck_listfiles = find . -type f -print
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDFLAGS = @LDFLAGS@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
# not GPL, thus no COPYING file
AUTOMAKE_OPTIONS = foreign
EXTRA_DIST = LICENSE.txt README.md
SUBDIRS = c doc
# Default goal: bring config.h up to date first, then build everything by
# running the `all-recursive` target in a sub-make.
all: config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
# An empty .SUFFIXES prerequisite list removes all known suffixes.
.SUFFIXES:
# Helper target with a no-op recipe (`@:`); its only prerequisite is Makefile,
# so requesting it just triggers the rules that keep Makefile up to date.
am--refresh: Makefile
@:
# Regenerate Makefile.in with automake. Each changed prerequisite ($?) is
# matched against $(am__configure_deps): on the first match automake is rerun
# from $(srcdir) with no file argument and the recipe exits 0 on success or 1
# on failure. If nothing matched, only `Makefile` is regenerated, from
# $(top_srcdir).
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \
$(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \
&& exit 0; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign Makefile
# Regenerate Makefile from Makefile.in. If config.status is among the changed
# prerequisites ($?), run ./config.status with no arguments; otherwise run it
# from $(top_builddir) for just this target ($@) plus $(am__depfiles_maybe).
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
echo ' $(SHELL) ./config.status'; \
$(SHELL) ./config.status;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
esac;
# Rerun `config.status --recheck` when configure or any of
# $(CONFIG_STATUS_DEPENDENCIES) is newer than config.status.
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
$(SHELL) ./config.status --recheck
# Rebuild configure by running $(AUTOCONF) from $(srcdir).
$(top_srcdir)/configure: $(am__configure_deps)
$(am__cd) $(srcdir) && $(AUTOCONF)
# Rebuild $(ACLOCAL_M4) by running $(ACLOCAL) from $(srcdir).
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
# Rule with neither prerequisites nor recipe -- presumably so that a missing
# m4 dependency does not abort the build with "no rule to make target".
$(am__aclocal_m4_deps):
# config.h is tracked through the stamp-h1 file. If config.h itself is
# missing, the stamp is deleted and stamp-h1 is remade in a sub-make.
config.h: stamp-h1
@test -f $@ || rm -f stamp-h1
@test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1
# Remove the old stamp, then have config.status regenerate config.h
# (presumably config.status also recreates stamp-h1 -- not visible here).
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
cd $(top_builddir) && $(SHELL) ./config.status config.h
# Regenerate the config.h.in template with $(AUTOHEADER) (run in
# $(top_srcdir)), drop the stamp, and touch the template to refresh its mtime.
$(srcdir)/config.h.in: $(am__configure_deps)
($(am__cd) $(top_srcdir) && $(AUTOHEADER))
rm -f stamp-h1
touch $@
# Remove the generated header and its stamp; the leading `-` makes make
# ignore a failing rm.
distclean-hdr:
-rm -f config.h stamp-h1
# libtool cleanup at three levels; a leading `-` makes make ignore rm errors.
# mostlyclean: remove *.lo files in this directory.
mostlyclean-libtool:
-rm -f *.lo
# clean: remove the .libs and _libs directories.
clean-libtool:
-rm -rf .libs _libs
# distclean: remove the generated libtool script and config.lt.
distclean-libtool:
-rm -f libtool config.lt
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
# Shared recipe for every target named in $(am__recursive_targets).
#  * The per-directory target name is $@ with `-recursive` removed.
#  * distclean-* and maintainer-clean-* walk $(DIST_SUBDIRS); every other
#    target walks $(SUBDIRS).
#  * In each subdirectory a sub-make builds that target; for the entry `.`
#    the `<target>-am` target is built instead.
#  * On a sub-make failure the recipe either records `fail=yes` and carries on
#    (when $(am__make_keepgoing) succeeds) or exits 1 immediately.
#  * If `.` never appeared in the list, `<target>-am` is built here at the end.
#  * The final `test -z "$$fail"` makes the recipe fail if any subdirectory
#    failed in keep-going mode.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
# Build an ID database with mkid from $$unique (presumably set by
# $(am__define_uniq_tagged_files) -- its definition is not visible here).
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
# `tags` / `TAGS` are entry points that end up in tags-recursive.
tags: tags-recursive
TAGS: tags
# Build this directory's TAGS file with $(ETAGS).
#  * `set x` seeds the positional parameters with a placeholder that the later
#    `shift` removes.
#  * The include option is --etags-include if $(ETAGS) accepts it, otherwise
#    --include.
#  * For every entry of $(SUBDIRS) other than `.` that already has a TAGS
#    file, an include option pointing at it is appended.
#  * Nothing is run when $(ETAGS_ARGS), the include list and $$unique are all
#    empty; an empty $$unique is replaced by $$empty_fix before running.
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
# `ctags` / `CTAGS` are entry points that end up in ctags-recursive.
ctags: ctags-recursive
CTAGS: ctags
# Run $(CTAGS) over $$unique, unless both $(CTAGS_ARGS) and $$unique are empty.
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
# Resolve the absolute path of $(top_builddir), change to $(top_srcdir), and
# run `gtags -i` with that build-directory path as its argument.
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
# Build the cscope database, but only when cscope.files is non-empty
# (`test ! -s` succeeds for a missing or empty file and skips $(CSCOPE)).
cscope: cscope.files
test ! -s cscope.files \
|| $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS)
clean-cscope:
-rm -f cscope.files
# cscope.files is rebuilt from scratch: delete it first, then let every
# directory append its entries via cscopelist.
cscope.files: clean-cscope cscopelist
cscopelist: cscopelist-recursive
# Append one path per tagged file to $(top_builddir)/cscope.files. A file that
# exists relative to the current directory is listed under $(subdir);
# otherwise it is listed under the source directory, which is $(srcdir) when
# that is absolute and $(subdir)/$(srcdir) when it is relative.
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
# Remove every tag/index file the targets above can create.
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
# Populate $(distdir) with everything that goes into a distribution archive.
# Step 1: run $(am__remove_distdir), then create $(distdir) if it is missing.
distdir: $(DISTFILES)
$(am__remove_distdir)
test -d "$(distdir)" || mkdir "$(distdir)"
# Step 2: copy this directory's $(DISTFILES).
#  * A leading $(srcdir)/ is stripped from each name and a leading
#    $(top_srcdir)/ is rewritten to $(top_builddir)/.
#  * Any needed subdirectories are created under $(distdir).
#  * Each entry is taken from the build directory if it exists there,
#    otherwise from $(srcdir).
#  * Directories are copied recursively (the $(srcdir) copy first, then the
#    build-directory copy on top when both exist); plain files are copied
#    only if not already present in $(distdir).
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
# Step 3: recurse into every entry of $(DIST_SUBDIRS) other than `.`. The
# subdirectory is created under $(distdir) (skipped on a dry run), relative
# paths back to the dist directories are computed with $(am__relativize), and
# a sub-make runs `distdir` there with the removal, length-check and mode-fix
# steps disabled (each set to `:`).
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
# Step 4: unless $(am__skip_mode_fix) is set, normalize permissions so that
# directories are u+rwx,go+rx and files are readable by everyone, falling back
# to a blanket `chmod -R a+r`. The leading `-` makes make ignore failures.
-test -n "$(am__skip_mode_fix)" \
|| find "$(distdir)" -type d ! -perm -755 \
-exec chmod u+rwx,go+rx {} \; -o \
! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
! -type d ! -perm -400 -exec chmod a+r {} \; -o \
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|| chmod -R a+r "$(distdir)"
# Archive targets. Each one first builds $(distdir), then writes
# $(distdir).<ext> and finally runs $(am__post_remove_distdir). The tar-based
# ones pipe the output of $(am__tar) through the respective compressor.
dist-gzip: distdir
tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz
$(am__post_remove_distdir)
# bzip2 is given `-9` through the BZIP2 environment variable unless the caller
# already set BZIP2.
dist-bzip2: distdir
tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
$(am__post_remove_distdir)
# lzip gets `-9` unless LZIP_OPT is set in the environment.
dist-lzip: distdir
tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
$(am__post_remove_distdir)
# xz gets `-e` through XZ_OPT unless the caller already set XZ_OPT.
dist-xz: distdir
tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
$(am__post_remove_distdir)
# Deprecated format: prints warnings to stderr, then still builds the archive.
dist-tarZ: distdir
@echo WARNING: "Support for distribution archives compressed with" \
"legacy program 'compress' is deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__post_remove_distdir)
# Deprecated format: prints warnings to stderr, then still builds the archive.
dist-shar: distdir
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz
$(am__post_remove_distdir)
# Any existing zip is deleted first, before zip writes the new archive.
dist-zip: distdir
-rm -f $(distdir).zip
zip -rq $(distdir).zip $(distdir)
$(am__post_remove_distdir)
# Build every format in $(DIST_TARGETS) in one sub-make. The per-format
# post-removal step is overridden with the no-op '@:' for that sub-make and
# $(am__post_remove_distdir) is then run once here -- presumably so the
# staging directory survives until all archives have been written.
dist dist-all:
$(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:'
$(am__post_remove_distdir)
# This target untars the dist file and tries a VPATH configuration. Then
# it guarantees that the distribution is self-contained by making another
# tarfile.
#
# Sequence, as written below:
#  1. Unpack the archive whose extension matches $(DIST_ARCHIVES).
#  2. Make the unpacked tree read-only, then create _build, _build/sub and
#     _inst inside it (write permission on the top directory is granted only
#     for the mkdir).
#  3. In _build/sub: configure with --srcdir=../.. and --prefix set to the
#     _inst directory, then run the default target, dvi, check, install,
#     installcheck, uninstall and distuninstallcheck.
#  4. Make the install base read-only and repeat install / uninstall /
#     distuninstallcheck with DESTDIR pointing at a fresh directory under
#     $${TMPDIR-/tmp}; that directory is removed on success and on failure.
#  5. Run `dist` from the build tree, delete the resulting archives, and run
#     distcleancheck.
#  6. Run $(am__post_remove_distdir) and print a banner listing the archives.
# Any failing step in 3-5 makes the recipe exit 1.
distcheck: dist
case '$(DIST_ARCHIVES)' in \
*.tar.gz*) \
eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\
*.tar.bz2*) \
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lz*) \
lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
*.tar.xz*) \
xz -dc $(distdir).tar.xz | $(am__untar) ;;\
*.tar.Z*) \
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
*.shar.gz*) \
eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
*.zip*) \
unzip $(distdir).zip ;;\
esac
chmod -R a-w $(distdir)
chmod u+w $(distdir)
mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
chmod a-w $(distdir)
test -d $(distdir)/_build || exit 0; \
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
&& am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build/sub \
&& ../../configure \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \
--srcdir=../.. --prefix="$$dc_install_base" \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
&& $(MAKE) $(AM_MAKEFLAGS) check \
&& $(MAKE) $(AM_MAKEFLAGS) install \
&& $(MAKE) $(AM_MAKEFLAGS) installcheck \
&& $(MAKE) $(AM_MAKEFLAGS) uninstall \
&& $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
distuninstallcheck \
&& chmod -R a-w "$$dc_install_base" \
&& ({ \
(cd ../.. && umask 077 && mkdir "$$dc_destdir") \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
} || { rm -rf "$$dc_destdir"; exit 1; }) \
&& rm -rf "$$dc_destdir" \
&& $(MAKE) $(AM_MAKEFLAGS) dist \
&& rm -rf $(DIST_ARCHIVES) \
&& $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
&& cd "$$am__cwd" \
|| exit 1
$(am__post_remove_distdir)
@(echo "$(distdir) archives ready for distribution: "; \
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
# Verify that `uninstall` left nothing behind in $(distuninstallcheck_dir).
# Fails with an error on stderr when that variable is empty, when the
# directory cannot be entered, or when $(am__distuninstallcheck_listfiles)
# prints at least one line; in the last case the leftover files are listed,
# with a hint about DESTDIR support when $(DESTDIR) is set.
distuninstallcheck:
@test -n '$(distuninstallcheck_dir)' || { \
echo 'ERROR: trying to run $@ with an empty' \
'$$(distuninstallcheck_dir)' >&2; \
exit 1; \
}; \
$(am__cd) '$(distuninstallcheck_dir)' || { \
echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
exit 1; \
}; \
test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
|| { echo "ERROR: files left after uninstall:" ; \
if test -n "$(DESTDIR)"; then \
echo " (check DESTDIR support)"; \
fi ; \
$(distuninstallcheck_listfiles) ; \
exit 1; } >&2
# Verify that `distclean` (run first, as the prerequisite) leaves the build
# directory empty. Refuses to run when $(srcdir) is `.`, i.e. outside a VPATH
# build, and fails -- listing the files on stderr -- when
# $(distcleancheck_listfiles) prints at least one line.
distcleancheck: distclean
@if test '$(srcdir)' = . ; then \
echo "ERROR: distcleancheck can only run from a VPATH build" ; \
exit 1 ; \
fi
@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
|| { echo "ERROR: files left in build directory after distclean:" ; \
$(distcleancheck_listfiles) ; \
exit 1; } >&2
# Entry points for check/install/uninstall. Each public target `X` depends on
# `X-recursive` (presumably one of $(am__recursive_targets), whose definition
# is outside this excerpt); the `X-am` targets are this directory's own part.
check-am: all-am
check: check-recursive
# This directory's own build products are just Makefile and config.h.
all-am: Makefile config.h
installdirs: installdirs-recursive
installdirs-am:
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
# Local install: after all-am, run the exec and data halves in one sub-make.
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
# Run `install` in a sub-make with $(INSTALL_STRIP_PROGRAM) substituted for
# the normal program installer and INSTALL_STRIP_FLAG=-s. When $(STRIP) is
# non-empty it is additionally handed to the installer as STRIPPROG through
# INSTALL_PROGRAM_ENV.
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
# Generic cleanup hooks; the mostlyclean and clean levels have nothing to do.
mostlyclean-generic:
clean-generic:
# Remove $(CONFIG_CLEAN_FILES) when that list is non-empty, and remove
# $(CONFIG_CLEAN_VPATH_FILES) only when $(srcdir) is not `.` and the list is
# non-empty. The leading `-` makes make ignore failures.
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
# Only prints a notice; the actual removal happens in the targets that depend
# on this one.
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
# Cleanup, documentation and install entry points. Each public target `X`
# depends on `X-recursive`; the `X-am` targets are this directory's own part,
# and most of them are empty here.
clean: clean-recursive
clean-am: clean-generic clean-libtool mostlyclean-am
# After the recursive distclean, also remove $(am__CONFIG_DISTCLEAN_FILES)
# and the generated Makefile itself.
distclean: distclean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-hdr \
distclean-libtool distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am:
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
# Same as distclean, plus removal of the autom4te.cache directory in
# $(top_srcdir).
maintainer-clean: maintainer-clean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -rf $(top_srcdir)/autom4te.cache
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am:
.MAKE: $(am__recursive_targets) all install-am install-strip
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \
am--refresh check check-am clean clean-cscope clean-generic \
clean-libtool cscope cscopelist-am ctags ctags-am dist \
dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \
dist-xz dist-zip distcheck distclean distclean-generic \
distclean-hdr distclean-libtool distclean-tags distcleancheck \
distdir distuninstallcheck dvi dvi-am html html-am info \
info-am install install-am install-data install-data-am \
install-dvi install-dvi-am install-exec install-exec-am \
install-html install-html-am install-info install-info-am \
install-man install-pdf install-pdf-am install-ps \
install-ps-am install-strip installcheck installcheck-am \
installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-generic \
mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

33
Makefile.no-autoconfig Normal file
View file

@ -0,0 +1,33 @@
# ================================================================
# NOTE: This makefile is not intended to be used in a packaging system --
# rather, Miller uses autoconfig for that. This makefile is intended for users
# who prefer (for whatever reason) to bypass autoconfig. Please also see
# http://johnkerl.org/miller/doc/build.html#Without_using_autoconfig
# ================================================================
# Directory the man page is installed into. `?=` assigns only when the
# variable is not already set, so it can be overridden by the caller.
MANDIR ?= /usr/share/man
# Staging prefix placed in front of $(MANDIR) by the install target; empty by
# default.
DESTDIR ?=
# Default target: build the C code and the man page.
all: c manpage
# Developer target: build, install, then regenerate the docs.
devall: c install doc
# TODO: the install target exists to put most-recent mlr executable in the
# path to be picked up by the mlr-execs in the docs dir. better would be to
# export PATH here with ./c at its head.
# Build in ./c by delegating to that directory's Makefile.no-autoconfig.
# Recursive invocations use $(MAKE) rather than a literal `make`, so the
# sub-make is the same program as the parent (e.g. `gmake`) and inherits its
# flags and jobserver (-j, -n, -k).
c: .always
	$(MAKE) -C c -f Makefile.no-autoconfig top

# Regenerate the documentation by running `poki` inside ./doc.
doc: .always
	cd doc && poki

# Install via ./c's makefile, then create $(DESTDIR)/$(MANDIR) and copy the
# man page into it.
# NOTE(review): doc/miller.1 is placed directly in $(MANDIR), not in a man1/
# subdirectory -- confirm this is the intended location.
install: .always
	$(MAKE) -C c -f Makefile.no-autoconfig install
	install -d -m 0755 $(DESTDIR)/$(MANDIR)
	install -m 0644 doc/miller.1 $(DESTDIR)/$(MANDIR)

# Clean by delegating to ./c's makefile.
clean: .always
	$(MAKE) -C c -f Makefile.no-autoconfig clean
# manpage is declared phony, so its recipe runs whenever it is requested.
.PHONY: manpage
# OSX:
# * brew install asciidoc
# * export XML_CATALOG_FILES=/usr/local/etc/xml/catalog
# Convert doc/miller.1.txt into a man page with a2x, run from inside ./doc.
manpage: doc/miller.1.txt
( cd doc && a2x -d manpage -f manpage miller.1.txt )
# Pseudo-target with a no-op recipe; the c, doc, install and clean targets
# list it as a prerequisite, presumably to force them to run every time even
# though directories named `c` and `doc` exist.
.always:
@true

View file

@ -4,10 +4,12 @@ Grab `miller.spec` and go to town.
## Instructions to make Miller source/binary RPMs for the RPM-inexperienced
### One-time setup
Change `6.2.0` to desired version. Release-package example:
https://github.com/johnkerl/miller/releases/download/v6.2.0/miller-6.2.0.tar.gz
### One-time setup
Change `3.3.2` to desired version. Release-package example:
https://github.com/johnkerl/miller/releases/download/v3.3.2/mlr-3.3.2.tar.gz
```
cd
@ -15,7 +17,7 @@ mkdir ~/rpmbuild
mkdir ~/rpmbuild/SPECS
mkdir ~/rpmbuild/SOURCES
cp /your/path/to/miller/clone/miller.spec ~/rpmbuild/SPECS
cp /your/path/to/miller-6.2.0.tar.gz ~/rpmbuild/SOURCES
cp /your/path/to/mlr-3.3.2.tar.gz ~/rpmbuild/SOURCES
cd ~/rpmbuild/SPECS
```
@ -38,20 +40,32 @@ sudo yum install rpmlint
### Build source-RPM only
```
rpmbuild -bs miller.spec
Wrote: /your/home/dir/rpmbuild/SRPMS/miller-6.2.0-1.el6.src.rpm
Wrote: /your/home/dir/rpmbuild/SRPMS/miller-3.3.2-1.el6.src.rpm
```
```
rpm -qpl ../SRPMS/miller-6.2.0-1.el6.src.rpm
miller-6.2.0.tar.gz
rpm -qpl ../SRPMS/miller-3.3.2-1.el6.src.rpm
mlr-3.3.2.tar.gz
miller.spec
```
```
rpm -qpi ../SRPMS/miller-6.2.0-1.el6.src.rpm
rpm -qpi ../SRPMS/miller-3.3.2-1.el6.src.rpm
Name : mlr Relocations: (not relocatable)
Version : 6.2.0 Vendor: (none)
...
Version : 3.3.2 Vendor: (none)
Release : 1.el6 Build Date: Sun 07 Feb 2016 09:43:39 PM EST
Install Date: (not installed) Build Host: host.name.goes.here
Group : Applications/Text Source RPM: (none)
Size : 774430 License: BSD2
Signature : (none)
URL : http://johnkerl.org/miller/doc
Summary : Name-indexed data processing tool
Description :
Miller (mlr) allows name-indexed data such as CSV and JSON files to be
processed with functions equivalent to sed, awk, cut, join, sort etc. It can
convert between formats, preserves headers when sorting or reversing, and
streams data where possible so its memory requirements stay small. It works
well with pipes and can feed "tail -f".
```
### Build source and binary RPMs
@ -61,21 +75,30 @@ rpmbuild -ba miller.spec
```
```
rpm -qpl ../RPMS/x86_64//miller-6.2.0-1.el6.x86_64.rpm
rpm -qpl ../RPMS/x86_64//miller-3.3.2-1.el6.x86_64.rpm
/usr/bin/mlr
/usr/share/man/man1/mlr.1.gz
```
```
sudo rpm -ivh ../RPMS/x86_64/miller-6.2.0-1.el6.x86_64.rpm
sudo rpm -ivh ../RPMS/x86_64/miller-3.3.2-1.el6.x86_64.rpm
Preparing... ########################################### [100%]
1:mlr ########################################### [100%]
```
```
/usr/bin/mlr --version
Miller 6.2.0
Miller 3.3.2
man -M /usr/share/man mlr
```
and check the version in the DESCRIPTION section.
### Some handy references
* https://github.com/bonzini/grep/blob/master/grep.spec
* http://www.rpm.org/max-rpm/s1-rpm-build-creating-spec-file.html
* http://www.rpm.org/max-rpm/s1-rpm-inside-files-list-directives.html
* http://www.tldp.org/HOWTO/RPM-HOWTO/build.html
* http://www.tldp.org/LDP/solrhe/Securing-Optimizing-Linux-RH-Edition-v1.3/chap3sec20.html
* https://fedoraproject.org/wiki/How_to_create_a_GNU_Hello_RPM_package

11
README-appveyor.md Normal file
View file

@ -0,0 +1,11 @@
## Windows builds
This uses `appveyor.yml` and https://ci.appveyor.com/project/johnkerl/miller.
Unfortunately, I understand next to nothing about what I'm doing here --
whenever the AppVeyor build breaks (and the Travis build doesn't) I end up
googling for various things in the
https://ci.appveyor.com/project/johnkerl/miller build-log output, then
iteratively updating `appveyor.yml` until I can get a build again.
If anyone has expertise in this area, I'd love to chat! :)

View file

@ -1,187 +0,0 @@
# Quickstart for developers
* `make`, `make check`, `make docs`, etc: see [Makefile](Makefile) in the repo base directory.
* Software-testing methodology: see [./test/README.md](./test/README.md).
* Source-code indexing: please see [https://sourcegraph.com/github.com/johnkerl/miller](https://sourcegraph.com/github.com/johnkerl/miller)
* Godoc: as of September 2021, `godoc` support is minimal: package-level synopses exist; most `func`/`const`/etc content lacks `godoc`-style comments. To view doc material, you can:
* `go get golang.org/x/tools/cmd/godoc`
* `cd go`
* `godoc -http=:6060 -goroot .`
* Browse to `http://localhost:6060`
* Note: control-C and restart the server, then reload in the browser, to pick up edits to source files
# Continuous integration
The Go implementation is auto-built using GitHub Actions: see [.github/workflows/go.yml](.github/workflows/go.yml). This works splendidly on Linux, MacOS, and Windows.
# Benefits of porting to Go
* The lack of a streaming (record-by-record) JSON reader in the C implementation ([issue 99](https://github.com/johnkerl/miller/issues/99)) is immediately solved in the Go implementation.
* In the C implementation, arrays were not supported in the DSL; in the Go implementation they are.
* Flattening nested map structures to output records was clumsy. Now, Miller will be a JSON-to-JSON processor, if your inputs and outputs are both JSON; JSON input and output will be idiomatic.
* The quoted-DKVP feature from [issue 266](https://github.com/johnkerl/miller/issues/266) will be easily addressed.
* String/number-formatting issues in [issue 211](https://github.com/johnkerl/miller/issues/211), [issue 178](https://github.com/johnkerl/miller/issues/178), [issue 151](https://github.com/johnkerl/miller/issues/151), and [issue 259](https://github.com/johnkerl/miller/issues/259) will be fixed during the Go port.
* I think some DST/timezone issues such as [issue 359](https://github.com/johnkerl/miller/issues/359) will be easier to fix using the Go datetime library than using the C datetime library
* The code will be easier to read and, I hope, easier for others to contribute to. What this means is it should be quicker and easier to add new features to Miller -- after the development-time cost of the port itself is paid, of course.
# Why Go
* As noted above, multiple Miller issues will benefit from stronger library support.
* Channels/goroutines are an excellent fit for Miller's reader/mapper/mapper/mapper/writer record-stream architecture.
* Since I did timing experiments in 2015, I found Go to be faster than it was then.
* In terms of CPU-cycle-count, Go is a bit slower than C (it does more things, like bounds-checking arrays and so on) -- but by leveraging concurrency over a couple processors, I find that it's competitive in terms of wall-time.
* Go is an up-and-coming language, with good reason -- it's mature, stable, with few of C's weaknesses and many of C's strengths.
* The source code will be easier to read/maintain/write, by myself and others.
# Efficiency of the Go port
As I wrote [here](https://johnkerl.org//miller-docs-by-release/1.0.0/performance.html) back in 2015 I couldn't get Rust or Go (or any other language I tried) to do some test-case processing as quickly as C, so I stuck with C.
Either Go has improved since 2015, or I'm a better Go programmer than I used to be, or both -- but as of 2020 I can get Go-Miller to process data about as quickly as C-Miller.
Note: in some sense Go-Miller is *less* efficient but in a way that doesn't significantly affect wall time. Namely, doing `mlr cat` on a million-record data file on my bargain-value MacBook Pro, the C version takes about 2.5 seconds and the Go version takes about 3 seconds. So in terms of wall time -- which is what we care most about, how long we have to wait -- it's about the same.
A way to look a little deeper at resource usage is to run `htop`, while processing a 10x larger file, so it'll take 25 or 30 seconds rather than 2.5 or 3. This way we can look at the steady-state resource consumption. I found that the C version -- which is purely single-threaded -- is taking 100% CPU. And the Go version, which uses concurrency and channels and `MAXPROCS=4`, with reader/transformer/writer each on their own CPU, is taking about 240% CPU. So Go-Miller is taking up not just a little more CPU, but a lot more -- yet, it does more work in parallel, and finishes the job in about the same amount of time.
Even commodity hardware has multiple CPUs these days -- and the Go code is *much* easier to read, extend, and improve than the C code -- so I'll call this a net win for Miller.
# Developer information
## Source-code goals
Donald Knuth famously said: *Programs are meant to be read by humans and only incidentally for computers to execute.*
During the coding of Miller, I've been guided by the following:
* *Miller should be pleasant to read.*
* If you want to fix a bug, you should be able to quickly and confidently find out where and how.
* If you want to learn something about Go channels, or lexing/parsing in Go -- especially if you don't already know much about them -- the comments should help you learn what you want to.
* If you're the kind of person who reads other people's code for fun, well, the code should be fun, as well as readable.
* `README.md` files throughout the directory tree are intended to give you a sense of what is where, what to read first and what doesn't need reading right away, and so on -- so you spend a minimum of time being confused or frustrated.
* Names of files, variables, functions, etc. should be fully spelled out (e.g. `NewEvaluableLeafNode`), except for a small number of most-used names where a longer name would cause unnecessary line-wraps (e.g. `Mlrval` instead of `MillerValue` since this appears very very often).
* Code should not be too clever. This includes some reasonable amounts of code duplication from time to time, to keep things inline, rather than lasagna code.
* Things should be transparent. For example, the `-v` in `mlr -n put -v '$y = 3 + 0.1 * $x'` shows you the abstract syntax tree derived from the DSL expression.
* Comments should be robust with respect to reasonably anticipated changes. For example, one package should cross-link to another in its comments, but I try to avoid mentioning specific filenames too much in the comments and README files since these may change over time. I make an exception for stable points such as [cmd/mlr/main.go](./cmd/mlr/main.go), [mlr.bnf](./pkg/parsing/mlr.bnf), [stream.go](./pkg/stream/stream.go), etc.
* *Miller should be pleasant to write.*
* It should be quick to answer the question *Did I just break anything?* -- hence `mlr regtest` functionality.
* It should be quick to find out what to do next as you iteratively develop -- see for example [cst/README.md](./pkg/dsl/cst/README.md).
* *The language should be an asset, not a liability.*
* One of the reasons I chose Go is that (personally anyway) I find it to be reasonably efficient, well-supported with standard libraries, straightforward, and fun. I hope you enjoy it as much as I have.
## Directory structure
Information here is for the benefit of anyone reading/using the Miller Go code. To use the Miller tool at the command line, you don't need to know any of this if you don't want to. :)
## Directory-structure overview
Miller is a multi-format record-stream processor, where a **record** is a
sequence of key-value pairs. The basic **stream** operation is:
* **read** records in some specified file format;
* **transform** the input records to output records in some user-specified way, using a **chain** of **transformers** (also sometimes called **verbs**) -- sort, filter, cut, put, etc.;
* **write** the records in some specified file format.
So, in broad overview, the key packages are:
* [pkg/stream](./pkg/stream) -- connect input -> transforms -> output via Go channels
* [pkg/input](./pkg/input) -- read input records
* [pkg/transformers](./pkg/transformers) -- transform input records to output records
* [pkg/output](./pkg/output) -- write output records
* The rest are details to support this.
## Directory-structure details
### Dependencies
* Miller dependencies are all in the Go standard library, except two:
* GOCC lexer/parser code-generator from [github.com/goccmack/gocc](https://github.com/goccmack/gocc):
* Forked at [github.com/johnkerl/gocc](https://github.com/johnkerl/gocc).
* This package defines the grammar for Miller's domain-specific language (DSL) for the Miller `put` and `filter` verbs. And, GOCC is a joy to use. :)
* It is used on the terms of its open-source license.
* [golang.org/x/term](https://pkg.go.dev/golang.org/x/term):
* Just a one-line Miller callsite for is-a-terminal checking for the [Miller REPL](./pkg/terminals/repl/README.md).
* It is used on the terms of its open-source license.
* See also [./go.mod](go.mod). Setup:
* `go get github.com/johnkerl/gocc`
* `go get golang.org/x/term`
### Miller per se
* The main entry point is [cmd/mlr/main.go](./cmd/mlr/main.go); everything else in [pkg](./pkg).
* [pkg/entrypoint](./pkg/entrypoint): All the usual contents of `main()` are here, for ease of testing.
* [pkg/platform](./pkg/platform): Platform-dependent code, which as of early 2021 is the command-line parser. Handling single quotes and double quotes is different on Windows unless particular care is taken, which is what this package does.
* [pkg/lib](./pkg/lib):
* Implementation of the [`Mlrval`](./pkg/types/mlrval.go) datatype which includes string/int/float/boolean/void/absent/error types. These are used for record values, as well as expression/variable values in the Miller `put`/`filter` DSL. See also below for more details.
* [`Mlrmap`](./pkg/types/mlrmap.go) is the sequence of key-value pairs which represents a Miller record. The key-lookup mechanism is optimized for Miller read/write usage patterns -- please see [mlrmap.go](./pkg/types/mlrmap.go) for more details.
* [`context`](./pkg/types/context.go) supports AWK-like variables such as `FILENAME`, `NF`, `NR`, and so on.
* [pkg/cli](./pkg/cli) is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer-chain of `put` then `filter`, and a JSON record-writer.
* [pkg/climain](./pkg/climain) contains a layer which invokes `pkg/cli`, which was split out to avoid a Go package-import cycle.
* [pkg/stream](./pkg/stream) is as above -- it uses Go channels to pipe together file-reads, to record-reading/parsing, to a chain of record-transformers, to record-writing/formatting, to terminal standard output.
* [pkg/input](./pkg/input) is as above -- one record-reader type per supported input file format, and a factory method.
* [pkg/output](./pkg/output) is as above -- one record-writer type per supported output file format, and a factory method.
* [pkg/transformers](./pkg/transformers) contains the abstract record-transformer interface datatype, as well as the Go-channel chaining mechanism for piping one transformer into the next. It also contains all the concrete record-transformers such as `cat`, `tac`, `sort`, `put`, and so on.
* [pkg/parsing](./pkg/parsing) contains a single source file, `mlr.bnf`, which is the lexical/semantic grammar file for the Miller `put`/`filter` DSL using the GOCC framework. All subdirectories of `pkg/parsing/` are autogen code created by GOCC's processing of `mlr.bnf`. If you need to edit `mlr.bnf`, please use [tools/build-dsl](./tools/build-dsl) to autogenerate Go code from it (using the GOCC tool). (This takes several minutes to run.)
* [pkg/dsl](./pkg/dsl) contains [`ast_types.go`](pkg/dsl/ast_types.go) which is the abstract syntax tree datatype shared between GOCC and Miller. I didn't use a `pkg/dsl/ast` naming convention, although that would have been nice, in order to avoid a Go package-dependency cycle.
* [pkg/dsl/cst](./pkg/dsl/cst) is the concrete syntax tree, constructed from an AST produced by GOCC. The CST is what is actually executed on every input record when you do things like `$z = $x * 0.3 * $y`. Please see the [pkg/dsl/cst/README.md](./pkg/dsl/cst/README.md) for more information.
## Nil-record conventions
Throughout the code, records are passed by reference (as are most things, for
that matter, to reduce unnecessary data copies). In particular, records can be
nil through the reader/transformer/writer sequence.
* Record-readers produce an end-of-stream marker (within the `RecordAndContext` struct) to signify end of input stream.
* Each transformer takes a record-pointer as input and produces a sequence of zero or more record-pointers.
* Many transformers, such as `cat`, `cut`, `rename`, etc. produce one output record per input record.
* The `filter` transformer produces one or zero output records per input record depending on whether the record passed the filter.
* The `nothing` transformer produces zero output records.
* The `sort` and `tac` transformers are *non-streaming* -- they produce zero output records per input record, and instead retain each input record in a list. Then, when the end-of-stream marker is received, they sort/reverse the records and emit them, then they emit the end-of-stream marker.
* Many transformers such as `stats1` and `count` also retain input records, then produce output once there is no more input to them.
* An end-of-stream marker is passed to record-writers so that they may produce final output.
* Most writers produce their output one record at a time.
* The pretty-print writer produces no output until end of stream (or schema change), since it needs to compute the max width down each column.
## Memory management
* Go has garbage collection which immediately simplifies the coding compared to the C port.
* Pointers are used freely for record-processing: record-readers allocate pointed records; pointed records are passed on Go channels from record-readers to record-transformers to record-writers.
* Any transformer which passes an input record through is fine -- be it unmodified as in `mlr cat` or modified as in `mlr cut`.
* If a transformer drops a record (`mlr filter` in false cases, for example, or `mlr nothing`) it will be GCed.
* One caveat is any transformer which produces multiples, e.g. `mlr repeat` -- this needs to explicitly copy records instead of producing multiple pointers to the same record.
* Right-hand-sides of DSL expressions all pass around pointers to records and Mlrvals.
* Lvalue expressions return pointed `*types.Mlrmap` so they can be assigned to; rvalue expressions return non-pointed `types.Mlrval` but these are very shallow copies -- the int/string/etc types are copied but maps/arrays are passed by reference in the rvalue expression-evaluators.
* Copy-on-write is done on map/array put -- for example, in the assignment phase of a DSL statement, where an rvalue is assigned to an lvalue.
## More about mlrvals
[`Mlrval`](./pkg/types/mlrval.go) is the datatype of record values, as well as expression/variable values in the Miller `put`/`filter` DSL. It includes string/int/float/boolean/void/absent/error types, not unlike PHP's `zval`.
* Miller's `absent` type is like Javascript's `undefined` -- it's for times when there is no such key, as in a DSL expression `$out = $foo` when the input record is `$x=3,y=4` -- there is no `$foo` so `$foo` has `absent` type. Nothing is written to the `$out` field in this case. See also [here](https://miller.readthedocs.io/en/latest/reference-main-null-data) for more information.
* Miller's `void` type is like Javascript's `null` -- it's for times when there is a key with no value, as in `$out = $x` when the input record is `$x=,$y=4`. This is an overlap with `string` type, since a void value looks like an empty string. I've gone back and forth on this (including when I was writing the C implementation) -- whether to retain `void` as a distinct type from empty-string, or not. I ended up keeping it as it made the `Mlrval` logic easier to understand.
* Miller's `error` type is for things like doing type-uncoerced addition of strings. Data-dependent errors are intended to result in `(error)`-valued output, rather than crashing Miller. See also [here](https://miller.readthedocs.io/en/latest/reference-main-data-types) for more information.
* Miller's number handling makes auto-overflow from int to float transparent, while preserving the possibility of 64-bit bitwise arithmetic.
* This is different from JavaScript, which has only double-precision floats and thus no support for 64-bit numbers (note however that there is now [`BigInt`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/BigInt)).
* This is also different from C and Go, wherein casts are necessary -- without which int arithmetic overflows.
* See also [here](https://miller.readthedocs.io/en/latest/reference-main-arithmetic) for the semantics of Miller arithmetic, which the [`Mlrval`](./pkg/types/mlrval.go) class implements.
## Performance optimizations
Key performance-related PRs for the Go port include:
* [#424](https://github.com/johnkerl/miller/pull/424): In C, lots of little mallocs are fine. In Go, not so much. This is not the garbage-collection penalty -- it's the penalty of _allocating_ -- lots of `duffcopy` and `madvise` appearing in the flame graphs. The idea was to reduce data-copies in the DSL.
* [#765](https://github.com/johnkerl/miller/pull/765): In C, prints to `stdout` are buffered a line at a time if the output is to the terminal, or a few KB at a time if not (i.e. file or pipe). Note the cost is how often the process does a `write` system call with associated overhead of context-switching into the kernel and back out. The C behavior is the right thing to do. In the Go port, very early on writes were all unbuffered -- several per record. Then buffering was soon switched to per-record, which was an improvement. But as of #765, the buffering is done at the library level, and it's done C-style -- much less frequently when output is not to a terminal.
* [#774](https://github.com/johnkerl/miller/pull/774): For CSV-lite and DKVP, this avoids using regexes to split strings when `strings.Split` will do.
* [#779](https://github.com/johnkerl/miller/pull/779): The basic idea of the Miller Go port was that the record-reader writes a record at a time over a channel to the first verb; the first verb writes records one at a time to the second verb, and so on; the last verb writes records one at a time to the record-writer. This is very simple, but for large files, the Go runtime scheduler overhead is too large -- data are chopped up into too many pieces. On #779 records are written 500 (or fewer) per batch, and all the channels from record-reader, to verbs, to record-writer are on record-batches. This lets Miller spend more time doing its job and less time yielding to the goroutine scheduler.
* [#786](https://github.com/johnkerl/miller/pull/786): In the C version, all values were strings until operated on specifically (explicitly) by a verb. In the Go port, initially, all values were type-inferred on read, with types retained throughout the processing chain. This was an incredibly elegant and empowering design decision -- central to the Go port, in fact -- but it came with the cost that _all_ fields were being scanned as float/int even if they weren't used in the processing chain. On #786, fields are left as raw strings with type "pending", and are just-in-time inferred to string/int/float only when used within the processing chain.
* [#787](https://github.com/johnkerl/miller/pull/787): This removed an unnecessary data copy in the `mlrval.String()` method. Originally this method had non-pointer receiver to conform with the `fmt.Stringer` interface. However, that's a false economy: `fmt.Println(someMlrval)` is a corner case, and stream processing is the primary concern. Implementing this as a pointer-receiver method was a performance improvement.
* [#809](https://github.com/johnkerl/miller/pull/809): This reduced the number of passes through fields for just-in-time type-inference. For example, for `$y = $x + 1`, each record's `$x` field's raw string (if not already accessed in the processing chain) needs to be checked to see if it's int (like `123`), float (like `123.4` or `1.2e3`), or string (anything else). Previously, succinct calls to built-in Go library functions were used. That was easy to code, but made too many expensive calls that were avoidable by lighter peeking of strings. In particular, an is-octal regex was being invoked unnecessarily on every field type-infer operation.
See also [./README-profiling.md](./README-profiling.md) and [https://miller.readthedocs.io/en/latest/new-in-miller-6/#performance-benchmarks](https://miller.readthedocs.io/en/latest/new-in-miller-6/#performance-benchmarks).
In summary:
* [#765](https://github.com/johnkerl/miller/pull/765), [#774](https://github.com/johnkerl/miller/pull/774), and [#787](https://github.com/johnkerl/miller/pull/787) were low-hanging fruit.
* [#424](https://github.com/johnkerl/miller/pull/424) was a bit more involved, and reveals that memory allocation -- not just GC -- needs to be handled more mindfully in Go than in C.
* [#779](https://github.com/johnkerl/miller/pull/779) was a bit more involved, and reveals that Go's elegant goroutine/channel processing model comes with the caveat that channelized data should not be organized in many, small pieces.
* [#809](https://github.com/johnkerl/miller/pull/809) was also a bit more involved, and reveals that library functions are convenient, but profiling and analysis can sometimes reveal an opportunity for an impactful, custom solution.
* [#786](https://github.com/johnkerl/miller/pull/786) was a massive refactor involving about 10KLOC -- in hindsight it would have been best to do this work at the start of the Go port, not at the end.

View file

@ -1,62 +0,0 @@
# Miller docs
## Why use Mkdocs
* Connects to https://miller.readthedocs.io so people can get their docmods onto the web instead of the self-hosted https://johnkerl.org/miller/doc. Thanks to @pabloab for the great advice!
* More standard look and feel -- lots of people use readthedocs for other things so this should feel familiar.
* We get a Search feature for free.
* Mkdocs vs Sphinx: these are similar tools, but I find that I more easily get better desktop+mobile formatting using Mkdocs.
## Contributing
* You need `pip install mkdocs` (or `pip3 install mkdocs`) as well as `pip install mkdocs-material`.
* The docs include lots of live code examples which will be invoked using `mlr` which must be somewhere in your `$PATH`.
* Clone https://github.com/johnkerl/miller and cd into `docs/` within your clone.
* Overview of flow:
* `docs/src` has `*.md.in` files containing markdown as well as directives for auto-generating code samples.
* A `genmds` script reads `docs/src/*.md.in` and writes `docs/src/*.md`.
* The `mkdocs build` tool reads `docs/src/*.md` and writes HTML files in `docs/site`.
* Running `make` within the `docs` directory handles both of those steps.
* TL;DR just `make docs` from the Miller base directory
* Quick-editing loop:
* In one terminal, cd to the `docs` directory and leave `mkdocs serve` running.
* In another terminal, cd to the `docs/src` subdirectory and edit `*.md.in`.
* Run `genmds` to re-create all the `*.md` files, or `genmds foo.md.in` to just re-create `foo.md` from the `foo.md.in` file you just edited, or (simplest) just `make` within the `docs/src` subdirectory.
* In your browser, visit http://127.0.0.1:8000
* This doesn't write HTML in `docs/site`; HTML is served up directly in the browser -- this is nice for previewing interactive edits.
* For-publish editing loop:
* cd to the `src` subdirectory of `docs` and edit `*.md.in`.
* `make -C ..`
* This does write HTML in `docs/site`.
* In your browser, visit `file:///your/path/to/miller/docs/site/index.html`
* Link-checking:
* `sudo pip3 install git+https://github.com/linkchecker/linkchecker.git`
* `cd site` and `linkchecker .`
* Submitting:
* Do the for-publish editing steps -- in particular, `docs/src/*.md.in` and `docs/src/*.md` are both checked in to source control.
* TL;DR edit `docs/src/foo.md.in` and run `make docs`
* If you don't want to do `pip install mkdocs` then feel free to put up a PR which edits a `foo.md.in` as well as its `foo.md`.
* `git add` your modified files (`*.md.in` as well as `*.md`), `git commit`, `git push`, and submit a PR at https://github.com/johnkerl/miller.
## Notes
* Miller documents use the Oxford comma: not _red, yellow and green_, but rather _red, yellow, and green_.
* CSS:
* I used the Mkdocs "material" theme which I like a lot. I customized `docs/src/extra.css` for Miller coloring/branding.
* Live code:
* I didn't find a way to include non-Python live-code examples within Mkdocs so I adapted the pre-Mkdocs Miller-doc strategy which is to have a generator script read a template file (here, `foo.md.in`), run the marked lines, and generate the output file (`foo.md`). This is `genmds`.
* Edit the `*.md.in` files, not `*.md` directly.
* Within the `*.md.in` files are lines like `GENMD_RUN_COMMAND`. These will be run, and their output included, by `make`, which calls the `genmds` script for you.
* readthedocs:
* https://readthedocs.org/
* https://readthedocs.org/projects/miller/
* https://readthedocs.org/projects/miller/builds/
* https://miller.readthedocs.io/en/latest/
## readthedocs website
* Published to https://miller.readthedocs.io/en/latest on each commit to `main` in this repo
* https://readthedocs.org/projects/miller/
* https://readthedocs.org/api/v2/webhook/miller/134065/
* https://readthedocs.org/projects/miller/builds/
* https://readthedocs.org/

View file

@ -1 +0,0 @@
Moved to [README-dev.md](README-dev.md).

View file

@ -1,74 +0,0 @@
# See also
* [https://github.com/johnkerl/miller/blob/readme-profiling/README-dev.md#performance-optimizations](https://github.com/johnkerl/miller/blob/readme-profiling/README-dev.md#performance-optimizations)
* [https://miller.readthedocs.io/en/latest/new-in-miller-6/#performance-benchmarks](https://miller.readthedocs.io/en/latest/new-in-miller-6/#performance-benchmarks).
* `make bench` to run Go benchmarks for Miller
# How to view profile data
Run the profiler:
```
mlr --cpuprofile cpu.pprof --csv put -f scripts/chain-1.mlr ~/tmp/big.csv > /dev/null
```
(or whatever command-line flags for Miller).
Text mode:
```
go tool pprof mlr cpu.pprof
top10
```
Graphical mode:
```
go tool pprof -http=:8080 cpu.pprof
```
and let it pop open a browser window. Then navigate there -- I personally find _View_ -> _Flame Graph_ most useful:
![flame-graph](./docs/src/profiling/flame1.png)
Note that you can drill into subcomponents of the flame graph:
![flame-graph](./docs/src/profiling/flame2.png)
# Benchmark scripts
Scripts:
* [./scripts/make-big-files](./scripts/make-big-files) -- Create million-record data files in various formats.
* [./scripts/chain-cmps.sh](./scripts/chain-cmps.sh) -- Run a few processing scenarios on the million-record CSV file.
* [./scripts/chain-1.mlr](./scripts/chain-1.mlr) -- An example `mlr put` used by the previous script
* [./scripts/time-big-files](./scripts/time-big-files) -- Runs `mlr cat` for million-record files of various file formats. Catting files isn't intrinsically interesting but it shows how input and output processing vary over file formats.
* [./scripts/time-big-file](./scripts/time-big-file) -- Helper script for the former.
* [./scripts/chain-lengths.sh](./scripts/chain-lengths.sh) -- Run longer and longer chains of `scripts/chain-1.mlr`, showing how Miller handles multicore and concurrency.
* [./scripts/make-data-stream](./scripts/make-data-stream) -- Create an endless stream of data to be piped into Miller for steady-state load-testing: e.g. `scripts/make-data-stream | mlr ...` then look at `htop` in another window.
Notes:
* Any of the above can be run using the profiler. I find Flame Graph mode particularly informative for drill-down.
* The above refer to `mlr5` and `~/tmp/miller/mlr` as well as `./mlr`. The idea is I have a copy of Miller 5.10.3 (the C implementation) saved off in my path as `mlr5`. Then I keep `~/tmp/miller` on recent HEAD. Then I have `.` on a dev branch. Comparing `mlr5` to `./mlr` shows relative performance of the C and Go implementations. Comparing `~/tmp/miller/mlr` to `./mlr` shows relative performance of whatever optimization I'm currently working on.
* Several of the above scripts use [justtime](https://github.com/johnkerl/scripts/blob/main/fundam/justtime) to get one-line timing information.
# How to vary compiler versions
* [./scripts/compiler-versions-install](./scripts/compiler-versions-install)
* [./scripts/compiler-versions-build](./scripts/compiler-versions-build)
* [./scripts/compiler-versions-time](./scripts/compiler-versions-time)
# How to control garbage collection
```
# Note 100 is the default
# Raise the bar for GC threshold:
GOGC=200 GODEBUG=gctrace=1 mlr -n put -q -f u/mand.mlr 1> /dev/null
# Raise the bar higher for GC threshold:
GOGC=1000 GODEBUG=gctrace=1 mlr -n put -q -f u/mand.mlr 1> /dev/null
# Turn off GC entirely and see where time is spent:
GOGC=off GODEBUG=gctrace=1 mlr -n put -q -f u/mand.mlr 1> /dev/null
```

View file

@ -1,3 +0,0 @@
[https://repology.org/project/miller](https://repology.org/project/miller)
[![Packaging status](https://repology.org/badge/vertical-allrepos/miller.svg)](https://repology.org/project/miller/versions)

299
README.md
View file

@ -1,16 +1,82 @@
# What is Miller?
**Miller is like awk, sed, cut, join, and sort for data formats such as CSV, TSV, JSON, JSON Lines, and positionally-indexed.**
**Miller is like awk, sed, cut, join, and sort for name-indexed data such as CSV, TSV, and tabular JSON.**
# Build status
[![Linux build status](https://travis-ci.org/johnkerl/miller.svg?branch=master)](https://travis-ci.org/johnkerl/miller)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/johnkerl/miller?branch=master&svg=true)](https://ci.appveyor.com/project/johnkerl/miller)
[![License](http://img.shields.io/badge/license-BSD2-blue.svg)](https://github.com/johnkerl/miller/blob/master/LICENSE.txt)
[![Docs](https://img.shields.io/badge/docs-here-yellow.svg)](http://johnkerl.org/miller/doc)
# Distributions
There's a good chance you can get Miller pre-built for your system:
[![Ubuntu](https://img.shields.io/badge/distros-ubuntu-db4923.svg)](https://launchpad.net/ubuntu/+source/miller)
[![Ubuntu 16.04 LTS](https://img.shields.io/badge/distros-ubuntu1604lts-db4923.svg)](https://launchpad.net/ubuntu/xenial/+package/miller)
[![Fedora](https://img.shields.io/badge/distros-fedora-173b70.svg)](https://apps.fedoraproject.org/packages/miller)
[![Debian](https://img.shields.io/badge/distros-debian-c70036.svg)](https://packages.debian.org/stable/miller)
[![Gentoo](https://img.shields.io/badge/distros-gentoo-4e4371.svg)](https://packages.gentoo.org/packages/sys-apps/miller)
[![Pro-Linux](https://img.shields.io/badge/distros-prolinux-3a679d.svg)](http://www.pro-linux.de/cgi-bin/DBApp/check.cgi?ShowApp..20427.100)
[![Arch Linux](https://img.shields.io/badge/distros-archlinux-1792d0.svg)](https://aur.archlinux.org/packages/miller-git)
[![NetBSD](https://img.shields.io/badge/distros-netbsd-f26711.svg)](http://pkgsrc.se/textproc/miller)
[![FreeBSD](https://img.shields.io/badge/distros-freebsd-8c0707.svg)](https://www.freshports.org/textproc/miller/)
[![Homebrew/MacOSX](https://img.shields.io/badge/distros-macosxbrew-ba832b.svg)](https://github.com/Homebrew/homebrew-core/search?utf8=%E2%9C%93&q=miller)
[![MacPorts/MacOSX](https://img.shields.io/badge/distros-macports-1376ec.svg)](https://www.macports.org/ports.php?by=name&substr=miller)
[![Chocolatey](https://img.shields.io/badge/distros-chocolatey-red.svg)](https://chocolatey.org/packages/miller)
|OS|Installation command|
|---|---|
|Linux|`yum install miller`<br/> `apt-get install miller`|
|Mac|`brew install miller`<br/>`port install miller`|
|Windows|`choco install miller`|
See also [building from source](http://johnkerl.org/miller/doc/build.html#Building_from_source).
# What can Miller do for me?
With Miller, you get to use named fields without needing to count positional
indices, using familiar formats such as CSV, TSV, JSON, JSON Lines, and
positionally-indexed. Then, on the fly, you can add new fields which are
functions of existing fields, drop fields, sort, aggregate statistically,
pretty-print, and more.
indices, using familiar formats such as CSV, TSV, JSON, and positionally-indexed.
![cover-art](./docs/src/coverart/cover-combined.png)
For example, suppose you have a CSV data file like this:
```
county,tiv_2011,tiv_2012,line,construction
SEMINOLE,22890.55,20848.71,Residential,Wood
MIAMI DADE,1158674.85,1076001.08,Residential,Masonry
PALM BEACH,1174081.5,1856589.17,Residential,Masonry
MIAMI DADE,2850980.31,2650932.72,Commercial,Reinforced Masonry
HIGHLANDS,23006.41,19757.91,Residential,Wood
HIGHLANDS,49155.16,47362.96,Residential,Wood
DUVAL,1731888.18,2785551.63,Residential,Masonry
ST. JOHNS,29589.12,35207.53,Residential,Wood
```
Then, on the fly, you can add new fields which are functions of existing fields, drop fields, sort, aggregate statistically, pretty-print, and more:
```
$ mlr --icsv --opprint --barred \
put '$tiv_delta = $tiv_2012 - $tiv_2011; unset $tiv_2011, $tiv_2012' \
then sort -nr tiv_delta flins.csv
+------------+-------------+----------------+
| county | line | tiv_delta |
+------------+-------------+----------------+
| Duval | Residential | 1053663.450000 |
| Palm Beach | Residential | 682507.670000 |
| St. Johns | Residential | 5618.410000 |
| Highlands | Residential | -1792.200000 |
| Seminole | Residential | -2041.840000 |
| Highlands | Residential | -3248.500000 |
| Miami Dade | Residential | -82673.770000 |
| Miami Dade | Commercial | -200047.590000 |
+------------+-------------+----------------+
```
This is something the Unix toolkit always could have done, and arguably always
should have done.
* Miller operates on **key-value-pair data** while the familiar
Unix tools operate on integer-indexed fields: if the natural data structure for
@ -18,110 +84,10 @@ the latter is the array, then Miller's natural data structure is the
insertion-ordered hash map.
* Miller handles a **variety of data formats**,
including but not limited to the familiar **CSV**, **TSV**, and **JSON**/**JSON Lines**.
including but not limited to the familiar **CSV**, **TSV**, and **JSON**.
(Miller can handle **positionally-indexed data** too!)
In the above image you can see how Miller embraces the common themes of
key-value-pair data in a variety of data formats.
# Getting started
* [Miller in 10 minutes](https://miller.readthedocs.io/en/latest/10min)
* [A Guide To Command-Line Data Manipulation](https://www.smashingmagazine.com/2022/12/guide-command-line-data-manipulation-cli-miller)
* [A quick tutorial on Miller](https://www.ict4g.net/adolfo/notes/data-analysis/miller-quick-tutorial.html)
* [Miller Exercises](https://github.com/GuilloteauQ/miller-exercises)
* [Tools to manipulate CSV files from the Command Line](https://www.ict4g.net/adolfo/notes/data-analysis/tools-to-manipulate-csv.html)
* [www.togaware.com/linux/survivor/CSV_Files.html](https://www.togaware.com/linux/survivor/CSV_Files.html)
* [MLR for CSV manipulation](https://guillim.github.io/terminal/2018/06/19/MLR-for-CSV-manipulation.html)
* [Linux Magazine: Process structured text files with Miller](https://www.linux-magazine.com/Issues/2016/187/Miller)
* [Miller: Command Line CSV File Processing](https://onepointzero.app/posts/miller-command-line-csv-file-processing/)
* [Miller - A Swiss Army Chainsaw for CSV Data, Data Science and Data Munging](https://fuzzyblog.io/blog/data_science/2022/05/13/miller-a-swiss-army-chainsaw-for-csv-data-data-science-and-data-munging.html)
* [Pandas Killer: mlr, the Scientist](https://xvzftube.xyz/posts/pandas_killers/#mlr%3A-the-scientist)
# More documentation links
* [**Full documentation**](https://miller.readthedocs.io/)
* [Miller's license is two-clause BSD](https://github.com/johnkerl/miller/blob/main/LICENSE.txt)
* [Notes about issue-labeling in the Github repo](https://github.com/johnkerl/miller/wiki/Issue-labeling)
* [Active issues](https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc)
# Installing
There's a good chance you can get Miller pre-built for your system:
[![Ubuntu](https://img.shields.io/badge/distros-ubuntu-db4923.svg)](https://launchpad.net/ubuntu/+source/miller)
[![Ubuntu 16.04 LTS](https://img.shields.io/badge/distros-ubuntu1604lts-db4923.svg)](https://launchpad.net/ubuntu/xenial/+package/miller)
[![Fedora](https://img.shields.io/badge/distros-fedora-173b70.svg)](https://packages.fedoraproject.org/pkgs/miller/miller/)
[![Debian](https://img.shields.io/badge/distros-debian-c70036.svg)](https://packages.debian.org/stable/miller)
[![Gentoo](https://img.shields.io/badge/distros-gentoo-4e4371.svg)](https://packages.gentoo.org/packages/sys-apps/miller)
[![Pro-Linux](https://img.shields.io/badge/distros-prolinux-3a679d.svg)](http://www.pro-linux.de/cgi-bin/DBApp/check.cgi?ShowApp..20427.100)
[![Arch Linux](https://img.shields.io/badge/distros-archlinux-1792d0.svg)](https://aur.archlinux.org/packages/miller-git)
[![NetBSD](https://img.shields.io/badge/distros-netbsd-f26711.svg)](http://pkgsrc.se/textproc/miller)
[![FreeBSD](https://img.shields.io/badge/distros-freebsd-8c0707.svg)](https://www.freshports.org/textproc/miller/)
[![Anaconda](https://img.shields.io/badge/distros-anaconda-63ad41.svg)](https://anaconda.org/conda-forge/miller/)
[![Snap](https://img.shields.io/badge/distros-snap-d85f33.svg)](https://snapcraft.io/miller)
[![Homebrew/MacOSX](https://img.shields.io/badge/distros-homebrew-ba832b.svg)](https://formulae.brew.sh/formula/miller)
[![MacPorts/MacOSX](https://img.shields.io/badge/distros-macports-1376ec.svg)](https://www.macports.org/ports.php?by=name&substr=miller)
[![Chocolatey](https://img.shields.io/badge/distros-chocolatey-red.svg)](https://chocolatey.org/packages/miller)
[![WinGet](https://img.shields.io/badge/distros-winget-392f55.svg)](https://github.com/microsoft/winget-pkgs/tree/master/manifests/m/Miller/Miller)
|OS|Installation command|
|---|---|
|Linux|`yum install miller`<br/> `apt-get install miller`<br/> `snap install miller`|
|Mac|`brew install miller`<br/>`port install miller`|
|Windows|`choco install miller`<br/>`winget install Miller.Miller`<br/>`scoop install main/miller`|
See also [README-versions.md](./README-versions.md) for a full list of package versions. Note that long-term-support (LTS) releases will likely be on older versions.
See also [building from source](https://miller.readthedocs.io/en/latest/build.html).
# Community
[![GitHub stars](https://img.shields.io/github/stars/johnkerl/miller.svg?label=GitHub%20stars)](https://github.com/johnkerl/miller/stargazers)
[![Homebrew downloads](https://badges.weareopensource.me:/homebrew/installs/dy/miller?label=Homebrew%20downloads)](https://formulae.brew.sh/formula/miller)
[![Conda downloads](https://anaconda.org/conda-forge/miller/badges/downloads.svg?label=Conda%20downloads)](https://anaconda.org/conda-forge/miller)
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
[![All Contributors](https://img.shields.io/badge/all_contributors-41-orange.svg?style=flat-square)](#contributors-)
<!-- ALL-CONTRIBUTORS-BADGE:END -->
* Discussion forum: https://github.com/johnkerl/miller/discussions
* Feature requests / bug reports: https://github.com/johnkerl/miller/issues
* How to contribute: [https://miller.readthedocs.io/en/latest/contributing/](https://miller.readthedocs.io/en/latest/contributing/)
# Build status
[![Multi-platform build status](https://github.com/johnkerl/miller/actions/workflows/go.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/go.yml)
[![CodeQL status](https://github.com/johnkerl/miller/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/codeql-analysis.yml)
[![Codespell status](https://github.com/johnkerl/miller/actions/workflows/codespell.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/codespell.yml)
[![🧪 Snap Builds](https://github.com/johnkerl/miller/actions/workflows/test-snap-can-build.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/test-snap-can-build.yml)
<!--
[![Release status](https://github.com/johnkerl/miller/actions/workflows/release.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/release.yml)
-->
# Building from source
* First:
* `cd /where/you/want/to/put/the/source`
* `git clone https://github.com/johnkerl/miller`
* `cd miller`
* With `make`:
* To build: `make`. This takes just a few seconds and produces the Miller executable, which is `./mlr` (or `.\mlr.exe` on Windows).
* To run tests: `make check`.
* To install: `make install`. This installs the executable `/usr/local/bin/mlr` and manual page `/usr/local/share/man/man1/mlr.1` (so you can do `man mlr`).
* You can do `./configure --prefix=/some/install/path` before `make install` if you want to install somewhere other than `/usr/local`.
* Without `make`:
* To build: `go build github.com/johnkerl/miller/v6/cmd/mlr`.
* To run tests: `go test github.com/johnkerl/miller/v6/pkg/...` and `mlr regtest`.
* To install: `go install github.com/johnkerl/miller/v6/cmd/mlr@latest` will install to _GOPATH_`/bin/mlr`.
* See also the doc page on [building from source](https://miller.readthedocs.io/en/latest/build).
* For more developer information please see [README-dev.md](./README-dev.md).
# For developers
* [README-dev.md](README-dev.md)
* [How to contribute](https://miller.readthedocs.io/en/latest/contributing/)
# License
[License: BSD2](https://github.com/johnkerl/miller/blob/main/LICENSE.txt)
For a few more examples please see [Miller in 10 minutes](http://johnkerl.org/miller/doc/10-min.html).
# Features
@ -157,110 +123,29 @@ Miller retains only as much data as needed. This means that whenever
functionally possible, you can operate on files which are larger than your
system&rsquo;s available RAM, and you can use Miller in **tail -f** contexts.
* Miller is **pipe-friendly** and interoperates with the Unix toolkit.
* Miller is **pipe-friendly** and interoperates with the Unix toolkit
* Miller's I/O formats include **tabular pretty-printing**, **positionally
indexed** (Unix-toolkit style), CSV, TSV, JSON, JSON Lines, and others.
indexed** (Unix-toolkit style), CSV, JSON, and others
* Miller does **conversion** between formats.
* Miller does **conversion** between formats
* Miller's **processing is format-aware**: e.g. CSV `sort` and `tac` keep header lines first.
* Miller's **processing is format-aware**: e.g. CSV `sort` and `tac` keep header
lines first
* Miller has high-throughput **performance** on par with the Unix toolkit.
* Miller has high-throughput **performance** on par with the Unix toolkit
* Miller is written in portable, modern Go, with **zero runtime dependencies**.
You can download or compile a single binary, `scp` it to a faraway machine,
and expect it to work.
* Not unlike `jq` (http://stedolan.github.io/jq/) for JSON, Miller is written
in portable, modern C, with **zero runtime dependencies**. You can download or
compile a single binary, `scp` it to a faraway machine, and expect it to work.
# What people are saying about Miller
# Documentation links
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Today I discovered Miller—it&#39;s like jq but for CSV: <a href="https://t.co/pn5Ni241KM">https://t.co/pn5Ni241KM</a><br><br>Also, &quot;Miller complements data-analysis tools such as R, pandas, etc.: you can use Miller to clean and prepare your data.&quot; <a href="https://twitter.com/GreatBlueC?ref_src=twsrc%5Etfw">@GreatBlueC</a> <a href="https://twitter.com/nfmcclure?ref_src=twsrc%5Etfw">@nfmcclure</a></p>&mdash; Adrien Trouillaud (@adrienjt) <a href="https://twitter.com/adrienjt/status/1308963056592891904?ref_src=twsrc%5Etfw">September 24, 2020</a></blockquote>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Underappreciated swiss-army command-line chainsaw.<br><br>&quot;Miller is like awk, sed, cut, join, and sort for [...] CSV, TSV, and [...] JSON.&quot; <a href="https://t.co/TrQqSUK3KK">https://t.co/TrQqSUK3KK</a></p>&mdash; Dirk Eddelbuettel (@eddelbuettel) <a href="https://twitter.com/eddelbuettel/status/836555980771061760?ref_src=twsrc%5Etfw">February 28, 2017</a></blockquote>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Miller looks like a great command line tool for working with CSV data. Sed, awk, cut, join all rolled into one: <a href="http://t.co/9BBb6VCZ6Y">http://t.co/9BBb6VCZ6Y</a></p>&mdash; Mike Loukides (@mikeloukides) <a href="https://twitter.com/mikeloukides/status/632885317389950976?ref_src=twsrc%5Etfw">August 16, 2015</a></blockquote>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Miller is like sed, awk, cut, join, and sort for name-indexed data such as CSV: <a href="http://t.co/1zPbfg6B2W">http://t.co/1zPbfg6B2W</a> - handy tool!</p>&mdash; Ilya Grigorik (@igrigorik) <a href="https://twitter.com/igrigorik/status/635134857283153920?ref_src=twsrc%5Etfw">August 22, 2015</a></blockquote>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Btw, I think Miller is the best CLI tool to deal with CSV. I used to use this when I need to preprocess too big CSVs to load into R (now we have vroom, so such cases might be rare, though...)<a href="https://t.co/kUjrSSGJoT">https://t.co/kUjrSSGJoT</a></p>&mdash; Hiroaki Yutani (@yutannihilat_en) <a href="https://twitter.com/yutannihilat_en/status/1252392795676934144?ref_src=twsrc%5Etfw">April 21, 2020</a></blockquote>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Miller: a *format-aware* data munging tool By <a href="https://twitter.com/__jo_ker__?ref_src=twsrc%5Etfw">@__jo_ker__</a> to overcome limitations with *line-aware* workshorses like awk, sed et al <a href="https://t.co/LCyPkhYvt9">https://t.co/LCyPkhYvt9</a><br><br>The project website is a fantastic example of good software documentation!!</p>&mdash; Donny Daniel (@dnnydnl) <a href="https://twitter.com/dnnydnl/status/1038883999391932416?ref_src=twsrc%5Etfw">September 9, 2018</a></blockquote>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Holy holly data swiss army knife batman! How did no one suggest Miller <a href="https://t.co/JGQpmRAZLv">https://t.co/JGQpmRAZLv</a> for solving database cleaning / ETL issues to me before <br><br>Congrats to <a href="https://twitter.com/__jo_ker__?ref_src=twsrc%5Etfw">@__jo_ker__</a> for amazingly intuitive tool for critical data management tasks!<a href="https://twitter.com/hashtag/DataScienceandLaw?src=hash&amp;ref_src=twsrc%5Etfw">#DataScienceandLaw</a> <a href="https://twitter.com/hashtag/ComputationalLaw?src=hash&amp;ref_src=twsrc%5Etfw">#ComputationalLaw</a></p>&mdash; James Miller (@japanlawprof) <a href="https://twitter.com/japanlawprof/status/1006547451409518597?ref_src=twsrc%5Etfw">June 12, 2018</a></blockquote>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">🤯<a href="https://twitter.com/__jo_ker__?ref_src=twsrc%5Etfw">@__jo_ker__</a>&#39;s Miller easily reads, transforms, + writes all sorts of tabular data. It&#39;s standalone, fast, and built for streaming data (operating on one line at a time, so you can work on files larger than memory).<br><br>And the docs are dream. I&#39;ve been reading them all morning! <a href="https://t.co/Be2pGPZK6t">https://t.co/Be2pGPZK6t</a></p>&mdash; Benjamin Wolfe (he/him) (@BenjaminWolfe) <a href="https://twitter.com/BenjaminWolfe/status/1435966268499128324?ref_src=twsrc%5Etfw">September 9, 2021</a></blockquote>
## Contributors ✨
Thanks to all the fine people who help make Miller better ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
<!-- prettier-ignore-start -->
<!-- markdownlint-disable -->
<table>
<tr>
<td align="center"><a href="https://github.com/aborruso"><img src="https://avatars.githubusercontent.com/u/30607?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Andrea Borruso</b></sub></a><br /><a href="#ideas-aborruso" title="Ideas, Planning, & Feedback">🤔</a> <a href="#design-aborruso" title="Design">🎨</a></td>
<td align="center"><a href="https://sjackman.ca/"><img src="https://avatars.githubusercontent.com/u/291551?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Shaun Jackman</b></sub></a><br /><a href="#ideas-sjackman" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="http://www.fredtrotter.com/"><img src="https://avatars.githubusercontent.com/u/83133?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Fred Trotter</b></sub></a><br /><a href="#ideas-ftrotter" title="Ideas, Planning, & Feedback">🤔</a> <a href="#design-ftrotter" title="Design">🎨</a></td>
<td align="center"><a href="https://github.com/Komosa"><img src="https://avatars.githubusercontent.com/u/10688154?v=4?s=50" width="50px;" alt=""/><br /><sub><b>komosa</b></sub></a><br /><a href="#ideas-Komosa" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/jungle-boogie"><img src="https://avatars.githubusercontent.com/u/1111743?v=4?s=50" width="50px;" alt=""/><br /><sub><b>jungle-boogie</b></sub></a><br /><a href="#ideas-jungle-boogie" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/0-wiz-0"><img src="https://avatars.githubusercontent.com/u/2221844?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Thomas Klausner</b></sub></a><br /><a href="#infra-0-wiz-0" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a></td>
<td align="center"><a href="https://github.com/skitt"><img src="https://avatars.githubusercontent.com/u/2128935?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Stephen Kitt</b></sub></a><br /><a href="#platform-skitt" title="Packaging/porting to new platform">📦</a></td>
</tr>
<tr>
<td align="center"><a href="http://leahneukirchen.org/"><img src="https://avatars.githubusercontent.com/u/139?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Leah Neukirchen</b></sub></a><br /><a href="#ideas-leahneukirchen" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/lgbaldoni"><img src="https://avatars.githubusercontent.com/u/1450716?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Luigi Baldoni</b></sub></a><br /><a href="#platform-lgbaldoni" title="Packaging/porting to new platform">📦</a></td>
<td align="center"><a href="https://yutani.rbind.io/"><img src="https://avatars.githubusercontent.com/u/1978793?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Hiroaki Yutani</b></sub></a><br /><a href="#ideas-yutannihilation" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://3e.org/"><img src="https://avatars.githubusercontent.com/u/41439?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Daniel M. Drucker</b></sub></a><br /><a href="#ideas-dmd" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/NikosAlexandris"><img src="https://avatars.githubusercontent.com/u/7046639?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Nikos Alexandris</b></sub></a><br /><a href="#ideas-NikosAlexandris" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/kundeng"><img src="https://avatars.githubusercontent.com/u/89032?v=4?s=50" width="50px;" alt=""/><br /><sub><b>kundeng</b></sub></a><br /><a href="#platform-kundeng" title="Packaging/porting to new platform">📦</a></td>
<td align="center"><a href="http://victorsergienko.com/"><img src="https://avatars.githubusercontent.com/u/151199?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Victor Sergienko</b></sub></a><br /><a href="#platform-singalen" title="Packaging/porting to new platform">📦</a></td>
</tr>
<tr>
<td align="center"><a href="https://github.com/gromgit"><img src="https://avatars.githubusercontent.com/u/215702?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Adrian Ho</b></sub></a><br /><a href="#design-gromgit" title="Design">🎨</a></td>
<td align="center"><a href="https://github.com/Zachp"><img src="https://avatars.githubusercontent.com/u/1316442?v=4?s=50" width="50px;" alt=""/><br /><sub><b>zachp</b></sub></a><br /><a href="#platform-Zachp" title="Packaging/porting to new platform">📦</a></td>
<td align="center"><a href="https://dsel.net/"><img src="https://avatars.githubusercontent.com/u/921669?v=4?s=50" width="50px;" alt=""/><br /><sub><b>David Selassie</b></sub></a><br /><a href="#ideas-davidselassie" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="http://www.joelparkerhenderson.com/"><img src="https://avatars.githubusercontent.com/u/27145?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Joel Parker Henderson</b></sub></a><br /><a href="#ideas-joelparkerhenderson" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/divtiply"><img src="https://avatars.githubusercontent.com/u/5359679?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Michel Ace</b></sub></a><br /><a href="#ideas-divtiply" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="http://fuco1.github.io/sitemap.html"><img src="https://avatars.githubusercontent.com/u/2664959?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Matus Goljer</b></sub></a><br /><a href="#ideas-Fuco1" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/terorie"><img src="https://avatars.githubusercontent.com/u/21371810?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Richard Patel</b></sub></a><br /><a href="#platform-terorie" title="Packaging/porting to new platform">📦</a></td>
</tr>
<tr>
<td align="center"><a href="https://blog.kub1x.org/"><img src="https://avatars.githubusercontent.com/u/1833840?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Jakub Podlaha</b></sub></a><br /><a href="#design-kub1x" title="Design">🎨</a></td>
<td align="center"><a href="https://goo.gl/ZGZynx"><img src="https://avatars.githubusercontent.com/u/85767?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Miodrag Milić</b></sub></a><br /><a href="#platform-majkinetor" title="Packaging/porting to new platform">📦</a></td>
<td align="center"><a href="https://github.com/derekmahar"><img src="https://avatars.githubusercontent.com/u/6047?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Derek Mahar</b></sub></a><br /><a href="#ideas-derekmahar" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/spmundi"><img src="https://avatars.githubusercontent.com/u/38196185?v=4?s=50" width="50px;" alt=""/><br /><sub><b>spmundi</b></sub></a><br /><a href="#ideas-spmundi" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/koernepr"><img src="https://avatars.githubusercontent.com/u/24551942?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Peter Körner</b></sub></a><br /><a href="#security-koernepr" title="Security">🛡️</a></td>
<td align="center"><a href="https://github.com/rubyFeedback"><img src="https://avatars.githubusercontent.com/u/46686565?v=4?s=50" width="50px;" alt=""/><br /><sub><b>rubyFeedback</b></sub></a><br /><a href="#ideas-rubyFeedback" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/rbolsius"><img src="https://avatars.githubusercontent.com/u/2106964?v=4?s=50" width="50px;" alt=""/><br /><sub><b>rbolsius</b></sub></a><br /><a href="#platform-rbolsius" title="Packaging/porting to new platform">📦</a></td>
</tr>
<tr>
<td align="center"><a href="https://github.com/awildturtok"><img src="https://avatars.githubusercontent.com/u/1553491?v=4?s=50" width="50px;" alt=""/><br /><sub><b>awildturtok</b></sub></a><br /><a href="#ideas-awildturtok" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/agguser"><img src="https://avatars.githubusercontent.com/u/1206106?v=4?s=50" width="50px;" alt=""/><br /><sub><b>agguser</b></sub></a><br /><a href="#ideas-agguser" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/jganong"><img src="https://avatars.githubusercontent.com/u/2783890?v=4?s=50" width="50px;" alt=""/><br /><sub><b>jganong</b></sub></a><br /><a href="#ideas-jganong" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://www.linkedin.com/in/fulvio-scapin"><img src="https://avatars.githubusercontent.com/u/69568?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Fulvio Scapin</b></sub></a><br /><a href="#ideas-trantor" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/torbiak"><img src="https://avatars.githubusercontent.com/u/109347?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Jordan Torbiak</b></sub></a><br /><a href="#ideas-torbiak" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/Andy1978"><img src="https://avatars.githubusercontent.com/u/240064?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Andreas Weber</b></sub></a><br /><a href="#ideas-Andy1978" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/vapniks"><img src="https://avatars.githubusercontent.com/u/174330?v=4?s=50" width="50px;" alt=""/><br /><sub><b>vapniks</b></sub></a><br /><a href="#platform-vapniks" title="Packaging/porting to new platform">📦</a></td>
</tr>
<tr>
<td align="center"><a href="https://github.com/89z"><img src="https://avatars.githubusercontent.com/u/73562167?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Zombo</b></sub></a><br /><a href="#platform-89z" title="Packaging/porting to new platform">📦</a></td>
<td align="center"><a href="https://github.com/BEFH"><img src="https://avatars.githubusercontent.com/u/3386600?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Brian Fulton-Howard</b></sub></a><br /><a href="#platform-BEFH" title="Packaging/porting to new platform">📦</a></td>
<td align="center"><a href="https://github.com/ChCyrill"><img src="https://avatars.githubusercontent.com/u/2165604?v=4?s=50" width="50px;" alt=""/><br /><sub><b>ChCyrill</b></sub></a><br /><a href="#ideas-ChCyrill" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/jauderho"><img src="https://avatars.githubusercontent.com/u/13562?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Jauder Ho</b></sub></a><br /><a href="https://github.com/johnkerl/miller/commits?author=jauderho" title="Code">💻</a></td>
<td align="center"><a href="https://github.com/psacawa"><img src="https://avatars.githubusercontent.com/u/21274063?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Paweł Sacawa</b></sub></a><br /><a href="https://github.com/johnkerl/miller/issues?q=author%3Apsacawa" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/schragge"><img src="https://avatars.githubusercontent.com/u/4294278?v=4?s=50" width="50px;" alt=""/><br /><sub><b>schragge</b></sub></a><br /><a href="https://github.com/johnkerl/miller/commits?author=schragge" title="Documentation">📖</a></td>
<td align="center"><a href="https://github.com/Poshi"><img src="https://avatars.githubusercontent.com/u/1780510?v=4?s=50" width="50px;" alt=""/><br /><sub><b>Jordi</b></sub></a><br /><a href="https://github.com/johnkerl/miller/commits?author=Poshi" title="Documentation">📖</a> <a href="#ideas-Poshi" title="Ideas, Planning, & Feedback">🤔</a></td>
</tr>
</table>
<!-- markdownlint-restore -->
<!-- prettier-ignore-end -->
<!-- ALL-CONTRIBUTORS-LIST:END -->
<a href="https://github.com/johnkerl/miller/graphs/contributors">
<img src="https://contributors-img.web.app/image?repo=johnkerl/miller" />
</a>
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind are welcome!
* [**Full documentation for latest release**](http://johnkerl.org/miller/doc)
* [Head docs](http://johnkerl.org/miller-releases/miller-head/doc/index.html) match
[head code](https://github.com/johnkerl/miller); [release-specific docs](http://johnkerl.org/miller/doc/release-docs.html)
match [release-specific code](https://github.com/johnkerl/miller/tags).
* [Miller's license is two-clause BSD](https://github.com/johnkerl/miller/blob/master/LICENSE.txt).
* [Build information including dependencies](http://johnkerl.org/miller/doc/build.html)
* [Notes about issue-labeling in the GitHub repo](https://github.com/johnkerl/miller/wiki/Issue-labeling)
* [Active issues](https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc)

1235
aclocal.m4 vendored Normal file

File diff suppressed because it is too large Load diff

33
appveyor.yml Normal file
View file

@ -0,0 +1,33 @@
# AppVeyor build recipe: builds Miller on Windows inside MSYS2 and publishes
# the resulting executable together with the MSYS2 runtime DLL.
install:
  # Put the MSYS2 tools first on PATH.
  - set PATH=C:\msys64\usr\bin;%PATH%
  # Download the msys2-keyring package and its signature, initialise and
  # populate the pacman keyring, verify the signature, then install the package.
  - bash -lc "curl -O http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz"
  - bash -lc "curl -O http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz.sig"
  - bash -lc "pacman-key --init"
  - bash -lc "pacman-key --populate msys2"
  - bash -lc "pacman-key --verify msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz.sig"
  - bash -lc "pacman --ask 20 -U msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz"
  # Stop every process running from C:\msys64 before the package upgrade.
  - ps: Get-Process | Where-Object {$_.path -like 'C:\msys64*'} | Stop-Process
  - bash -lc "pacman --needed --ask 20 -Syu"
  - bash -lc "pacman --needed --ask 20 -Sy"
  # Packages installed for the build.
  - bash -lc "pacman --noconfirm -S base-devel"
  - bash -lc "pacman --noconfirm -S msys2-devel"
  - bash -lc "pacman --noconfirm -S mingw-w64-x86_64-toolchain"
  - bash -lc "pacman --noconfirm -S mingw-w64-x86_64-pcre"
  - bash -lc "pacman --noconfirm -S msys2-runtime"
  - bash -lc "pacman --noconfirm -S isl"
build_script:
  # Log the build environment before building.
  - bash -lc "cd $APPVEYOR_BUILD_FOLDER; pwd"
  - bash -lc "cd $APPVEYOR_BUILD_FOLDER; ls -l"
  - bash -lc "cd $APPVEYOR_BUILD_FOLDER; which gcc"
  - bash -lc "cd $APPVEYOR_BUILD_FOLDER; gcc --version"
  - bash -lc "cd $APPVEYOR_BUILD_FOLDER; pacman -Q"
  - bash -lc "cd $APPVEYOR_BUILD_FOLDER; cat msys2-build.sh"
  # Run the build; if it fails, print config.log and fail the step.
  - bash -lc "cd $APPVEYOR_BUILD_FOLDER; bash -x msys2-build.sh || (cat config.log && exit 1)"
  # Copy the MSYS2 runtime DLL into the build folder so it can be published below.
  - bash -lc "cd $APPVEYOR_BUILD_FOLDER; cp /usr/bin/msys-2.0.dll ."
artifacts:
  # Each artifact is one list item carrying both its path and its display name.
  - path: ./c/mlr.exe
    name: Miller executable
  - path: ./msys-2.0.dll
    name: MSYS2 DLL

270
autotools/ar-lib Executable file
View file

@ -0,0 +1,270 @@
#! /bin/sh
# Wrapper for Microsoft lib.exe
# Program name: prefixes diagnostics and appears in the --help/--version text.
me=ar-lib
# Version stamp printed by --version.
scriptversion=2012-03-01.08; # UTC
# Copyright (C) 2010-2017 Free Software Foundation, Inc.
# Written by Peter Rosin <peda@lysator.liu.se>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# func_error message
# Report MESSAGE on standard error, prefixed with the program name, and
# terminate the script with exit status 1.
func_error ()
{
  echo >&2 "$me: $1"
  exit 1
}
file_conv=

# func_file_conv build_file
# Translate a $build file name into the form the $host tools expect and
# leave the result in $file.  Only absolute, non-UNC names are translated,
# and only Windows hosts are supported; any other name is passed through.
func_file_conv ()
{
  file=$1

  # Relative names and UNC names (leading '//') are used verbatim.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Work out the conversion flavour once, the first time it is needed.
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv in
    mingw)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin)
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine)
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
# func_at_file at_file operation archive
# Run OPERATION against ARCHIVE once for every member named in AT_FILE.
# The member names read from the @FILE are handed over as they are, NOT
# passed through func_file_conv, since the user would need to supply
# preconverted file names to binutils ar, at least for MinGW.
func_at_file ()
{
  at_file_contents=`cat "$1"`
  operation=$2
  archive=$3

  # Let the shell re-split the file contents, so quoted member names that
  # contain blanks stay in one piece.  The leading "x" keeps `set' from
  # treating the first member as an option; it is shifted away again.
  eval set x "$at_file_contents"
  shift

  while test $# -gt 0
  do
    member=$1
    shift
    $AR -NOLOGO $operation:"$member" "$archive" || exit $?
  done
}
# Options that need no PROGRAM: --version and --help print their text and
# exit; an empty command line is an error.
case $1 in
  -v | --v*)
    echo "$me, version $scriptversion"
    exit $?
    ;;
  -h | --h*)
    cat <<EOF
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
Members may be specified in a file named with @FILE.
EOF
    exit $?
    ;;
  '')
    func_error "no command. Try '$0 --help' for more information."
    ;;
esac
# At least PROGRAM, ACTION and ARCHIVE must be present.
if test $# -lt 3; then
func_error "you must specify a program, an action and an archive"
fi
# The first argument is the archiver program to run.
AR=$1
shift
# Fold any of the archiver options listed below into $AR; the first argument
# that is not one of them is taken as the action and ends the loop.
while :
do
# An action and an archive must still follow.
if test $# -lt 2; then
func_error "you must specify a program, an action and an archive"
fi
case $1 in
-lib | -LIB \
| -ltcg | -LTCG \
| -machine* | -MACHINE* \
| -subsystem* | -SUBSYSTEM* \
| -verbose | -VERBOSE \
| -wx* | -WX* )
AR="$AR $1"
shift
;;
*)
action=$1
shift
break
;;
esac
done
# The archive follows the action.  $orig_archive keeps the name as given on
# the command line; $archive holds its func_file_conv result.
orig_archive=$1
shift
func_file_conv "$orig_archive"
archive=$file
# strip leading dash in $action
action=${action#-}
# One flag per recognised action letter; a flag becomes "yes" when its
# letter occurs in $action.
delete=
extract=
list=
quick=
replace=
index=
create=
# Consume $action one character at a time.
while test -n "$action"
do
case $action in
d*) delete=yes ;;
x*) extract=yes ;;
t*) list=yes ;;
q*) quick=yes ;;
r*) replace=yes ;;
s*) index=yes ;;
S*) ;; # the index is always updated implicitly
c*) create=yes ;;
u*) ;; # TODO: don't ignore the update modifier
v*) ;; # TODO: don't ignore the verbose modifier
*)
func_error "unknown action specified"
;;
esac
# Drop the letter that was just handled.
action=${action#?}
done
# Accept exactly one of d/x/t/q/r (with or without s), or s on its own.
# Two or more of d/x/t/q/r is an error, and so is none at all without s.
case $delete$extract$list$quick$replace,$index in
yes,* | ,yes)
;;
yesyes*)
func_error "more than one action specified"
;;
*)
func_error "no action specified"
;;
esac
# Carry out the requested action.  The remaining positional parameters are
# the archive members; a member of the form @FILE names a file that lists
# members instead.
if test -n "$delete"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  for member
  do
    # Use the loop variable here: nothing shifts the positional parameters
    # in this loop, so "$1" would name the first member on every iteration.
    case $member in
      @*)
        func_at_file "${member#@}" -REMOVE "$archive"
        ;;
      *)
        func_file_conv "$member"
        $AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
        ;;
    esac
  done

elif test -n "$extract"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  if test $# -gt 0; then
    # Extract only the members that were named (same note on "$1" as above).
    for member
    do
      case $member in
        @*)
          func_at_file "${member#@}" -EXTRACT "$archive"
          ;;
        *)
          func_file_conv "$member"
          $AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
          ;;
      esac
    done
  else
    # No members given: extract everything the archive lists.  Backslashes
    # are doubled so that `read' hands the names through unchanged.
    $AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
    do
      $AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
    done
  fi

elif test -n "$quick$replace"; then
  if test ! -f "$orig_archive"; then
    if test -z "$create"; then
      echo "$me: creating $orig_archive"
    fi
    # No existing archive to carry over into the new one.
    orig_archive=
  else
    orig_archive=$archive
  fi

  # Rotate the positional parameters: append the converted form of "$1",
  # then drop the "x" placeholder and the unconverted original.  After the
  # loop "$@" holds the converted members in their original order.
  for member
  do
    case $1 in
      @*)
        func_file_conv "${1#@}"
        set x "$@" "@$file"
        ;;
      *)
        func_file_conv "$1"
        set x "$@" "$file"
        ;;
    esac
    shift
    shift
  done

  if test -n "$orig_archive"; then
    $AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
  else
    $AR -NOLOGO -OUT:"$archive" "$@" || exit $?
  fi

elif test -n "$list"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  $AR -NOLOGO -LIST "$archive" || exit $?
fi

347
autotools/compile Executable file
View file

@ -0,0 +1,347 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
# Version stamp printed by --version.
scriptversion=2012-10-14.11; # UTC
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
nl='
'
# Generate the tab with printf instead of embedding a literal one, so it
# cannot be lost when editors or other tools rewrite whitespace.
tab=`printf '\t'`

# We need space, tab and new line, in precisely that order.
IFS=" $tab$nl"
file_conv=

# func_file_conv build_file lazy
# Rewrite a $build file name into the form the $host tools expect and leave
# the answer in $file.  Only Windows hosts are handled.  LAZY is a comma
# separated list of conversion types: when the detected type is listed
# there, the name is left untouched.
func_file_conv ()
{
  file=$1

  # Only absolute, non-UNC names are candidates for conversion.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Detect the conversion type the first time it is needed.
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv/,$2, in
    *,$file_conv,*)
      # Listed in LAZY: keep the name as it is.
      ;;
    mingw/*)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin/*)
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine/*)
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
# func_cl_dashL linkdir
# Make cl look for libraries in LINKDIR: the converted directory is appended
# to the ';'-separated $lib_path and recorded in $linker_opts as -LIBPATH.
func_cl_dashL ()
{
  func_file_conv "$1"
  # Insert the ';' separator only when $lib_path already has an entry.
  lib_path="${lib_path:+$lib_path;}$file"
  linker_opts="$linker_opts -LIBPATH:$file"
}
# func_cl_dashl library
# Do a library search-path lookup for cl.  The ';'-separated directories in
# $lib_path and then $LIB are searched for LIBRARY; the file found is left
# in $lib.  When nothing is found, $lib falls back to LIBRARY.lib.
func_cl_dashl ()
{
  lib=$1
  found=no
  save_IFS=$IFS
  IFS=';'
  for dir in $lib_path $LIB
  do
    IFS=$save_IFS
    # Preference order within one directory: NAME.dll.lib (only while
    # $shared is true), then NAME.lib, then libNAME.a.
    if $shared && test -f "$dir/$lib.dll.lib"; then
      lib=$dir/$lib.dll.lib
    elif test -f "$dir/$lib.lib"; then
      lib=$dir/$lib.lib
    elif test -f "$dir/lib$lib.a"; then
      lib=$dir/lib$lib.a
    else
      continue
    fi
    found=yes
    break
  done
  IFS=$save_IFS
  test "$found" = yes || lib=$lib.lib
}
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
#
# Walks the original arguments once, rebuilding the positional parameters
# as it goes: every iteration looks at "$1", appends its translated form
# (if any) to the end with `set x "$@" ...; shift', and the `shift' at the
# bottom of the loop then drops the original "$1".  Library directories
# and linker flags are collected in $linker_opts instead and added at the
# end of the command line.  The function finishes by exec'ing the result.
func_cl_wrapper ()
{
# Assume a capable shell
lib_path=
shared=:
linker_opts=
for arg
do
# $eat is set when the previous option already consumed this argument as
# its value; such an argument is dropped without being examined.
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
eat=1
# Object-file names are passed as -Fo<file>, anything else as -Fe<file>.
case $2 in
*.o | *.[oO][bB][jJ])
func_file_conv "$2"
set x "$@" -Fo"$file"
shift
;;
*)
func_file_conv "$2"
set x "$@" -Fe"$file"
shift
;;
esac
;;
-I)
# Include directory given as a separate argument.
eat=1
func_file_conv "$2" mingw
set x "$@" -I"$file"
shift
;;
-I*)
# Include directory attached to the option.
func_file_conv "${1#-I}" mingw
set x "$@" -I"$file"
shift
;;
-l)
# Library given as a separate argument: replaced by the file name that
# func_cl_dashl leaves in $lib.
eat=1
func_cl_dashl "$2"
set x "$@" "$lib"
shift
;;
-l*)
func_cl_dashl "${1#-l}"
set x "$@" "$lib"
shift
;;
-L)
# Library directories are only recorded (see func_cl_dashL); nothing is
# appended to the command line here.
eat=1
func_cl_dashL "$2"
;;
-L*)
func_cl_dashL "${1#-L}"
;;
-static)
# Read by func_cl_dashl, which then skips the NAME.dll.lib candidates.
shared=false
;;
-Wl,*)
# Split the comma separated list and collect each flag in $linker_opts.
arg=${1#-Wl,}
save_ifs="$IFS"; IFS=','
for flag in $arg; do
IFS="$save_ifs"
linker_opts="$linker_opts $flag"
done
IFS="$save_ifs"
;;
-Xlinker)
# The next argument is collected in $linker_opts as it is.
eat=1
linker_opts="$linker_opts $2"
;;
-*)
# Any other option is kept unchanged.
set x "$@" "$1"
shift
;;
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
# Sources with these suffixes are passed as -Tp<file>.
func_file_conv "$1"
set x "$@" -Tp"$file"
shift
;;
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
func_file_conv "$1" mingw
set x "$@" "$file"
shift
;;
*)
# Everything else is kept unchanged.
set x "$@" "$1"
shift
;;
esac
fi
shift
done
# The collected options go last on the command line, introduced by "-link".
if test -n "$linker_opts"; then
linker_opts="-link$linker_opts"
fi
exec "$@" $linker_opts
# Fallback in case exec returns.
exit 1
}
# $eat is the "skip the next argument" flag shared by the argument loops
# (the one in func_cl_wrapper and the generic one); start with it cleared.
eat=
# Handle an empty command line, --help and --version here, and hand the
# whole command line to func_cl_wrapper when the program is cl.  Any other
# program falls through to the generic handling that follows.
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]
Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.
If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "compile $scriptversion"
exit $?
;;
# Program named cl or cl.exe, with or without a leading directory.
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
func_cl_wrapper "$@" # Doesn't return...
;;
esac
# Scan the command line for the object file named by -o ($ofile) and for a
# .c source file ($cfile), rebuilding the positional parameters without the
# '-o OBJECT' pair.  Each iteration looks at "$1", re-appends what is to be
# kept with `set x "$@" ...; shift', and the `shift' at the bottom of the
# loop then drops the original "$1".
ofile=
cfile=
for arg
do
# $eat is set when the previous -o already dealt with this argument.
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
# So we strip '-o arg' only if arg is an object.
eat=1
case $2 in
*.o | *.obj)
# Remember the requested object name; the option is not re-appended.
ofile=$2
;;
*)
# Not an object file: keep '-o arg' on the command line.
set x "$@" -o "$2"
shift
;;
esac
;;
*.c)
# Remember the source file and keep it on the command line.  If several
# .c files are given, the last one ends up in $cfile.
cfile=$1
set x "$@" "$1"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
# If no '-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one.  That is ok -- this is a
# normal compilation that the losing compiler can handle.  If no
# '.c' file was seen then we are probably linking.  That is also
# ok.  In both cases simply run the command as it stands.
test -n "$ofile" && test -n "$cfile" || exec "$@"

# Name of file we expect compiler to create.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`

# Name of the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file.  Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d

# mkdir serves as the lock: retry once a second until it succeeds.
until mkdir "$lockdir" >/dev/null 2>&1
do
  sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
trap "rmdir '$lockdir'; exit 1" 1 2 15

# Run the compile and remember its exit status.
"$@"
ret=$?

# Move whichever output exists (NAME.o first, then NAME.obj) to the name
# that was requested with -o, unless it already has that name.
for produced in "$cofile" "${cofile}bj"
do
  if test -f "$produced"; then
    test "$produced" = "$ofile" || mv "$produced" "$ofile"
    break
  fi
done

rmdir "$lockdir"
exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

1480
autotools/config.guess vendored Executable file

File diff suppressed because it is too large Load diff

1801
autotools/config.sub vendored Executable file

File diff suppressed because it is too large Load diff

791
autotools/depcomp Executable file
View file

@ -0,0 +1,791 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
scriptversion=2016-01-11.22; # UTC
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
# Handle the invocations that never run a compiler: a request for the
# version, a request for help, or no command at all.  Any other first
# argument falls through to the dependency-tracking logic below.
case $1 in
  -v | --v*)
    echo "depcomp $scriptversion"
    exit $?
    ;;
  -h | --h*)
    cat <<\EOF
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
Run PROGRAMS ARGS to compile a file, generating dependencies
as side-effects.
Environment variables:
depmode Dependency tracking mode.
source Source file read by 'PROGRAMS ARGS'.
object Object file output by 'PROGRAMS ARGS'.
DEPDIR directory where to store dependencies.
depfile Dependency file to output.
tmpdepfile Temporary file to use when outputting dependencies.
libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>.
EOF
    exit $?
    ;;
  '')
    echo "$0: No command. Try '$0 --help' for more information." 1>&2
    exit 1
    ;;
esac
# Store the directory component of the path given as $1 in the global
# variable '$dir'.  The result is deliberately either empty (the path
# contains no '/') or terminated by a '/' character.
set_dir_from ()
{
  dir=
  case $1 in
    */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'` ;;
  esac
}
# Store the basename of the path given as $1, with its last suffix
# removed, in the global variable '$base' (e.g. 'sub/foo.o' gives 'foo').
set_base_from ()
{
  base=`echo "$1" | sed 's|^.*/||; s/\.[^.]*$//'`
}
# Write a placeholder into "$depfile".  This is needed whenever the
# compiler invocation did not create a dependency file, because the
# Makefile "include basename.Plo" scheme fails on a missing file.
make_dummy_depfile ()
{
  echo '#dummy' >"$depfile"
}
# Common post-processing of a compiler-generated dependency file.
# Reads the globals '$tmpdepfile', '$depfile', '$object', '$lower' and
# '$tab'; consumes "$tmpdepfile" and (re)creates "$depfile".
aix_post_process_depfile ()
{
  if test ! -f "$tmpdepfile"; then
    # The compiler produced nothing: leave a placeholder behind.
    make_dummy_depfile
  else
    # Every input line looks like 'foo.o: dependency.h'.
    # First pass: rewrite the target so that each line reads
    #   $object: dependency.h
    sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" > "$depfile"
    # Second pass: append one empty rule per dependency,
    #   dependency.h:
    # which is needed to avoid the deleted-header problem.
    sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
    rm -f "$tmpdepfile"
  fi
}
# A tabulation character.
tab=' '
# A newline character.
nl='
'
# Shared setup executed before the per-mode logic: character-class
# helpers, validation of the required environment, default names for the
# dependency files, and aliasing of modes that differ only in flags.
#
# Character ranges might be problematic outside the C locale.
# These definitions help.
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
lower=abcdefghijklmnopqrstuvwxyz
digits=0123456789
alpha=${upper}${lower}
# depmode, source and object are taken from the environment (see the
# --help text); nothing can be done without them.
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2
exit 1
fi
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
# A depfile or tmpdepfile value that is already set is kept as is.
# Otherwise the sed script below inserts "${DEPDIR-.deps}/" in front of
# the last path component of $object, turns the suffix '.x' into '.Px',
# and shortens a resulting 'Pobj' to 'Po'.  The temporary name is the
# depfile name with a 'T' inserted after the last dot (bar.Po -> bar.TPo).
depfile=${depfile-`echo "$object" |
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
# Make sure no stale temporary dependency file is lying around.
rm -f "$tmpdepfile"
# Avoid interferences from the environment.
gccflag= dashmflag=
# Some modes work just like other modes, but use different flags. We
# parameterize here, but still list the modes in the big case below,
# to make depend.m4 easier to write. Note that we *cannot* use a case
# here, because this file can only contain one case statement.
if test "$depmode" = hp; then
# HP compiler uses -M and no extra arg.
gccflag=-M
depmode=gcc
fi
if test "$depmode" = dashXmstdout; then
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
fi
# Filter command that the msvc7 and msvisualcpp modes pipe the include
# file names through; the two *msys aliases below replace it with sed.
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
if test "$depmode" = msvc7msys; then
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
fi
if test "$depmode" = xlc; then
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
fi
case "$depmode" in
gcc3)
## gcc 3 implements dependency tracking that does exactly what
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
## it if -MD -MP comes after the -MF stuff. Hmm.
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
## the command line argument order; so add the flags where they
## appear in depend2.am. Note that the slowdown incurred here
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
for arg
do
case $arg in
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
*) set fnord "$@" "$arg" ;;
esac
shift # fnord
shift # $arg
done
"$@"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
mv "$tmpdepfile" "$depfile"
;;
gcc)
## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
## (see the conditional assignment to $gccflag above).
## There are various ways to get dependency output from gcc. Here's
## why we pick this rather obscure method:
## - Don't want to use -MD because we'd like the dependencies to end
## up in a subdir. Having to rename by hand is ugly.
## (We might end up doing this anyway to support other compilers.)
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
## -MM, not -M (despite what the docs say). Also, it might not be
## supported by the other compilers which use the 'gcc' depmode.
## - Using -M directly means running the compiler twice (even worse
## than renaming).
if test -z "$gccflag"; then
gccflag=-MD,
fi
"$@" -Wp,"$gccflag$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The second -e expression handles DOS-style file names with drive
# letters.
sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the "deleted header file" problem.
## The problem is that when a header file which appears in a .P file
## is deleted, the dependency causes make to die (because there is
## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly.
## Some versions of gcc put a space before the ':'. On the theory
## that the space means something, we add a space to the output as
## well. hp depmode also adds that space, but also prefixes the VPATH
## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
sgi)
if test "$libtool" = yes; then
"$@" "-Wp,-MDupdate,$tmpdepfile"
else
"$@" -MDupdate "$tmpdepfile"
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
echo "$object : \\" > "$depfile"
# Clip off the initial element (the dependent). Don't try to be
# clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
# the IRIX cc adds comments like '#:fec' to the end of the
# dependency line.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
| tr "$nl" ' ' >> "$depfile"
echo >> "$depfile"
# The second pass generates a dummy entry for each header file.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile"
;;
xlc)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
aix)
# The C for AIX Compiler uses -M and outputs the dependencies
# in a .u file. In older versions, this file always lives in the
# current directory. Also, the AIX compiler puts '$object:' at the
# start of each line; $object doesn't have directory information.
# Version 6 uses the directory in both cases.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.u
tmpdepfile2=$base.u
tmpdepfile3=$dir.libs/$base.u
"$@" -Wc,-M
else
tmpdepfile1=$dir$base.u
tmpdepfile2=$dir$base.u
tmpdepfile3=$dir$base.u
"$@" -M
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
aix_post_process_depfile
;;
tcc)
# tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26.
# FIXME: That version was still under development at the time of writing.
# Make sure that this statement remains true also for stable, released
# versions.
# It will wrap lines (doesn't matter whether long or short) with a
# trailing '\', as in:
#
# foo.o : \
# foo.c \
# foo.h \
#
# It will put a trailing '\' even on the last line, and will use leading
# spaces rather than leading tabs (at least since its commit 0394caf7
# "Emit spaces for -MD").
"$@" -MD -MF "$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
# We have to change lines of the first kind to '$object: \'.
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
# And for each line of the second kind, we have to emit a 'dep.h:'
# dummy dependency, to avoid the deleted-header problem.
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
rm -f "$tmpdepfile"
;;
## The order of this option in the case statement is important, since the
## shell code in configure will try each of these formats in the order
## listed in this file. A plain '-MD' option would be understood by many
## compilers, so we must ensure this comes after the gcc and icc options.
pgcc)
  # Portland's C compiler understands '-MD'.
  # Will always output deps to 'file.d' where file is the root name of the
  # source file under compilation, even if file resides in a subdirectory.
  # The object file name does not affect the name of the '.d' file.
  # pgcc 10.2 will output
  # foo.o: sub/foo.c sub/foo.h
  # and will wrap long lines using '\' :
  # foo.o: sub/foo.c ... \
  # sub/foo.h ... \
  # ...
  set_dir_from "$object"
  # Use the source, not the object, to determine the base name, since
  # that's sadly what pgcc will do too.
  set_base_from "$source"
  tmpdepfile=$base.d
  # For projects that build the same source file twice into different object
  # files, the pgcc approach of using the *source* file root name can cause
  # problems in parallel builds. Use a locking strategy to avoid stomping on
  # the same $tmpdepfile.
  lockdir=$base.d-lock
  # The signal handler is installed before the lock is acquired, i.e. it
  # can fire while we are still waiting for a lock held by another
  # process.  Removing that directory would break the other process's
  # exclusive section, so the handler only removes the lock when
  # $have_lock (evaluated at signal time) says this process owns it.
  have_lock=no
  trap "
    echo '$0: caught signal, cleaning up...' >&2
    test \"\$have_lock\" = yes && rmdir '$lockdir'
    exit 1
  " 1 2 13 15
  numtries=100
  i=$numtries
  while test $i -gt 0; do
    # mkdir is a portable test-and-set.
    if mkdir "$lockdir" 2>/dev/null; then
      # This process acquired the lock.
      have_lock=yes
      "$@" -MD
      stat=$?
      # Release the lock.
      rmdir "$lockdir"
      have_lock=no
      break
    else
      # If the lock is being held by a different process, wait
      # until the winning process is done or we timeout.
      while test -d "$lockdir" && test $i -gt 0; do
        sleep 1
        i=`expr $i - 1`
      done
    fi
    i=`expr $i - 1`
  done
  trap - 1 2 13 15
  if test $i -le 0; then
    echo "$0: failed to acquire lock after $numtries attempts" >&2
    echo "$0: check lockdir '$lockdir'" >&2
    exit 1
  fi
  if test $stat -ne 0; then
    rm -f "$tmpdepfile"
    exit $stat
  fi
  rm -f "$depfile"
  # Each line is of the form `foo.o: dependent.h',
  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
  # Do two passes, one to just change these to
  # `$object: dependent.h' and one to simply `dependent.h:'.
  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
  # Some versions of the HPUX 10.20 sed can't process this invocation
  # correctly. Breaking it into two sed invocations is a workaround.
  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
    | sed -e 's/$/ :/' >> "$depfile"
  rm -f "$tmpdepfile"
  ;;
hp2)
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
# compilers, which have integrated preprocessors. The correct option
# to use with these is +Maked; it writes dependencies to a file named
# 'foo.d', which lands next to the object file, wherever that
# happens to be.
# Much of this is similar to the tru64 case; see comments there.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir.libs/$base.d
"$@" -Wc,+Maked
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
"$@" +Maked
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
# Add 'dependent.h:' lines.
sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile" "$tmpdepfile2"
;;
tru64)
# The Tru64 compiler uses -MD to generate dependencies as a side
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
# Libtool generates 2 separate objects for the 2 libraries. These
# two compilations output dependencies in $dir.libs/$base.o.d and
# in $dir$base.o.d. We have to check for both files, because
# one of the two compilations can be disabled. We should prefer
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
# automatically cleaned when .libs/ is deleted, while ignoring
# the former would cause a distcleancheck panic.
tmpdepfile1=$dir$base.o.d # libtool 1.5
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
"$@" -Wc,-MD
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
tmpdepfile3=$dir$base.d
"$@" -MD
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
# Same post-processing that is required for AIX mode.
aix_post_process_depfile
;;
msvc7)
if test "$libtool" = yes; then
showIncludes=-Wc,-showIncludes
else
showIncludes=-showIncludes
fi
"$@" $showIncludes > "$tmpdepfile"
stat=$?
grep -v '^Note: including file: ' "$tmpdepfile"
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The first sed program below extracts the file names and escapes
# backslashes for cygpath. The second sed program outputs the file
# name when reading, but also accumulates all include files in the
# hold buffer in order to output them again at the end. This only
# works with sed implementations that can handle large buffers.
sed < "$tmpdepfile" -n '
/^Note: including file: *\(.*\)/ {
s//\1/
s/\\/\\\\/g
p
}' | $cygpath_u | sort -u | sed -n '
s/ /\\ /g
s/\(.*\)/'"$tab"'\1 \\/p
s/.\(.*\) \\/\1:/
H
$ {
s/.*/'"$tab"'/
G
p
}' >> "$depfile"
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
rm -f "$tmpdepfile"
;;
msvc7msys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
#nosideeffect)
# This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones.
dashmstdout)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout, regardless of -o.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
test -z "$dashmflag" && dashmflag=-M
# Require at least two characters before searching for ':'
# in the target name. This is to cope with DOS-style filenames:
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
"$@" $dashmflag |
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this sed invocation
# correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
dashXmstdout)
# This case only exists to satisfy depend.m4. It is never actually
# run, as this mode is specially recognized in the preamble.
exit 1
;;
makedepend)
"$@" || exit $?
# Remove any Libtool call
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# X makedepend
shift
cleared=no eat=no
for arg
do
case $cleared in
no)
set ""; shift
cleared=yes ;;
esac
if test $eat = yes; then
eat=no
continue
fi
case "$arg" in
-D*|-I*)
set fnord "$@" "$arg"; shift ;;
# Strip any option that makedepend may not understand. Remove
# the object too, otherwise makedepend will parse it as a source file.
-arch)
eat=yes ;;
-*|$object)
;;
*)
set fnord "$@" "$arg"; shift ;;
esac
done
obj_suffix=`echo "$object" | sed 's/^.*\././'`
touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile"
# makedepend may prepend the VPATH from the source file name to the object.
# No need to regex-escape $object, excess matching of '.' is harmless.
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process the last invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed '1,2d' "$tmpdepfile" \
| tr ' ' "$nl" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile" "$tmpdepfile".bak
;;
cpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
"$@" -E \
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
| sed '$ s: \\$::' > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
cat < "$tmpdepfile" >> "$depfile"
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvisualcpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
IFS=" "
for arg
do
case "$arg" in
-o)
shift
;;
$object)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
set fnord "$@"
shift
shift
;;
*)
set fnord "$@" "$arg"
shift
shift
;;
esac
done
"$@" -E 2>/dev/null |
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
echo "$tab" >> "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvcmsys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
none)
exec "$@"
;;
*)
echo "Unknown depmode $depmode" 1>&2
exit 1
;;
esac
exit 0
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:

508
autotools/install-sh Executable file
View file

@ -0,0 +1,508 @@
#!/bin/sh
# install - install a program, script, or datafile
scriptversion=2014-09-12.12; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# 'make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
tab=' '
nl='
'
IFS=" $tab$nl"
# Set DOITPROG to "echo" to test this script.
doit=${DOITPROG-}
doit_exec=${doit:-exec}
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
posix_mkdir=
# Desired mode of installed file.
mode=0755
chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
stripcmd=
src=
dst=
dir_arg=
dst_arg=
copy_on_change=false
is_target_a_directory=possibly
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...
In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.
Options:
--help display this help and exit.
--version display version info and exit.
-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.
Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"
# Consume the leading options.  Parsing stops at the first argument that
# is not an option, or right after a literal '--'; whatever remains in
# "$@" afterwards is handled by the code that follows.
until test $# -eq 0; do
  case $1 in
    # Flags without a value.
    -c) ;;
    -C) copy_on_change=true ;;
    -d) dir_arg=true ;;
    -s) stripcmd=$stripprog ;;
    -T) is_target_a_directory=never ;;

    # Informational options: print and leave.
    --help)    echo "$usage"; exit $? ;;
    --version) echo "$0 $scriptversion"; exit $? ;;

    # Options whose value is the next argument; the extra 'shift' in each
    # branch drops that value.
    -g)
      chgrpcmd="$chgrpprog $2"
      shift ;;
    -o)
      chowncmd="$chownprog $2"
      shift ;;
    -m)
      mode=$2
      # Reject modes containing whitespace or glob characters, since
      # $mode is later expanded unquoted.
      case $mode in
        *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
          echo "$0: invalid mode: $mode" >&2
          exit 1 ;;
      esac
      shift ;;
    -t)
      is_target_a_directory=always
      dst_arg=$2
      # Protect names problematic for 'test' and other utilities.
      case $dst_arg in
        -* | [=\(\)!]) dst_arg=./$dst_arg ;;
      esac
      shift ;;

    # End of options.
    --)
      shift
      break ;;
    -*)
      echo "$0: invalid option: $1" >&2
      exit 1 ;;
    *)
      break ;;
  esac
  shift
done
# We allow the use of options -d and -T together, by making -d
# take the precedence; this is for compatibility with GNU install.
if test -n "$dir_arg"; then
if test -n "$dst_arg"; then
echo "$0: target directory not allowed when installing a directory." >&2
exit 1
fi
fi
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
# Otherwise, the last argument is the destination. Remove it from $@.
for arg
do
if test -n "$dst_arg"; then
# $@ is not empty: it contains at least $arg.
set fnord "$@" "$dst_arg"
shift # fnord
fi
shift # arg
dst_arg=$arg
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
done
fi
if test $# -eq 0; then
if test -z "$dir_arg"; then
echo "$0: no input file specified." >&2
exit 1
fi
# It's OK to call 'install-sh -d' without argument.
# This can happen when creating conditional directories.
exit 0
fi
if test -z "$dir_arg"; then
if test $# -gt 1 || test "$is_target_a_directory" = always; then
if test ! -d "$dst_arg"; then
echo "$0: $dst_arg: Is not a directory." >&2
exit 1
fi
fi
fi
if test -z "$dir_arg"; then
do_exit='(exit $ret); exit $ret'
trap "ret=129; $do_exit" 1
trap "ret=130; $do_exit" 2
trap "ret=141; $do_exit" 13
trap "ret=143; $do_exit" 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
case $mode in
# Optimize common cases.
*644) cp_umask=133;;
*755) cp_umask=22;;
*[0-7])
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
*)
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
fi
for src
do
# Protect names problematic for 'test' and other utilities.
case $src in
-* | [=\(\)!]) src=./$src;;
esac
if test -n "$dir_arg"; then
dst=$src
dstdir=$dst
test -d "$dstdir"
dstdir_status=$?
else
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if test ! -f "$src" && test ! -d "$src"; then
echo "$0: $src does not exist." >&2
exit 1
fi
if test -z "$dst_arg"; then
echo "$0: no destination specified." >&2
exit 1
fi
dst=$dst_arg
# If destination is a directory, append the input filename; won't work
# if double slashes aren't ignored.
if test -d "$dst"; then
if test "$is_target_a_directory" = never; then
echo "$0: $dst_arg: Is a directory" >&2
exit 1
fi
dstdir=$dst
dst=$dstdir/`basename "$src"`
dstdir_status=0
else
dstdir=`dirname "$dst"`
test -d "$dstdir"
dstdir_status=$?
fi
fi
obsolete_mkdir_used=false
if test $dstdir_status != 0; then
case $posix_mkdir in
'')
# Create intermediate dirs using mode 755 as modified by the umask.
# This is like FreeBSD 'install' as of 1997-10-28.
umask=`umask`
case $stripcmd.$umask in
# Optimize common cases.
*[2367][2367]) mkdir_umask=$umask;;
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
*[0-7])
mkdir_umask=`expr $umask + 22 \
- $umask % 100 % 40 + $umask % 20 \
- $umask % 10 % 4 + $umask % 2
`;;
*) mkdir_umask=$umask,go-w;;
esac
# With -d, create the new directory with the user-specified mode.
# Otherwise, rely on $mkdir_umask.
if test -n "$dir_arg"; then
mkdir_mode=-m$mode
else
mkdir_mode=
fi
posix_mkdir=false
case $umask in
*[123567][0-7][0-7])
# POSIX mkdir -p sets u+wx bits regardless of umask, which
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
;;
*)
# $RANDOM is not portable (e.g. dash); use it when possible to
# lower collision chance
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0
# As "mkdir -p" follows symlinks and we work in /tmp possibly; so
# create the $tmpdir first (and fail if unsuccessful) to make sure
# that nobody tries to guess the $tmpdir name.
if (umask $mkdir_umask &&
$mkdirprog $mkdir_mode "$tmpdir" &&
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1
then
if test -z "$dir_arg" || {
# Check for POSIX incompatibilities with -m.
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
# other-writable bit of parent directory when it shouldn't.
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
test_tmpdir="$tmpdir/a"
ls_ld_tmpdir=`ls -ld "$test_tmpdir"`
case $ls_ld_tmpdir in
d????-?r-*) different_mode=700;;
d????-?--*) different_mode=755;;
*) false;;
esac &&
$mkdirprog -m$different_mode -p -- "$test_tmpdir" && {
ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"`
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
}
}
then posix_mkdir=:
fi
rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir"
else
# Remove any dirs left behind by ancient mkdir implementations.
rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null
fi
trap '' 0;;
esac;;
esac
# Create "$dstdir".  First try a single POSIX 'mkdir -p' under the
# restricted umask; if that is not available or fails, build the path one
# component at a time.
if
  $posix_mkdir && (
    umask $mkdir_umask &&
    $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
  )
then :
else
  # The umask is ridiculous, or mkdir does not conform to POSIX,
  # or it failed possibly due to a race condition. Create the
  # directory the slow way, step by step, checking for races as we go.
  case $dstdir in
    /*) prefix='/';;
    [-=\(\)!]*) prefix='./';;
    *) prefix='';;
  esac
  # Split $dstdir on '/' into the positional parameters, with globbing
  # disabled so that components containing wildcards are kept literally.
  oIFS=$IFS
  IFS=/
  set -f
  set fnord $dstdir
  shift
  set +f
  IFS=$oIFS
  # List of (shell-quoted) path prefixes that still have to be created.
  prefixes=
  for d
  do
    # Skip empty components produced by doubled or leading slashes.
    test X"$d" = X && continue
    prefix=$prefix$d
    if test -d "$prefix"; then
      prefixes=
    else
      if $posix_mkdir; then
        # Retry the one-shot 'mkdir -p'.  The umask must actually be
        # applied here (a plain 'umask=...' assignment would leave the
        # ambient umask in force), exactly as in the first attempt above.
        (umask $mkdir_umask &&
         $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
        # Don't fail if two instances are running concurrently.
        test -d "$prefix" || exit 1
      else
        # Quote the prefix for the 'eval' below; embedded single quotes
        # need escaping.
        case $prefix in
          *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
          *) qprefix=$prefix;;
        esac
        prefixes="$prefixes '$qprefix'"
      fi
    fi
    prefix=$prefix/
  done
  if test -n "$prefixes"; then
    # Don't fail if two instances are running concurrently.
    (umask $mkdir_umask &&
     eval "\$doit_exec \$mkdirprog $prefixes") ||
      test -d "$dstdir" || exit 1
    obsolete_mkdir_used=true
  fi
fi
fi
if test -n "$dir_arg"; then
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
else
# Make a couple of temp file names in the proper directory.
dsttmp=$dstdir/_inst.$$_
rmtmp=$dstdir/_rm.$$_
# Trap to clean up those temp files at exit.
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
# Copy the file name to the temp name.
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
# and set any options; do chmod last to preserve setuid bits.
#
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $cpprog $src $dsttmp" command.
#
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
# If -C, don't bother to copy if it wouldn't change the file.
if $copy_on_change &&
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
set -f &&
set X $old && old=:$2:$4:$5:$6 &&
set X $new && new=:$2:$4:$5:$6 &&
set +f &&
test "$old" = "$new" &&
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
then
rm -f "$dsttmp"
else
# Rename the file to the real destination.
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
# The rename failed, perhaps because mv can't rename something else
# to itself, or perhaps because mv is so ancient that it does not
# support -f.
{
# Now remove or move aside any old file at destination location.
# We try this two ways since rm can't unlink itself on some
# systems and the destination file might be busy for other
# reasons. In this case, the final cleanup might fail but the new
# file should still install successfully.
{
test ! -f "$dst" ||
$doit $rmcmd -f "$dst" 2>/dev/null ||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
} ||
{ echo "$0: cannot unlink or rename $dst" >&2
(exit 1); exit 1
}
} &&
# Now rename the file to the real destination.
$doit $mvcmd "$dsttmp" "$dst"
}
fi || exit 1
trap '' 0
fi
done
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

11156
autotools/ltmain.sh Normal file

File diff suppressed because it is too large Load diff

215
autotools/missing Executable file
View file

@ -0,0 +1,215 @@
#! /bin/sh
# Common wrapper for a few potentially missing GNU programs.
scriptversion=2013-10-28.13; # UTC
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# An invocation without any argument is a usage error.
test $# -ne 0 || {
  echo "Try '$0 --help' for more information" >&2
  exit 1
}

# Interpret the wrapper's own leading option, if there is one.  Any other
# first argument is taken to be the program to run.
case $1 in

  --is-lightweight)
    # Probe used by the autoconf macros to find out whether the available
    # 'missing' script is recent enough; answering at all means "yes".
    exit 0
    ;;

  --run)
    # Calling convention of older automake releases: discard the flag and
    # treat the remaining arguments as the command line to run.
    shift
    ;;

  -h | --h | --he | --hel | --help)
    echo "\
$0 [OPTION]... PROGRAM [ARGUMENT]...
Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
to PROGRAM being missing or too old.
Options:
-h, --help display this help and exit
-v, --version output version information and exit
Supported PROGRAM values:
aclocal autoconf autoheader autom4te automake makeinfo
bison yacc flex lex help2man
Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
'g' are ignored when checking the name.
Send bug reports to <bug-automake@gnu.org>."
    exit $?
    ;;

  -v | --v | --ve | --ver | --vers | --versi | --versio | --version)
    echo "missing $scriptversion (GNU Automake)"
    exit $?
    ;;

  -*)
    # Any other dash-prefixed word is an option this script does not know.
    {
      echo "$0: unknown '$1' option"
      echo "Try '$0 --help' for more information"
    } >&2
    exit 1
    ;;

esac
# Invoke the wrapped program with its arguments and keep its exit status.
"$@"
st=$?

# Success: there is nothing to diagnose.
if test $st -eq 0; then
  exit 0
fi

# If the program failed (or was not found) while being called with
# '--version' or '--help', the caller is most likely just probing whether
# it is present and works, so hand the status straight back.
if test "x$2" = x--version || test "x$2" = x--help; then
  exit $st
fi

# Choose the wording of the diagnostic from the exit status:
#   63  - version mismatch, which often happens when an ancient version of
#         a tool is used on a file that requires a minimum version;
#   127 - the program was missing;
#   any other status - the program was found and executed, but failed, so
#         give up and propagate that status unchanged.
case $st in
  63)  msg="probably too old" ;;
  127) msg="missing on your system" ;;
  *)   exit $st ;;
esac
# Home pages quoted in the advice messages.
gnu_software_URL=http://www.gnu.org/software
flex_URL=http://flex.sourceforge.net/
perl_URL=http://www.perl.org/

# program_details PROGRAM
# -----------------------
# Print which package provides PROGRAM and what that package needs in
# order to run.  Only the Automake programs (aclocal, automake) and the
# Autoconf programs (autoconf, autom4te, autoheader) are described; for
# any other name nothing is printed.
program_details ()
{
  case $1 in
    aclocal | automake)
      printf '%s\n' \
        "The '$1' program is part of the GNU Automake package:" \
        "<$gnu_software_URL/automake>" \
        "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" \
        "<$gnu_software_URL/autoconf>" \
        "<$gnu_software_URL/m4/>" \
        "<$perl_URL>"
      ;;
    autoconf | autom4te | autoheader)
      printf '%s\n' \
        "The '$1' program is part of the GNU Autoconf package:" \
        "<$gnu_software_URL/autoconf/>" \
        "It also requires GNU m4 and Perl in order to run:" \
        "<$gnu_software_URL/m4/>" \
        "<$perl_URL>"
      ;;
  esac
}
# give_advice PROGRAM
# -------------------
# Print on standard output a first line stating that PROGRAM is $msg,
# followed by hints about when PROGRAM is actually needed and, where
# known, which package provides it.  Reads the global $msg and the *_URL
# variables; sets the globals normalized_program and configure_deps.
give_advice ()
{
  # Drop one leading 'gnu-', 'gnu' or 'g' (the first alternative that
  # matches) so that prefixed program names hit the patterns below.
  normalized_program=`echo "$1" |
    sed -e 's/^gnu-//' -e t -e 's/^gnu//' -e t -e 's/^g//' -e t`

  printf '%s\n' "'$1' is $msg."

  configure_deps="'configure.ac' or m4 files included by 'configure.ac'"

  case $normalized_program in
    autoconf*)
      printf '%s\n' \
        "You should only need it if you modified 'configure.ac'," \
        "or m4 files included by it."
      program_details 'autoconf'
      ;;
    autoheader*)
      printf '%s\n' \
        "You should only need it if you modified 'acconfig.h' or" \
        "$configure_deps."
      program_details 'autoheader'
      ;;
    automake*)
      printf '%s\n' \
        "You should only need it if you modified 'Makefile.am' or" \
        "$configure_deps."
      program_details 'automake'
      ;;
    aclocal*)
      printf '%s\n' \
        "You should only need it if you modified 'acinclude.m4' or" \
        "$configure_deps."
      program_details 'aclocal'
      ;;
    autom4te*)
      printf '%s\n' \
        "You might have modified some maintainer files that require" \
        "the 'autom4te' program to be rebuilt."
      program_details 'autom4te'
      ;;
    bison* | yacc*)
      printf '%s\n' \
        "You should only need it if you modified a '.y' file." \
        "You may want to install the GNU Bison package:" \
        "<$gnu_software_URL/bison/>"
      ;;
    lex* | flex*)
      printf '%s\n' \
        "You should only need it if you modified a '.l' file." \
        "You may want to install the Fast Lexical Analyzer package:" \
        "<$flex_URL>"
      ;;
    help2man*)
      printf '%s\n' \
        "You should only need it if you modified a dependency of a man page." \
        "You may want to install the GNU Help2man package:" \
        "<$gnu_software_URL/help2man/>"
      ;;
    makeinfo*)
      printf '%s\n' \
        "You should only need it if you modified a '.texi' file, or" \
        "any other file indirectly affecting the aspect of the manual." \
        "You might want to install the Texinfo package:" \
        "<$gnu_software_URL/texinfo/>" \
        "The spurious makeinfo call might also be the consequence of" \
        "using a buggy 'make' (AIX, DU, IRIX), in which case you might" \
        "want to install GNU make:" \
        "<$gnu_software_URL/make/>"
      ;;
    *)
      # Unknown program: only generic guidance can be given.
      printf '%s\n' \
        "You might have modified some files without having the proper" \
        "tools for further handling them. Check the 'README' file, it" \
        "often tells you about the needed prerequisites for installing" \
        "this package. You may also peek at any GNU archive site, in"
      # Kept as 'echo' because the line embeds the caller-supplied name.
      echo "case some other package contains this missing '$1' program."
      ;;
  esac
}
# Send the advice to standard error, tagging its first line as a warning
# and prefixing each following line with a blank.
give_advice "$1" |
  sed -e '1s/^/WARNING: /' -e '2,$s/^/ /' >&2

# Hand back the status of the wrapped program (expected to be 127 when it
# was not found, 63 when it failed because of a version mismatch).
exit $st
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

148
autotools/test-driver Executable file
View file

@ -0,0 +1,148 @@
#! /bin/sh
# test-driver - basic testsuite driver script.
scriptversion=2013-07-13.22; # UTC
# Copyright (C) 2011-2014 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# Treat the expansion of an unset variable as a fatal error, so that a
# misspelled variable name is caught instead of silently expanding to
# nothing.
set -u

# usage_error MESSAGE...
# ----------------------
# Write MESSAGE, prefixed with the script name, and then the usage summary
# to standard error; terminate the script with exit status 2.
usage_error ()
{
  {
    echo "$0: $*"
    print_usage
  } >&2
  exit 2
}
# print_usage
# -----------
# Write the command-line synopsis of this driver to standard output.
print_usage ()
{
  printf '%s\n' \
    "Usage:" \
    "test-driver --test-name=NAME --log-file=PATH --trs-file=PATH" \
    "[--expect-failure={yes|no}] [--color-tests={yes|no}]" \
    "[--enable-hard-errors={yes|no}] [--]" \
    "TEST-SCRIPT [TEST-SCRIPT-ARGUMENTS]" \
    "The '--test-name', '--log-file' and '--trs-file' options are mandatory."
}
# Option values, with their defaults.
test_name= # Used for reporting.
log_file=  # Where to save the output of the test script.
trs_file=  # Where to save the metadata of the test run.
expect_failure=no
color_tests=no
enable_hard_errors=yes

# Consume leading options; stop at '--' or at the first word that is not
# an option, leaving the test script and its arguments in "$@".
#
# Every value-taking option is accepted in two spellings:
#   --option VALUE   (two words; the extra 'shift' drops VALUE)
#   --option=VALUE   (one word, the form shown by the usage message)
while test $# -gt 0; do
  case $1 in
    --help) print_usage; exit $?;;
    --version) echo "test-driver $scriptversion"; exit $?;;
    --test-name) test_name=$2; shift;;
    --test-name=*) test_name=${1#--test-name=};;
    --log-file) log_file=$2; shift;;
    --log-file=*) log_file=${1#--log-file=};;
    --trs-file) trs_file=$2; shift;;
    --trs-file=*) trs_file=${1#--trs-file=};;
    --color-tests) color_tests=$2; shift;;
    --color-tests=*) color_tests=${1#--color-tests=};;
    --expect-failure) expect_failure=$2; shift;;
    --expect-failure=*) expect_failure=${1#--expect-failure=};;
    --enable-hard-errors) enable_hard_errors=$2; shift;;
    --enable-hard-errors=*) enable_hard_errors=${1#--enable-hard-errors=};;
    --) shift; break;;
    -*) usage_error "invalid option: '$1'";;
    *) break;;
  esac
  shift
done
# Collect the names of the mandatory options that were left empty and
# report all of them in a single usage error.
missing_opts=
case $test_name in '') missing_opts="$missing_opts --test-name";; esac
case $log_file in '') missing_opts="$missing_opts --log-file";; esac
case $trs_file in '') missing_opts="$missing_opts --trs-file";; esac
test -z "$missing_opts" ||
  usage_error "the following mandatory options are missing:$missing_opts"

# At least the test script itself must remain on the command line.
test $# -gt 0 || usage_error "missing argument"
# set_color_vars MODE
# -------------------
# Define the variables red, grn, lgn, blu, mgn and std.  When MODE is
# 'yes' they hold ANSI terminal escape sequences; for any other MODE
# they are all empty, so that colored and plain output share one code path.
# Previously the sequences were empty strings in both branches, so
# requesting colors had no effect.
set_color_vars ()
{
  if test "x$1" = xyes; then
    # Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'.
    esc=`printf '\033'`   # The ESC control character.
    red="${esc}[0;31m"    # Red.
    grn="${esc}[0;32m"    # Green.
    lgn="${esc}[1;32m"    # Light green.
    blu="${esc}[1;34m"    # Blue.
    mgn="${esc}[0;35m"    # Magenta.
    std="${esc}[m"        # No color.
  else
    red= grn= lgn= blu= mgn= std=
  fi
}

# The value is quoted so that an empty --color-tests argument simply
# disables colors instead of making 'test' fail with a syntax error.
set_color_vars "$color_tests"
# If the driver is interrupted by signal 1, 2, 13 or 15 (HUP, INT, PIPE,
# TERM), remove the possibly incomplete log and result files and exit
# with the status set by the trap below.  The file names are
# quoted inside the trap action so that paths containing blanks or glob
# characters are removed as single, literal arguments.
do_exit='rm -f "$log_file" "$trs_file"; (exit $st); exit $st'
trap "st=129; $do_exit" 1
trap "st=130; $do_exit" 2
trap "st=141; $do_exit" 13
trap "st=143; $do_exit" 15

# Test script is run here; its standard output and standard error both
# go to the log file.
"$@" >"$log_file" 2>&1
estatus=$?

# With hard errors disabled, exit status 99 is handled like status 1,
# i.e. it is reported as a failure rather than as an ERROR.
if test "$enable_hard_errors" = no && test $estatus -eq 99; then
  tweaked_estatus=1
else
  tweaked_estatus=$estatus
fi

# Map "<exit status>:<expect_failure>" to the console color, the result
# keyword, and the two yes/no flags written to the .trs file below.
case $tweaked_estatus:$expect_failure in
  0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
  0:*)   col=$grn res=PASS  recheck=no  gcopy=no;;
  77:*)  col=$blu res=SKIP  recheck=no  gcopy=yes;;
  99:*)  col=$mgn res=ERROR recheck=yes gcopy=yes;;
  *:yes) col=$lgn res=XFAIL recheck=no  gcopy=yes;;
  *:*)   col=$red res=FAIL  recheck=yes gcopy=yes;;
esac

# Report the test outcome and exit status in the logs, so that one can
# know whether the test passed or failed simply by looking at the '.log'
# file, without the need of also peeking into the corresponding '.trs'
# file (automake bug#11814).
echo "$res $test_name (exit status: $estatus)" >>"$log_file"

# Report outcome to console.
echo "${col}${res}${std}: $test_name"

# Register the test result, and other relevant metadata.
echo ":test-result: $res" > "$trs_file"
echo ":global-test-result: $res" >> "$trs_file"
echo ":recheck: $recheck" >> "$trs_file"
echo ":copy-in-global-log: $gcopy" >> "$trs_file"
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

15
c/.gitignore vendored Normal file
View file

@ -0,0 +1,15 @@
x
y
s
tt
tt-000-3.1.0
tt-001-rslls
tti
gcm.txt
reg_test/runex
reg_test/runv
timings.txt
tbin-1
tbin-2
tbin-3
stars.txt

5
c/.vimrc Normal file
View file

@ -0,0 +1,5 @@
map \r :w<C-m>:!bake install<C-m>
map \f :w<C-m>:!bake<C-m>
map \v :w<C-m>:!bake mlr<C-m>
set tw=120
set ts=4

72
c/Makefile.am Normal file
View file

@ -0,0 +1,72 @@
# Automake input for the Miller executable 'mlr' and its variants.
#
# The '.' entry in SUBDIRS places this directory after the library
# directories and before unit_test and reg_test.
SUBDIRS= lib cli stream input dsl mapping containers output parsing auxents experimental . unit_test reg_test

AM_CPPFLAGS= -I${srcdir}
AM_CFLAGS= -Wall -std=gnu99

bin_PROGRAMS= mlr
noinst_PROGRAMS= mlrg
# mlrp is built only on explicit request ('make mlrp').  It has to be
# declared here: without a *_PROGRAMS entry Automake generates no rule for
# it and the mlrp_* variables below are ignored.
EXTRA_PROGRAMS= mlrp
# Programs listed in EXTRA_PROGRAMS are not removed by the generated clean
# rules, so have 'make clean' delete them explicitly.
CLEANFILES= $(EXTRA_PROGRAMS)

mlr_SOURCES= mlrmain.c
mlr_LDFLAGS= -static
mlr_LDADD= \
	cli/libcli.la \
	containers/libcontainers.la \
	stream/libstream.la \
	input/libinput.la \
	dsl/libdsl.la \
	mapping/libmapping.la \
	output/liboutput.la \
	lib/libmlr.la \
	parsing/libdsl.la \
	auxents/libauxents.la \
	-lm

# Resulting link line:
# /bin/sh ../libtool --tag=CC --mode=link
# gcc
# -Wall -std=gnu99
# -g -O2
# <<<LDFLAGS GO HERE>>> <-------------------------------------
# -o mlr
# mlrmain.o
# cli/libcli.la
# containers/libcontainers.la
# stream/libstream.la
# input/libinput.la
# dsl/libdsl.la
# mapping/libmapping.la
# output/liboutput.la
# lib/libmlr.la
# parsing/libdsl.la
# auxents/libauxents.la
# <<<LDADD GOES HERE>>> <-------------------------------------

# Other executable variants

# Debug version:
mlrg_CFLAGS= -g ${AM_CFLAGS}
mlrg_LDFLAGS= ${mlr_LDFLAGS}
mlrg_LDADD= ${mlr_LDADD}
mlrg_SOURCES= ${mlr_SOURCES}

# Profile version. Usage:
# * make mlrp
# * mlrp {arguments}
# * gprof mlrp gmon.out > myfile.txt
# Note: works on Linux; not on OSX. On FreeBSD it sounds like it'll need an
# extra -lc on the link line.
mlrp_CFLAGS= -g -pg ${AM_CFLAGS}
mlrp_LDFLAGS= ${mlr_LDFLAGS}
mlrp_LDADD= ${mlr_LDADD}
mlrp_SOURCES= ${mlr_SOURCES}

# The helper targets below do not create files named after themselves.
.PHONY: regtest-copy perfclean profclean

# ================================================================
# Run this after unit-test expected output has changed, and is verified to be
# OK. (Example: after adding new test cases in test/run.)
regtest-copy:
	cp output/out reg_test/expected

# ================================================================
# Remove the data files left behind by profiling runs.
perfclean profclean:
	@rm -vf gmon.out perf.data perf.data.old

858
c/Makefile.in Normal file
View file

@ -0,0 +1,858 @@
# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
bin_PROGRAMS = mlr$(EXEEXT)
noinst_PROGRAMS = mlrg$(EXEEXT)
subdir = c
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__installdirs = "$(DESTDIR)$(bindir)"
PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
am_mlr_OBJECTS = mlrmain.$(OBJEXT)
mlr_OBJECTS = $(am_mlr_OBJECTS)
mlr_DEPENDENCIES = cli/libcli.la containers/libcontainers.la \
stream/libstream.la input/libinput.la dsl/libdsl.la \
mapping/libmapping.la output/liboutput.la lib/libmlr.la \
parsing/libdsl.la auxents/libauxents.la
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
mlr_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(mlr_LDFLAGS) $(LDFLAGS) -o $@
am__objects_1 = mlrg-mlrmain.$(OBJEXT)
am_mlrg_OBJECTS = $(am__objects_1)
mlrg_OBJECTS = $(am_mlrg_OBJECTS)
am__DEPENDENCIES_1 = cli/libcli.la containers/libcontainers.la \
stream/libstream.la input/libinput.la dsl/libdsl.la \
mapping/libmapping.la output/liboutput.la lib/libmlr.la \
parsing/libdsl.la auxents/libauxents.la
mlrg_DEPENDENCIES = $(am__DEPENDENCIES_1)
mlrg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(mlrg_CFLAGS) $(CFLAGS) \
$(mlrg_LDFLAGS) $(LDFLAGS) -o $@
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/autotools/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(mlr_SOURCES) $(mlrg_SOURCES)
DIST_SOURCES = $(mlr_SOURCES) $(mlrg_SOURCES)
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in \
$(top_srcdir)/autotools/depcomp
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDFLAGS = @LDFLAGS@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
SUBDIRS = lib cli stream input dsl mapping containers output parsing auxents experimental . unit_test reg_test
AM_CPPFLAGS = -I${srcdir}
AM_CFLAGS = -Wall -std=gnu99
mlr_SOURCES = mlrmain.c
mlr_LDFLAGS = -static
mlr_LDADD = \
cli/libcli.la \
containers/libcontainers.la \
stream/libstream.la \
input/libinput.la \
dsl/libdsl.la \
mapping/libmapping.la \
output/liboutput.la \
lib/libmlr.la \
parsing/libdsl.la \
auxents/libauxents.la \
-lm
# Resulting link line:
# /bin/sh ../libtool --tag=CC --mode=link
# gcc
# -Wall -std=gnu99
# -g -O2
# <<<LDFLAGS GO HERE>>> <-------------------------------------
# -o mlr
# mlrmain.o
# cli/libcli.la
# containers/libcontainers.la
# stream/libstream.la
# input/libinput.la
# dsl/libdsl.la
# mapping/libmapping.la
# output/liboutput.la
# lib/libmlr.la
# parsing/libdsl.la
# auxents/libauxents.la
# <<<LDADD GOES HERE>>> <-------------------------------------
# Other executable variants
# Debug version:
mlrg_CFLAGS = -g ${AM_CFLAGS}
mlrg_LDFLAGS = ${mlr_LDFLAGS}
mlrg_LDADD = ${mlr_LDADD}
mlrg_SOURCES = ${mlr_SOURCES}
# Profile version. Usage:
# * make mlrp
# * mlrp {arguments}
# * gprof mlrp gmon.out > myfile.txt
# Note: works on Linux; not on OSX. On FreeBSD it sounds like it'll need an
# extra -lc on the link line.
mlrp_CFLAGS = -g -pg ${AM_CFLAGS}
mlrp_LDFLAGS = ${mlr_LDFLAGS}
mlrp_LDADD = ${mlr_LDADD}
mlrp_SOURCES = ${mlr_SOURCES}
all: all-recursive
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu c/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu c/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
install-binPROGRAMS: $(bin_PROGRAMS)
@$(NORMAL_INSTALL)
@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
if test -n "$$list"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
$(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
fi; \
for p in $$list; do echo "$$p $$p"; done | \
sed 's/$(EXEEXT)$$//' | \
while read p p1; do if test -f $$p \
|| test -f $$p1 \
; then echo "$$p"; echo "$$p"; else :; fi; \
done | \
sed -e 'p;s,.*/,,;n;h' \
-e 's|.*|.|' \
-e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
sed 'N;N;N;s,\n, ,g' | \
$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
{ d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
if ($$2 == $$4) files[d] = files[d] " " $$1; \
else { print "f", $$3 "/" $$4, $$1; } } \
END { for (d in files) print "f", d, files[d] }' | \
while read type dir files; do \
if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
test -z "$$files" || { \
echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
$(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
} \
; done
uninstall-binPROGRAMS:
@$(NORMAL_UNINSTALL)
@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
files=`for p in $$list; do echo "$$p"; done | \
sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
-e 's/$$/$(EXEEXT)/' \
`; \
test -n "$$list" || exit 0; \
echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
cd "$(DESTDIR)$(bindir)" && rm -f $$files
clean-binPROGRAMS:
@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
echo " rm -f" $$list; \
rm -f $$list || exit $$?; \
test -n "$(EXEEXT)" || exit 0; \
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
clean-noinstPROGRAMS:
@list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
echo " rm -f" $$list; \
rm -f $$list || exit $$?; \
test -n "$(EXEEXT)" || exit 0; \
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
mlr$(EXEEXT): $(mlr_OBJECTS) $(mlr_DEPENDENCIES) $(EXTRA_mlr_DEPENDENCIES)
@rm -f mlr$(EXEEXT)
$(AM_V_CCLD)$(mlr_LINK) $(mlr_OBJECTS) $(mlr_LDADD) $(LIBS)
mlrg$(EXEEXT): $(mlrg_OBJECTS) $(mlrg_DEPENDENCIES) $(EXTRA_mlrg_DEPENDENCIES)
@rm -f mlrg$(EXEEXT)
$(AM_V_CCLD)$(mlrg_LINK) $(mlrg_OBJECTS) $(mlrg_LDADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mlrg-mlrmain.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mlrmain.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mlrg-mlrmain.o: mlrmain.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mlrg_CFLAGS) $(CFLAGS) -MT mlrg-mlrmain.o -MD -MP -MF $(DEPDIR)/mlrg-mlrmain.Tpo -c -o mlrg-mlrmain.o `test -f 'mlrmain.c' || echo '$(srcdir)/'`mlrmain.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/mlrg-mlrmain.Tpo $(DEPDIR)/mlrg-mlrmain.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlrmain.c' object='mlrg-mlrmain.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mlrg_CFLAGS) $(CFLAGS) -c -o mlrg-mlrmain.o `test -f 'mlrmain.c' || echo '$(srcdir)/'`mlrmain.c
mlrg-mlrmain.obj: mlrmain.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mlrg_CFLAGS) $(CFLAGS) -MT mlrg-mlrmain.obj -MD -MP -MF $(DEPDIR)/mlrg-mlrmain.Tpo -c -o mlrg-mlrmain.obj `if test -f 'mlrmain.c'; then $(CYGPATH_W) 'mlrmain.c'; else $(CYGPATH_W) '$(srcdir)/mlrmain.c'; fi`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/mlrg-mlrmain.Tpo $(DEPDIR)/mlrg-mlrmain.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlrmain.c' object='mlrg-mlrmain.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mlrg_CFLAGS) $(CFLAGS) -c -o mlrg-mlrmain.obj `if test -f 'mlrmain.c'; then $(CYGPATH_W) 'mlrmain.c'; else $(CYGPATH_W) '$(srcdir)/mlrmain.c'; fi`
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-recursive
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-recursive
all-am: Makefile $(PROGRAMS)
installdirs: installdirs-recursive
installdirs-am:
for dir in "$(DESTDIR)$(bindir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
clean-am: clean-binPROGRAMS clean-generic clean-libtool \
clean-noinstPROGRAMS mostlyclean-am
distclean: distclean-recursive
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am:
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am: install-binPROGRAMS
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-recursive
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am: uninstall-binPROGRAMS
.MAKE: $(am__recursive_targets) install-am install-strip
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
check-am clean clean-binPROGRAMS clean-generic clean-libtool \
clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-binPROGRAMS install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am uninstall-binPROGRAMS
.PRECIOUS: Makefile
# ================================================================
# Run this after unit-test expected output has changed, and is verified to be
# OK. (Example: after adding new test cases in test/run.)
regtest-copy:
cp output/out reg_test/expected
# ================================================================
perfclean profclean:
@rm -vf gmon.out perf.data perf.data.old
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

543
c/Makefile.no-autoconfig Normal file
View file

@ -0,0 +1,543 @@
# ================================================================
# NOTE: This makefile is not intended to be used in a packaging system --
# rather, Miller uses autoconfig for that. This makefile is intended for users
# who prefer (for whatever reason) to bypass autoconfig. Please also see
# http://johnkerl.org/miller/doc/build.html#Without_using_autoconfig
# ================================================================
# ================================================================
# Travis does "export CC=gcc", "export CC=clang" so we can pick those up via
# "make -e" in ../.travis.yml. Note that "CC?=gcc", without make -e, results
# in CC being expanded to cc on my OSX laptop, which is not OK. Hence make -e.
CC=gcc
CFLAGS=-std=gnu99
IFLAGS=-I. -I..
WFLAGS=-Wall -Werror
# Worth exploring ... but needs handling for unused parameters in functions which comply with interfaces.
# Best option I'm aware of is to replace "void foo(int bar) {...}" with "void foo(int) {...}" throughout.
# WFLAGS=-Wall -Wextra -Werror
# WFLAGS=-Wall -Wextra -pedantic-errors -Werror
# WFLAGS=-Wall -Wextra -pedantic-errors -Werror=unused-variable
LFLAGS=-lm
# You can do make -e INSTALLDIR=/path/to/somewhere/else/bin
INSTALLDIR=/usr/local/bin
CCOPT=$(CC) $(CFLAGS) $(IFLAGS) $(WFLAGS) -O3
CCDEBUG=$(CC) -g $(CFLAGS) $(IFLAGS) $(WFLAGS)
CCASAN=clang -fsanitize=address -g $(CFLAGS) $(IFLAGS) $(WFLAGS)
# clang ASAN. Use -O1 for debug mode to (among other things) disable inlining.
#CCOPT=clang -fsanitize=address -fno-omit-frame-pointer $(CFLAGS) $(IFLAGS) $(WFLAGS)
#CCDEBUG=clang -g -fsanitize=address -fno-omit-frame-pointer $(CFLAGS) $(IFLAGS) $(WFLAGS)
# ----------------------------------------------------------------
# Miller source except DSL
NON_DSL_SRCS = \
*.c \
cli/*.c \
lib/*.c \
containers/*.c \
auxents/*.c \
stream/*.c \
input/*.c \
dsl/*.c \
mapping/*.c \
output/*.c
# DSL
DSL_OBJS = \
./parsing/mlr_dsl_parse.o \
./parsing/mlr_dsl_lexer.o \
./parsing/mlr_dsl_wrapper.o
# Unit-test code
TEST_ARGPARSE_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/string_builder.c \
cli/argparse.c \
containers/slls.c \
containers/sllv.c \
lib/string_array.c \
unit_test/test_argparse.c
TEST_BYTE_READERS_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlrescape.c \
lib/mlr_test_util.c \
lib/mlr_globals.c \
lib/string_builder.c \
input/string_byte_reader.c \
input/stdio_byte_reader.c \
unit_test/test_byte_readers.c
TEST_LINE_READERS_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlrescape.c \
lib/context.c \
lib/mlr_test_util.c \
lib/mlr_globals.c \
lib/string_builder.c \
input/line_readers.c \
unit_test/test_line_readers.c
TEST_PEEK_FILE_READER_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlr_test_util.c \
lib/mlr_globals.c \
lib/string_builder.c \
input/string_byte_reader.c \
input/peek_file_reader.c \
unit_test/test_peek_file_reader.c
TEST_LREC_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/context.c \
lib/mlrdatetime.c \
lib/mlrescape.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/string_builder.c \
lib/string_array.c \
lib/mlrval.c \
lib/mvfuncs.c \
containers/hss.c \
containers/lrec.c \
containers/header_keeper.c \
containers/sllv.c \
containers/slls.c \
containers/rslls.c \
containers/lhmsv.c \
containers/lhmslv.c \
containers/sllmv.c \
containers/mlhmmv.c \
input/line_readers.c \
input/file_reader_stdio.c \
input/file_ingestor_stdio.c \
input/lrec_reader_stdio_csvlite.c \
input/lrec_reader_stdio_dkvp.c \
input/lrec_reader_stdio_nidx.c \
input/lrec_reader_stdio_xtab.c \
input/lrec_reader_stdio_json.c \
input/mlr_json_adapter.c \
input/json_parser.c \
unit_test/test_lrec.c
TEST_MULTIPLE_CONTAINERS_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/context.c \
lib/mlrdatetime.c \
lib/mlrescape.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/string_builder.c \
containers/lrec.c \
containers/header_keeper.c \
containers/sllv.c \
containers/slls.c \
containers/rslls.c \
lib/string_array.c \
containers/hss.c \
lib/mlrval.c \
lib/mvfuncs.c \
containers/lhmsi.c \
containers/lhmsll.c \
containers/lhmss.c \
containers/lhmsv.c \
containers/lhms2v.c \
containers/lhmslv.c \
containers/lhmsmv.c \
containers/loop_stack.c \
containers/percentile_keeper.c \
containers/top_keeper.c \
containers/dheap.c \
input/line_readers.c \
input/file_reader_stdio.c \
input/file_ingestor_stdio.c \
input/lrec_reader_stdio_csvlite.c \
input/lrec_reader_stdio_dkvp.c \
input/lrec_reader_stdio_nidx.c \
input/lrec_reader_stdio_xtab.c \
input/lrec_reader_stdio_json.c \
input/mlr_json_adapter.c \
input/json_parser.c \
unit_test/test_multiple_containers.c
TEST_MLHMMV_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlrdatetime.c \
lib/string_builder.c \
lib/string_array.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/mlrval.c \
lib/mvfuncs.c \
containers/hss.c \
containers/mlhmmv.c \
containers/sllmv.c \
containers/sllv.c \
containers/slls.c \
containers/lrec.c \
unit_test/test_mlhmmv.c
TEST_MLRUTIL_SRCS = \
lib/mlr_globals.c \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/string_builder.c \
unit_test/test_mlrutil.c
TEST_MLRREGEX_SRCS = \
lib/mlr_globals.c \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlrregex.c \
lib/string_builder.c \
lib/string_array.c \
unit_test/test_mlrregex.c
TEST_STRING_BUILDER_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlr_globals.c \
lib/string_builder.c \
unit_test/test_string_builder.c
TEST_PARSE_TRIE_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlr_globals.c \
lib/string_builder.c \
containers/parse_trie.c \
unit_test/test_parse_trie.c
TEST_RVAL_EVALUATORS_SRCS = \
lib/mlr_globals.c \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlrdatetime.c \
lib/mlrregex.c \
lib/mlrmath.c \
lib/string_builder.c \
lib/string_array.c \
lib/mlrval.c \
lib/mvfuncs.c \
containers/xvfuncs.c \
containers/sllv.c \
containers/slls.c \
containers/sllmv.c \
containers/lrec.c \
containers/lhmsv.c \
containers/lhmsi.c \
containers/lhmsll.c \
containers/mlhmmv.c \
containers/lhmsmv.c \
containers/lhmss.c \
containers/hss.c \
containers/mixutil.c \
containers/loop_stack.c \
containers/local_stack.c \
containers/type_decl.c \
dsl/mlr_dsl_ast.c \
dsl/function_manager.c \
dsl/keylist_evaluators.c \
dsl/rval_expr_evaluators.c \
dsl/rxval_expr_evaluators.c \
dsl/rval_func_evaluators.c \
dsl/rxval_func_evaluators.c \
dsl/rval_list_evaluators.c \
dsl/mlr_dsl_stack_allocate.c \
dsl/mlr_dsl_blocked_ast.c \
dsl/mlr_dsl_cst.c \
dsl/mlr_dsl_cst_condish_statements.c \
dsl/mlr_dsl_cst_for_map_statements.c \
dsl/mlr_dsl_cst_for_srec_statements.c \
dsl/mlr_dsl_cst_func_subr.c \
dsl/mlr_dsl_cst_keywords.c \
dsl/mlr_dsl_cst_loop_control_statements.c \
dsl/mlr_dsl_cst_map_assignment_statements.c \
dsl/mlr_dsl_cst_output_statements.c \
dsl/mlr_dsl_cst_return_statements.c \
dsl/mlr_dsl_cst_scalar_assignment_statements.c \
dsl/mlr_dsl_cst_statements.c \
dsl/mlr_dsl_cst_triple_for_statements.c \
dsl/mlr_dsl_cst_unset_statements.c \
output/lrec_writer_csv.c \
output/lrec_writer_csvlite.c \
output/lrec_writer_dkvp.c \
output/lrec_writer_json.c \
output/lrec_writer_markdown.c \
output/lrec_writer_nidx.c \
output/lrec_writer_pprint.c \
output/lrec_writer_xtab.c \
output/lrec_writers.c \
output/multi_lrec_writer.c \
output/multi_out.c \
unit_test/test_rval_evaluators.c
TEST_JOIN_BUCKET_KEEPER_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/mlrescape.c \
lib/mlr_globals.c \
lib/string_builder.c \
lib/mlrregex.c \
lib/context.c \
lib/string_array.c \
containers/parse_trie.c \
containers/lrec.c \
containers/sllv.c \
containers/rslls.c \
containers/slls.c \
containers/lhmslv.c \
containers/lhmss.c \
containers/hss.c \
containers/mixutil.c \
containers/header_keeper.c \
containers/join_bucket_keeper.c \
input/stdio_byte_reader.c \
input/line_readers.c \
input/lrec_reader_gen.c \
input/lrec_reader_in_memory.c \
input/lrec_readers.c \
input/lrec_reader_stdio_csv.c \
input/lrec_reader_stdio_csvlite.c \
input/lrec_reader_stdio_dkvp.c \
input/lrec_reader_stdio_nidx.c \
input/lrec_reader_stdio_xtab.c \
input/lrec_reader_stdio_json.c \
input/mlr_json_adapter.c \
input/json_parser.c \
input/file_reader_stdio.c \
input/file_ingestor_stdio.c \
input/peek_file_reader.c \
unit_test/test_join_bucket_keeper.c
EXPERIMENTAL_READER_SRCS = \
lib/mlrutil.c \
lib/mlrdatetime.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
lib/context.c \
lib/mlrescape.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/string_array.c \
lib/string_builder.c \
input/stdio_byte_reader.c \
input/line_readers.c \
containers/parse_trie.c \
experimental/getlines.c
EXPERIMENTAL_JSON_VG_MEM_SRCS = \
lib/mlr_globals.c \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/nlnet_timegm.c \
lib/netbsd_strptime.c \
lib/mtrand.c \
input/json_parser.c \
experimental/json_vg_mem.c
# ================================================================
# User-make: creates the executable and runs unit & regression tests
# This is the default target for anyone pulling the repo and trying to
# build it to be able to use it. It just needs flex and the C compiler.
#top: mlr tests
# xxx temp:
top: mlr mlrg tests
install: mlr tests
mkdir -p $(INSTALLDIR)
cp mlr $(INSTALLDIR)
installhome: mlr tests
mkdir -p $(HOME)/bin
cp mlr $(HOME)/bin
# ================================================================
tags: .always
ctags -R .
# Optimized build of the main executable. CCOPT (compiler, flags, -O3) compiles
# every file matched by NON_DSL_SRCS and links it with the parser objects in
# DSL_OBJS in a single compiler invocation. The parsing prerequisite produces
# those objects first; .always forces this rule to run on every invocation.
mlr: .always parsing
$(CCOPT) $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlr
# On x86_64/Centos I had to first do
# sudo yum install glibc-devel glibc-static
mlr.static: .always parsing
$(CCOPT) -static $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlr.static
# Build the three DSL parser objects named in DSL_OBJS inside ./parsing.
# $(MAKE) is used instead of a literal "make" so that the sub-make is the same
# program as the parent (for example when this file is run via gmake) and so
# that command-line flags such as -j, -n and -k are passed down to it.
parsing: .always
	$(MAKE) -C parsing -f Makefile.no-autoconfig mlr_dsl_parse.o
	$(MAKE) -C parsing -f Makefile.no-autoconfig mlr_dsl_lexer.o
	$(MAKE) -C parsing -f Makefile.no-autoconfig mlr_dsl_wrapper.o
two: mlr mlrg
# ----------------------------------------------------------------
# Other executable variants
# Debug version
mlrg: .always parsing
$(CCDEBUG) $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlrg
# Debug version with local-stack verbosity
mlrt: .always parsing
$(CCDEBUG) -DLOCAL_STACK_TRACE_ENABLE -DLOCAL_STACK_BOUNDS_CHECK_ENABLE \
$(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlrt
# Profile version. Usage:
# * make mlrp
# * mlrp {arguments}
# * gprof mlrp gmon.out > myfile.txt
# Note: works on Linux; not on OSX.
mlrp: .always parsing
$(CCDEBUG) -g -pg $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlrp
# ASAN version
mlra: .always parsing
$(CCASAN) $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlra
# ================================================================
tests: unit-test reg-test
# Build every unit-test executable (the prerequisites), then run them one after
# another. Each line is a separate recipe command, so unless make is told to
# keep going (-k / -i) the run stops at the first executable that exits
# non-zero and the final DONE is not printed.
unit-test: test-mlrutil test-mlrregex test-argparse test-line-readers test-byte-readers test-peek-file-reader test-parse-trie test-lrec test-multiple-containers test-mlhmmv test-string-builder test-rval-evaluators test-join-bucket-keeper
./test-mlrutil
./test-mlrregex
./test-argparse
./test-line-readers
./test-byte-readers
./test-peek-file-reader
./test-parse-trie
./test-lrec
./test-multiple-containers
./test-mlhmmv
./test-string-builder
./test-rval-evaluators
./test-join-bucket-keeper
@echo
@echo DONE
reg-test:
./reg_test/run
# ----------------------------------------------------------------
# Run this after regression-test expected output has changed, and is verified
# to be OK. (Example: after adding new test cases in reg_test/run.)
regtest-copy taters:
cp output-regtest/out reg_test/expected
regtest-copy-dev:
cp output-regtest/out-dev reg_test/expected
# ----------------------------------------------------------------
# Unit-test executables
test-argparse: .always
$(CCDEBUG) $(TEST_ARGPARSE_SRCS) $(LFLAGS) -o test-argparse
test-byte-readers: .always
$(CCDEBUG) $(TEST_BYTE_READERS_SRCS) $(LFLAGS) -o test-byte-readers
test-line-readers: .always
$(CCDEBUG) $(TEST_LINE_READERS_SRCS) $(LFLAGS) -o test-line-readers
test-peek-file-reader: .always
$(CCDEBUG) $(TEST_PEEK_FILE_READER_SRCS) $(LFLAGS) -o test-peek-file-reader
test-lrec: .always
$(CCDEBUG) $(TEST_LREC_SRCS) $(LFLAGS) -o test-lrec -lm
test-multiple-containers: .always
$(CCDEBUG) $(TEST_MULTIPLE_CONTAINERS_SRCS) $(LFLAGS) -o test-multiple-containers -lm
test-mlhmmv: .always
$(CCDEBUG) $(TEST_MLHMMV_SRCS) $(LFLAGS) -o test-mlhmmv -lm
test-mlrutil: .always
$(CCDEBUG) $(TEST_MLRUTIL_SRCS) $(LFLAGS) -o test-mlrutil -lm
test-mlrregex: .always
$(CCDEBUG) $(TEST_MLRREGEX_SRCS) $(LFLAGS) -o test-mlrregex
test-string-builder: .always
$(CCDEBUG) $(TEST_STRING_BUILDER_SRCS) $(LFLAGS) -o test-string-builder
test-parse-trie: .always
$(CCDEBUG) $(TEST_PARSE_TRIE_SRCS) $(LFLAGS) -o test-parse-trie
test-rval-evaluators: .always
$(CCDEBUG) $(TEST_RVAL_EVALUATORS_SRCS) $(LFLAGS) -o test-rval-evaluators -lm
test-join-bucket-keeper: .always
$(CCDEBUG) $(TEST_JOIN_BUCKET_KEEPER_SRCS) $(LFLAGS) -o test-join-bucket-keeper -lm
# ----------------------------------------------------------------
# Standalone mains
getl: .always
$(CCOPT) $(EXPERIMENTAL_READER_SRCS) $(LFLAGS) -o getl
json-vg-mem: .always
$(CCDEBUG) $(EXPERIMENTAL_JSON_VG_MEM_SRCS) $(LFLAGS) -o json-vg-mem
# ================================================================
# Remove every executable this Makefile can build, then clean ./parsing.
# BSD can't handle rm -v, alas, so plain rm -f is used.
# mlrd and tester are not produced by any rule in this file; they are kept in
# the list (presumably to sweep up leftovers from older builds).
# $(MAKE) keeps the sub-make the same program as the parent and forwards flags.
clean:
	@rm -f mlr mlr.static mlrd mlrg mlrt mlrp mlra tester
	@rm -f test-mlrutil test-mlrregex test-argparse test-line-readers \
		test-byte-readers test-peek-file-reader test-parse-trie test-lrec \
		test-multiple-containers test-mlhmmv test-string-builder \
		test-rval-evaluators test-join-bucket-keeper
	@rm -f getl json-vg-mem
	@$(MAKE) -C parsing -f Makefile.no-autoconfig clean
perfclean profclean:
@rm -f gmon.out perf.data perf.data.old
# Force target: its recipe is a no-op and never creates a file named .always,
# so (as long as no such file exists) it is always considered out of date and
# every rule listing it as a prerequisite is re-run on each invocation.
.always:
@true

499
c/Makefile.windows Normal file
View file

@ -0,0 +1,499 @@
# ================================================================
# NOTE: This makefile is not intended to be used in a packaging system --
# rather, Miller uses autoconfig for that. This makefile is intended for users
# who prefer (for whatever reason) to bypass autoconfig. Please also see
# http://johnkerl.org/miller/doc/build.html#Without_using_autoconfig
# ================================================================
# ================================================================
# Travis does "export CC=gcc", "export CC=clang" so we can pick those up via
# "make -e" in ../.travis.yml. Note that "CC?=gcc", without make -e, results
# in CC being expanded to cc on my OSX laptop, which is not OK. Hence make -e.
CC=gcc
CFLAGS=-std=gnu99
IFLAGS=-I. -I..
WFLAGS=-Wall
# Worth exploring ... but needs handling for unused parameters in functions which comply with interfaces.
# Best option I'm aware of is to replace "void foo(int bar) {...}" with "void foo(int) {...}" throughout.
# WFLAGS=-Wall -Wextra -Werror
# WFLAGS=-Wall -Wextra -pedantic-errors -Werror
# WFLAGS=-Wall -Wextra -pedantic-errors -Werror=unused-variable
LFLAGS=-lm -lpcreposix
# You can do make -e INSTALLDIR=/path/to/somewhere/else/bin
INSTALLDIR=/usr/local/bin
CCOPT=$(CC) $(CFLAGS) $(IFLAGS) $(WFLAGS) -O3
CCDEBUG=$(CC) -g $(CFLAGS) $(IFLAGS) $(WFLAGS)
# clang ASAN. Use -O1 for debug mode to (among other things) disable inlining.
#CCOPT=clang -fsanitize=address -fno-omit-frame-pointer $(CFLAGS) $(IFLAGS) $(WFLAGS)
#CCDEBUG=clang -g -fsanitize=address -fno-omit-frame-pointer $(CFLAGS) $(IFLAGS) $(WFLAGS)
# ----------------------------------------------------------------
# Miller source except DSL.
# The auxiliary-entry sources are globbed from auxents/, matching
# Makefile.no-autoconfig in this directory. "aux" is a reserved device name on
# Windows, so a directory of that name cannot be used there; an unmatched glob
# would also be handed to the compiler literally and fail the build.
# NOTE(review): confirm that c/auxents is the directory present in this tree.
NON_DSL_SRCS = \
  *.c \
  cli/*.c \
  lib/*.c \
  containers/*.c \
  auxents/*.c \
  stream/*.c \
  input/*.c \
  dsl/*.c \
  mapping/*.c \
  output/*.c
# DSL
DSL_OBJS = \
./parsing/mlr_dsl_parse.o \
./parsing/mlr_dsl_lexer.o \
./parsing/mlr_dsl_wrapper.o
# Unit-test code
TEST_ARGPARSE_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/string_builder.c \
cli/argparse.c \
containers/slls.c \
containers/sllv.c \
lib/string_array.c \
unit_test/test_argparse.c
TEST_BYTE_READERS_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlrescape.c \
lib/mlr_test_util.c \
lib/mlr_globals.c \
lib/string_builder.c \
input/string_byte_reader.c \
input/stdio_byte_reader.c \
unit_test/test_byte_readers.c
TEST_LINE_READERS_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlrescape.c \
lib/context.c \
lib/mlr_test_util.c \
lib/mlr_globals.c \
lib/string_builder.c \
input/line_readers.c \
unit_test/test_line_readers.c
TEST_PEEK_FILE_READER_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlr_test_util.c \
lib/mlr_globals.c \
lib/string_builder.c \
input/string_byte_reader.c \
input/peek_file_reader.c \
unit_test/test_peek_file_reader.c
TEST_LREC_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/context.c \
lib/mlrdatetime.c \
lib/mlrescape.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/string_builder.c \
lib/string_array.c \
containers/mlrval.c \
containers/mvfuncs.c \
containers/lrec.c \
containers/header_keeper.c \
containers/sllv.c \
containers/slls.c \
containers/rslls.c \
containers/lhmsv.c \
containers/lhmslv.c \
containers/sllmv.c \
containers/mlhmmv.c \
input/line_readers.c \
input/file_reader_stdio.c \
input/file_ingestor_stdio.c \
input/lrec_reader_stdio_csvlite.c \
input/lrec_reader_stdio_dkvp.c \
input/lrec_reader_stdio_nidx.c \
input/lrec_reader_stdio_xtab.c \
input/lrec_reader_stdio_json.c \
input/mlr_json_adapter.c \
input/json_parser.c \
unit_test/test_lrec.c
TEST_MULTIPLE_CONTAINERS_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/context.c \
lib/mlrdatetime.c \
lib/mlrescape.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/string_builder.c \
containers/lrec.c \
containers/header_keeper.c \
containers/sllv.c \
containers/slls.c \
containers/rslls.c \
lib/string_array.c \
containers/hss.c \
containers/mlrval.c \
containers/mvfuncs.c \
containers/lhmsi.c \
containers/lhmsll.c \
containers/lhmss.c \
containers/lhmsv.c \
containers/lhms2v.c \
containers/lhmslv.c \
containers/lhmsmv.c \
containers/loop_stack.c \
containers/percentile_keeper.c \
containers/top_keeper.c \
containers/dheap.c \
input/line_readers.c \
input/file_reader_stdio.c \
input/file_ingestor_stdio.c \
input/lrec_reader_stdio_csvlite.c \
input/lrec_reader_stdio_dkvp.c \
input/lrec_reader_stdio_nidx.c \
input/lrec_reader_stdio_xtab.c \
input/lrec_reader_stdio_json.c \
input/mlr_json_adapter.c \
input/json_parser.c \
unit_test/test_multiple_containers.c
TEST_MLHMMV_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlrdatetime.c \
lib/string_builder.c \
lib/string_array.c \
lib/mlrregex.c \
lib/mlr_globals.c \
containers/mlrval.c \
containers/mvfuncs.c \
containers/mlhmmv.c \
containers/sllmv.c \
containers/sllv.c \
containers/slls.c \
containers/lrec.c \
unit_test/test_mlhmmv.c
TEST_MLRUTIL_SRCS = \
lib/mlr_globals.c \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/string_builder.c \
unit_test/test_mlrutil.c
TEST_MLRREGEX_SRCS = \
lib/mlr_globals.c \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlrregex.c \
lib/string_builder.c \
lib/string_array.c \
unit_test/test_mlrregex.c
TEST_STRING_BUILDER_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlr_globals.c \
lib/string_builder.c \
unit_test/test_string_builder.c
TEST_PARSE_TRIE_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlr_globals.c \
lib/string_builder.c \
containers/parse_trie.c \
unit_test/test_parse_trie.c
TEST_RVAL_EVALUATORS_SRCS = \
lib/mlr_globals.c \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlrdatetime.c \
lib/mlrregex.c \
lib/mlrmath.c \
lib/string_builder.c \
lib/string_array.c \
containers/mlrval.c \
containers/mvfuncs.c \
containers/xvfuncs.c \
containers/sllv.c \
containers/slls.c \
containers/sllmv.c \
containers/lrec.c \
containers/lhmsv.c \
containers/lhmsi.c \
containers/lhmsll.c \
containers/mlhmmv.c \
containers/lhmsmv.c \
containers/hss.c \
containers/mixutil.c \
containers/loop_stack.c \
containers/local_stack.c \
containers/type_decl.c \
dsl/mlr_dsl_ast.c \
dsl/function_manager.c \
dsl/keylist_evaluators.c \
dsl/rval_expr_evaluators.c \
dsl/rxval_expr_evaluators.c \
dsl/rval_func_evaluators.c \
dsl/rxval_func_evaluators.c \
dsl/rval_list_evaluators.c \
dsl/mlr_dsl_stack_allocate.c \
dsl/mlr_dsl_blocked_ast.c \
dsl/mlr_dsl_cst.c \
dsl/mlr_dsl_cst_condish_statements.c \
dsl/mlr_dsl_cst_for_map_statements.c \
dsl/mlr_dsl_cst_for_srec_statements.c \
dsl/mlr_dsl_cst_func_subr.c \
dsl/mlr_dsl_cst_keywords.c \
dsl/mlr_dsl_cst_loop_control_statements.c \
dsl/mlr_dsl_cst_map_assignment_statements.c \
dsl/mlr_dsl_cst_output_statements.c \
dsl/mlr_dsl_cst_return_statements.c \
dsl/mlr_dsl_cst_scalar_assignment_statements.c \
dsl/mlr_dsl_cst_statements.c \
dsl/mlr_dsl_cst_triple_for_statements.c \
dsl/mlr_dsl_cst_unset_statements.c \
output/lrec_writer_csv.c \
output/lrec_writer_csvlite.c \
output/lrec_writer_dkvp.c \
output/lrec_writer_json.c \
output/lrec_writer_markdown.c \
output/lrec_writer_nidx.c \
output/lrec_writer_pprint.c \
output/lrec_writer_xtab.c \
output/lrec_writers.c \
output/multi_lrec_writer.c \
output/multi_out.c \
unit_test/test_rval_evaluators.c
TEST_JOIN_BUCKET_KEEPER_SRCS = \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/mlrescape.c \
lib/mlr_globals.c \
lib/string_builder.c \
lib/context.c \
containers/parse_trie.c \
containers/lrec.c \
containers/sllv.c \
containers/rslls.c \
containers/slls.c \
containers/lhmslv.c \
containers/hss.c \
containers/mixutil.c \
containers/header_keeper.c \
containers/join_bucket_keeper.c \
input/stdio_byte_reader.c \
input/line_readers.c \
input/lrec_reader_in_memory.c \
input/lrec_readers.c \
input/lrec_reader_stdio_csv.c \
input/lrec_reader_stdio_csvlite.c \
input/lrec_reader_stdio_dkvp.c \
input/lrec_reader_stdio_nidx.c \
input/lrec_reader_stdio_xtab.c \
input/lrec_reader_stdio_json.c \
input/mlr_json_adapter.c \
input/json_parser.c \
input/file_reader_stdio.c \
input/file_ingestor_stdio.c \
input/peek_file_reader.c \
unit_test/test_join_bucket_keeper.c
EXPERIMENTAL_READER_SRCS = \
lib/mlrutil.c \
lib/mlrdatetime.c \
lib/mlr_arch.c \
lib/mtrand.c \
lib/context.c \
lib/mlrescape.c \
lib/mlrregex.c \
lib/mlr_globals.c \
lib/string_array.c \
lib/string_builder.c \
input/stdio_byte_reader.c \
input/line_readers.c \
containers/parse_trie.c \
experimental/getlines.c
EXPERIMENTAL_JSON_VG_MEM_SRCS = \
lib/mlr_globals.c \
lib/mlrutil.c \
lib/mlr_arch.c \
lib/mtrand.c \
input/json_parser.c \
experimental/json_vg_mem.c
# ================================================================
# User-make: creates the executable and runs unit & regression tests
# This is the default target for anyone pulling the repo and trying to
# build it to be able to use it. It just needs flex and the C compiler.
#top: mlr tests
# xxx temp:
top: mlr mlrg tests
install: mlr tests
cp mlr $(INSTALLDIR)
installhome: mlr tests
cp mlr $(HOME)/bin
# ================================================================
tags: .always
ctags -R .
mlr: .always parsing
$(CCOPT) $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlr
# On x86_64/Centos I had to first do
# sudo yum install glibc-devel glibc-static
mlr.static: .always parsing
$(CCOPT) -static $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlr.static
# Builds the DSL parser, lexer, and wrapper objects in the parsing/ subdirectory.
# $(MAKE) is used instead of a literal 'make' so the sub-makes run with the same
# make program as the parent and inherit its command-line flags (-j, -n, -k, ...).
parsing: .always
	$(MAKE) -C parsing -f Makefile.no-autoconfig mlr_dsl_parse.o
	$(MAKE) -C parsing -f Makefile.no-autoconfig mlr_dsl_lexer.o
	$(MAKE) -C parsing -f Makefile.no-autoconfig mlr_dsl_wrapper.o
two: mlr mlrg
# ----------------------------------------------------------------
# Other executable variants
# Debug version
mlrg: .always parsing
$(CCDEBUG) $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlrg
# Debug version with local-stack verbosity
mlrt: .always parsing
$(CCDEBUG) -DLOCAL_STACK_TRACE_ENABLE -DLOCAL_STACK_BOUNDS_CHECK_ENABLE \
$(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlrt
# Profile version. Usage:
# * make mlrp
# * mlrp {arguments}
# * gprof mlrp gmon.out > myfile.txt
# Note: works on Linux; not on OSX.
mlrp: .always parsing
$(CCDEBUG) -g -pg $(NON_DSL_SRCS) $(DSL_OBJS) $(LFLAGS) -o mlrp
# ================================================================
tests: unit-test reg-test
unit-test: test-mlrutil test-mlrregex test-argparse test-line-readers test-byte-readers test-peek-file-reader test-parse-trie test-lrec test-multiple-containers test-mlhmmv test-string-builder test-rval-evaluators test-join-bucket-keeper
./test-mlrutil
./test-mlrregex
./test-argparse
./test-line-readers
./test-byte-readers
./test-peek-file-reader
./test-parse-trie
./test-lrec
./test-multiple-containers
./test-mlhmmv
./test-string-builder
./test-rval-evaluators
./test-join-bucket-keeper
@echo
@echo DONE
reg-test:
./reg_test/run
# ----------------------------------------------------------------
# Run this after regression-test expected output has changed, and is verified to be
# OK. (Example: after adding new test cases in reg_test/run.)
regtest-copy taters:
cp output-regtest/out reg_test/expected
regtest-copy-dev:
cp output-regtest/out-dev reg_test/expected
# ----------------------------------------------------------------
# Unit-test executables
test-argparse: .always
$(CCDEBUG) $(TEST_ARGPARSE_SRCS) $(LFLAGS) -o test-argparse
test-byte-readers: .always
$(CCDEBUG) $(TEST_BYTE_READERS_SRCS) $(LFLAGS) -o test-byte-readers
test-line-readers: .always
$(CCDEBUG) $(TEST_LINE_READERS_SRCS) $(LFLAGS) -o test-line-readers
test-peek-file-reader: .always
$(CCDEBUG) $(TEST_PEEK_FILE_READER_SRCS) $(LFLAGS) -o test-peek-file-reader
test-lrec: .always
$(CCDEBUG) $(TEST_LREC_SRCS) $(LFLAGS) -o test-lrec -lm
test-multiple-containers: .always
$(CCDEBUG) $(TEST_MULTIPLE_CONTAINERS_SRCS) $(LFLAGS) -o test-multiple-containers -lm
test-mlhmmv: .always
$(CCDEBUG) $(TEST_MLHMMV_SRCS) $(LFLAGS) -o test-mlhmmv -lm
test-mlrutil: .always
$(CCDEBUG) $(TEST_MLRUTIL_SRCS) $(LFLAGS) -o test-mlrutil -lm
test-mlrregex: .always
$(CCDEBUG) $(TEST_MLRREGEX_SRCS) $(LFLAGS) -o test-mlrregex
test-string-builder: .always
$(CCDEBUG) $(TEST_STRING_BUILDER_SRCS) $(LFLAGS) -o test-string-builder
test-parse-trie: .always
$(CCDEBUG) $(TEST_PARSE_TRIE_SRCS) $(LFLAGS) -o test-parse-trie
test-rval-evaluators: .always
$(CCDEBUG) $(TEST_RVAL_EVALUATORS_SRCS) $(LFLAGS) -o test-rval-evaluators -lm
test-join-bucket-keeper: .always
$(CCDEBUG) $(TEST_JOIN_BUCKET_KEEPER_SRCS) $(LFLAGS) -o test-join-bucket-keeper -lm
# ----------------------------------------------------------------
# Standalone mains
getl: .always
$(CCOPT) $(EXPERIMENTAL_READER_SRCS) $(LFLAGS) -o getl
json-vg-mem: .always
$(CCDEBUG) $(EXPERIMENTAL_JSON_VG_MEM_SRCS) $(LFLAGS) -o json-vg-mem
# ================================================================
# BSD can't handle rm -v, alas
# Removes every executable this Makefile can build (main binaries, debug/trace/
# profile/static variants, unit-test drivers, and the standalone experimental
# mains), then cleans the parsing/ subdirectory. 'mlrd' and 'tester' are kept in
# the list for compatibility with older build outputs.
clean:
	@rm -f mlr mlrd mlrg mlrp mlrt mlr.static tester
	@rm -f test-argparse test-byte-readers test-line-readers test-peek-file-reader
	@rm -f test-lrec test-multiple-containers test-mlhmmv test-mlrutil test-mlrregex
	@rm -f test-string-builder test-parse-trie test-rval-evaluators test-join-bucket-keeper
	@rm -f getl json-vg-mem
	@$(MAKE) -C parsing -f Makefile.no-autoconfig clean
perfclean profclean:
@rm -f gmon.out perf.data perf.data.old
.always:
@true

96
c/README.md Normal file
View file

@ -0,0 +1,96 @@
# Data flow
Miller data flow is records produced by a record-reader in `input/`, followed
by one or more mappers in `mapping/`, written by a record-writer in `output/`,
controlled by logic in `stream/`. Argument parsing for initial stream setup is
in `cli/`.
# Container names
The user-visible concept of *stream record* (or *srec*) is implemented in the
`lrec_t` (*linked-record type*) data structure. The user-visible concept of
*out-of-stream variables* is implemented using the `mlhmmv_t` (multi-level
hashmap of mlrvals) structure. Source-code comments and names within the code
refer to `srec`/`lrec` and `oosvar`/`mlhmmv` depending on the context.
While those two data structures contain user-visible data structures, others
are used in Miller's implementation: `slls` and `sllv` are singly-linked lists
of string and void-star respectively; `lhmss` is a linked hashmap from string
to string; `lhmsi` is a linked hashmap from string to int; and so on.
# Memory management
Miller is streaming and as near stateless as possible. For most Miller
functions, you can ingest a 20GB file with 4GB RAM, no problem. For example,
`mlr cat` of a DKVP file retains no data in memory from one line to another;
`mlr cat` of a CSV file retains only the field names from the header line. The
`stats1` and `stats2` commands retain only aggregation state (e.g. count and
sum over specified fields needed to compute mean of specified fields). The `mlr
tac` and `mlr sort` commands, obviously, need to consume and retain all input
records before emitting any output records.
Miller classes are in general modular, following a constructor/destructor model
with minimal dependencies between classes. As a general rule, void-star
payloads (`sllv`, `lhmslv`) must be freed by the caller (which, unlike the
container, has access to the data type) whereas non-void-star payloads (`slls`,
`hss`) are freed by the container class.
One complication is for free-flags in `lrec` and `slls`: the idea is that an
entire line is mallocked and presented by the record reader; then individual
fields are split out and populated into linked lists or records. To reduce the
amount of strduping there, free-flags are used to track which fields should be
freed by the destructor and which are freed elsewhere.
The `header_keeper` object is an elaboration on this theme: suppose there is a
CSV file with header line `a,b,c` and data lines `1,2,3`, then `4,5,6`, then
`7,8,9`. Then the keys `a`, `b`, and `c` are shared between all three records;
they are retained in a single `header_keeper` object.
A bigger complication to the otherwise modular nature of Miller is its
*baton-passing memory-management model*. Namely, one class may be responsible
for freeing memory allocated by another class.
For example, using `mlr cat`: The record-reader produces records and returns
pointers to them. The record-mapper is just a pass-through; it returns the
record-pointers it receives. The record-writer formats the records to stdout
and does not return them, so it is responsible for freeing them.
The same holds for `mlr cut -x` and any other mappers which modify record
objects without creating new ones. By contrast, `stats1` et al. produce their
own records; they free what they do not pass on.
# Null-lrec conventions
Record-readers return a null lrec-pointer to signify end of input stream.
Each mapper takes an lrec-pointer as input and returns a linked list of
lrec-pointers.
Null-lrec is input to mappers to signify end of stream: e.g. `sort` or `tac`
should use this as a signal to deliver the sorted/reversed list of rows.
When a mapper has no output before end of stream (e.g. `sort` or `tac` while
accumulating inputs) it returns a null lrec-pointer which is treated as
synonymous with returning an empty list.
At end of stream, a mapper returns a linked list of records ending in a null
lrec-pointer.
A null lrec-pointer at end of stream is passed to lrec writers so that they may
produce final output (e.g. pretty-print which produces no output until end of
stream).
# Performance optimizations
The initial implementation of Miller used `lhmss`
(insertion-ordered string-to-string hash map) for record objects.
Keys and values were strduped out of file-input lines. Each of the following
produced from 5 to 30 percent performance gains:
* The `lrec` object is a hashless map suited to low access-to-creation ratio.
See detailed comments in
https://github.com/johnkerl/miller/blob/master/c/containers/lrec.h.
* Free-flags as discussed above removed additional occurrences of string copies.
* Using `mmap` to read files gets rid of double passes on record parsing
(one to find end of line, and another to separate fields) as well as most use
of `malloc`. Note however that standard input cannot be mmapped, so both
record-reader options are retained.

4
c/asanmk Executable file
View file

@ -0,0 +1,4 @@
#!/bin/bash
export CC='clang -fsanitize=address'
make -ef Makefile.no-autoconfig "$@"

8
c/auxents/Makefile.am Normal file
View file

@ -0,0 +1,8 @@
noinst_LTLIBRARIES= libauxents.la
libauxents_la_SOURCES= aux_entries.c \
aux_entries.h
libauxents_la_LIBADD= ../lib/libmlr.la
AM_CPPFLAGS= -I${srcdir}/../
AM_CFLAGS= -std=gnu99

592
c/auxents/Makefile.in Normal file
View file

@ -0,0 +1,592 @@
# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = c/auxents
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libauxents_la_DEPENDENCIES = ../lib/libmlr.la
am_libauxents_la_OBJECTS = aux_entries.lo
libauxents_la_OBJECTS = $(am_libauxents_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/autotools/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libauxents_la_SOURCES)
DIST_SOURCES = $(libauxents_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in \
$(top_srcdir)/autotools/depcomp
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDFLAGS = @LDFLAGS@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
noinst_LTLIBRARIES = libauxents.la
libauxents_la_SOURCES = aux_entries.c \
aux_entries.h
libauxents_la_LIBADD = ../lib/libmlr.la
AM_CPPFLAGS = -I${srcdir}/../
AM_CFLAGS = -std=gnu99
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu c/auxents/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu c/auxents/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libauxents.la: $(libauxents_la_OBJECTS) $(libauxents_la_DEPENDENCIES) $(EXTRA_libauxents_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(libauxents_la_OBJECTS) $(libauxents_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aux_entries.Plo@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

529
c/auxents/aux_entries.c Normal file
View file

@ -0,0 +1,529 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "lib/mlr_globals.h"
#include "lib/mlr_arch.h"
#include "lib/mlrutil.h"
#include "lib/netbsd_strptime.h"
#include "input/line_readers.h"
// ----------------------------------------------------------------
static int aux_list_main(int argc, char** argv);
static int lecat_main(int argc, char** argv);
static int termcvt_main(int argc, char** argv);
static int hex_main(int argc, char** argv);
static int unhex_main(int argc, char** argv);
static int netbsd_strptime_main(int argc, char** argv);
static int lecat_stream(FILE* input_stream, int do_color);
static void hex_dump_fp(FILE *in_fp, FILE *out_fp, int do_raw);
static void unhex_fp(FILE *in_fp, FILE *out_fp);
static void aux_list_usage(char* argv0, char* argv1, FILE* o, int exit_code);
static void lecat_usage(char* argv0, char* argv1, FILE* o, int exit_code);
static void termcvt_usage(char* argv0, char* argv1, FILE* o, int exit_code);
static void hex_usage(char* argv0, char* argv1, FILE* o, int exit_code);
static void unhex_usage(char* argv0, char* argv1, FILE* o, int exit_code);
static void netbsd_strptime_usage(char* argv0, char* argv1, FILE* o, int exit_code);
// ----------------------------------------------------------------
// Function type for a subcommand entry point: receives the full argc/argv and
// returns the process exit status.
typedef int aux_main_t(int argc, char**argv);
// Function type for a subcommand usage printer: writes help text to 'o' and
// exits with 'exit_code'.
typedef void aux_usage_t( char* argv0, char* argv1, FILE* o, int exit_code);
// One row of the subcommand dispatch table.
typedef struct _aux_lookup_entry_t {
// Subcommand name, compared against argv[1].
char* name;
// Entry point invoked when the name matches.
aux_main_t* pmain;
// Usage printer for the subcommand.
// NOTE(review): not called through the table in the code visible here.
aux_usage_t* pusage;
} aux_lookup_entry_t;
// Dispatch table of all auxiliary subcommands; show_aux_entries lists them in
// this order.
static aux_lookup_entry_t aux_lookup_table[] = {
{ "aux-list", aux_list_main, aux_list_usage },
{ "lecat", lecat_main, lecat_usage },
{ "termcvt", termcvt_main, termcvt_usage },
{ "hex", hex_main, hex_usage },
{ "unhex", unhex_main, unhex_usage },
{ "netbsd-strptime", netbsd_strptime_main, netbsd_strptime_usage },
};
// Number of rows in aux_lookup_table, derived from the array so it cannot drift.
static int aux_lookup_table_size = sizeof(aux_lookup_table) / sizeof(aux_lookup_table[0]);
// ================================================================
void do_aux_entries(int argc, char** argv) {
if (argc < 2) {
return;
}
for (int i = 0; i < aux_lookup_table_size; i++) {
if (streq(argv[1], aux_lookup_table[i].name)) {
exit(aux_lookup_table[i].pmain(argc, argv));
}
}
// else return to mlrmain for the rest of Miller.
}
void show_aux_entries(FILE* fp) {
fprintf(fp, "Available subcommands:\n");
for (int i = 0; i < aux_lookup_table_size; i++) {
fprintf(fp, " %s\n", aux_lookup_table[i].name);
}
fprintf(fp, "For more information, please invoke %s {subcommand} --help\n", MLR_GLOBALS.bargv0);
}
// ----------------------------------------------------------------
// Prints the help text for the aux-list subcommand to 'o', then terminates the
// process with 'exit_code'. Does not return.
//   argv0, argv1: program name and subcommand name, echoed in the usage line.
static void aux_list_usage(char* argv0, char* argv1, FILE* o, int exit_code) {
	fprintf(o, "Usage: %s %s [options]\n", argv0, argv1);
	// The remaining lines take no arguments, so emit them as one literal.
	fputs(
		"Options:\n"
		"-h or --help: print this message\n",
		o);
	exit(exit_code);
}
// Entry point for the aux-list subcommand: prints the available auxiliary
// subcommands to stdout and returns exit status 0.
//
// Declared static to agree with its forward declaration and with the other
// subcommand mains in this file. The arguments are accepted only to satisfy the
// aux_main_t signature.
static int aux_list_main(int argc, char** argv) {
	(void)argc;
	(void)argv;
	show_aux_entries(stdout);
	return 0;
}
// ----------------------------------------------------------------
// Prints the help text for the lecat subcommand to 'o', then terminates the
// process with 'exit_code'. Does not return.
//   argv0, argv1: program name and subcommand name, echoed in the usage line.
static void lecat_usage(char* argv0, char* argv1, FILE* o, int exit_code) {
	static const char* const body_lines[] = {
		"Simply echoes input, but flags CR characters in red and LF characters in green.\n",
		"If zero file names are supplied, standard input is read.\n",
		"Options:\n",
		"--mono: don't try to colorize the output\n",
		"-h or --help: print this message\n",
	};
	const size_t num_body_lines = sizeof(body_lines) / sizeof(body_lines[0]);

	fprintf(o, "Usage: %s %s [options] {zero or more file names}\n", argv0, argv1);
	for (size_t i = 0; i < num_body_lines; i++) {
		fputs(body_lines[i], o);
	}
	exit(exit_code);
}
// Entry point for the lecat subcommand: echoes its input while making CR and LF
// characters visible.
//
// argv[0] is the program name and argv[1] is "lecat". argv[2] may be -h/--help
// (prints usage and exits 0) or --mono (disables colorization); any other
// argument starting with '-' in that position is an error (returns 1). The
// remaining arguments are file names; with none, standard input is read.
//
// Returns 0 if every stream was processed successfully, 1 otherwise. Exits with
// status 1 if a named file cannot be opened.
static int lecat_main(int argc, char** argv) {
	int ok = 1;
	int do_color = TRUE;

	if (argc >= 3) {
		if (streq(argv[2], "-h") || streq(argv[2], "--help")) {
			lecat_usage(argv[0], argv[1], stdout, 0);
		}
	}

	// 'mlr' and 'lecat' are already argv[0] and argv[1].
	int argb = 2;
	if (argc >= 3 && argv[argb][0] == '-') {
		if (streq(argv[argb], "--mono")) {
			do_color = FALSE;
			argb++;
		} else {
			fprintf(stderr, "%s %s: unrecognized option \"%s\".\n",
				argv[0], argv[1], argv[argb]);
			return 1;
		}
	}

	if (argb == argc) {
		if (!lecat_stream(stdin, do_color)) {
			ok = 0;
		}
	} else {
		for (int argi = argb; argi < argc; argi++) {
			char* file_name = argv[argi];
			// Binary mode: text mode may translate CR/LF on some platforms, which
			// would hide exactly the characters this tool is meant to reveal.
			FILE* input_stream = fopen(file_name, "rb");
			if (input_stream == NULL) {
				perror(file_name);
				exit(1);
			}
			// Accumulate rather than overwrite, so a failure on an earlier file
			// is not masked by success on a later one; keep processing the rest.
			if (!lecat_stream(input_stream, do_color)) {
				ok = 0;
			}
			fclose(input_stream);
		}
	}

	return ok ? 0 : 1;
}
// Copies input_stream to stdout byte by byte until EOF, rendering each CR as
// "[CR]" and each LF as "[LF]" followed by a real newline. When do_color is
// nonzero the CR marker is wrapped in xterm red and the LF marker in xterm
// green, each followed by an attribute reset. Always returns 1.
static int lecat_stream(FILE* input_stream, int do_color) {
	// With color off the wrappers collapse to empty strings, so the same
	// format strings serve both modes.
	const char* red_on   = do_color ? "\033[31;01m" : ""; // xterm red
	const char* green_on = do_color ? "\033[32;01m" : ""; // xterm green
	const char* color_off = do_color ? "\033[0m" : "";

	int ch;
	while ((ch = fgetc(input_stream)) != EOF) {
		switch (ch) {
		case '\r':
			printf("%s[CR]%s", red_on, color_off);
			break;
		case '\n':
			printf("%s[LF]\n%s", green_on, color_off);
			break;
		default:
			putchar(ch);
			break;
		}
	}
	return 1;
}
// ================================================================
// Rewrites line terminators from input_stream to output_stream: each line
// obtained from mlr_alloc_read_line_multiple_delimiter (called with 'inend' as
// the delimiter) is written out followed by 'outend', then freed. Stops when the
// reader returns NULL. Always returns 1.
// NOTE(review): presumably the reader strips the 'inend' terminator from the
// returned line -- confirm in input/line_readers.
static int termcvt_stream(FILE* input_stream, FILE* output_stream, char* inend, char* outend) {
	int inend_length = strlen(inend);
	size_t line_length = MLR_ALLOC_READ_LINE_INITIAL_SIZE;
	char* line = NULL;

	while ((line = mlr_alloc_read_line_multiple_delimiter(
		input_stream, inend, inend_length, &line_length)) != NULL)
	{
		fputs(line, output_stream);
		fputs(outend, output_stream);
		free(line);
	}

	return 1;
}
// ----------------------------------------------------------------
// Prints the help text for the termcvt subcommand to 'o', then terminates the
// process with 'exit_code'. Does not return.
//   argv0, argv1: program name and subcommand name, echoed in the usage line.
//
// The closing line used to claim that files are never written in place, which
// contradicted the -I option documented two lines above it and implemented in
// termcvt_main; it now describes the default output destination only.
static void termcvt_usage(char* argv0, char* argv1, FILE* o, int exit_code) {
	fprintf(o, "Usage: %s %s [option] {zero or more file names}\n", argv0, argv1);
	fprintf(o, "Option (exactly one is required):\n");
	fprintf(o, "--cr2crlf\n");
	fprintf(o, "--lf2crlf\n");
	fprintf(o, "--crlf2cr\n");
	fprintf(o, "--crlf2lf\n");
	fprintf(o, "--cr2lf\n");
	fprintf(o, "--lf2cr\n");
	fprintf(o, "-I in-place processing (default is to write to stdout)\n");
	fprintf(o, "-h or --help: print this message\n");
	fprintf(o, "Zero file names means read from standard input.\n");
	fprintf(o, "Without -I, output is written to standard output.\n");
	exit(exit_code);
}
// ----------------------------------------------------------------
// Entry point for 'mlr termcvt': converts line terminators in the named files
// (or standard input when no file names are given).
//
// argv[0] is 'mlr'
// argv[1] is 'termcvt'
// argv[2] and onward are options (anything starting with '-'), followed by
// file names.
//
// With -I each named file is converted into a temporary file which is then
// renamed over the original; otherwise converted output goes to stdout.
// Returns 0 on success and 1 if any stream conversion reported failure.
// Exits the process with status 1 on an unrecognized option or on any
// open/close/rename failure.
static int termcvt_main(int argc, char** argv) {
    int ok = 1;
    char* inend = "\n";
    char* outend = "\n";
    int do_in_place = FALSE;

    if (argc < 2)
        termcvt_usage(argv[0], argv[1], stderr, 1);

    int argi;
    for (argi = 2; argi < argc; argi++) {
        char* opt = argv[argi];

        if (opt[0] != '-')
            break;

        if (streq(opt, "-h") || streq(opt, "--help")) {
            termcvt_usage(argv[0], argv[1], stdout, 0);
        } else if (streq(opt, "-I")) {
            do_in_place = TRUE;
        } else if (streq(opt, "--cr2crlf")) {
            inend = "\r";
            outend = "\r\n";
        } else if (streq(opt, "--lf2crlf")) {
            inend = "\n";
            outend = "\r\n";
        } else if (streq(opt, "--crlf2cr")) {
            inend = "\r\n";
            outend = "\r";
        } else if (streq(opt, "--lf2cr")) {
            inend = "\n";
            outend = "\r";
        } else if (streq(opt, "--crlf2lf")) {
            inend = "\r\n";
            outend = "\n";
        } else if (streq(opt, "--cr2lf")) {
            inend = "\r";
            outend = "\n";
        } else {
            // An unknown option is an error: report it on stderr and exit
            // nonzero rather than printing help to stdout with status 0.
            fprintf(stderr, "%s %s: unrecognized option \"%s\".\n",
                argv[0], argv[1], opt);
            termcvt_usage(argv[0], argv[1], stderr, 1);
        }
    }

    int nfiles = argc - argi;
    if (nfiles == 0) {
        ok = termcvt_stream(stdin, stdout, inend, outend) && ok;
    } else if (do_in_place) {
        for (; argi < argc; argi++) {
            char* file_name = argv[argi];

            // Open the input first so that an unreadable input does not leave
            // a stray temporary file behind.
            FILE* input_stream = fopen(file_name, "r");
            if (input_stream == NULL) {
                perror("fopen");
                fprintf(stderr, "%s: Could not open \"%s\" for read.\n",
                    MLR_GLOBALS.bargv0, file_name);
                exit(1);
            }

            char* temp_name = alloc_suffixed_temp_file_name(file_name);
            FILE* output_stream = fopen(temp_name, "wb");
            if (output_stream == NULL) {
                perror("fopen");
                fprintf(stderr, "%s: Could not open \"%s\" for write.\n",
                    MLR_GLOBALS.bargv0, temp_name);
                exit(1);
            }

            // Accumulate rather than overwrite, so a failure on an earlier
            // file is not masked by success on a later one.
            ok = termcvt_stream(input_stream, output_stream, inend, outend) && ok;

            fclose(input_stream);
            // fclose flushes buffered output; if that fails the temp file is
            // incomplete and must not replace the original.
            if (fclose(output_stream) != 0) {
                perror("fclose");
                fprintf(stderr, "%s: Could not finish writing \"%s\".\n",
                    MLR_GLOBALS.bargv0, temp_name);
                exit(1);
            }

            int rc = rename(temp_name, file_name);
            if (rc != 0) {
                perror("rename");
                fprintf(stderr, "%s: Could not rename \"%s\" to \"%s\".\n",
                    MLR_GLOBALS.bargv0, temp_name, file_name);
                exit(1);
            }
            free(temp_name);
        }
    } else {
        for (; argi < argc; argi++) {
            char* file_name = argv[argi];
            FILE* input_stream = fopen(file_name, "r");
            if (input_stream == NULL) {
                perror(file_name);
                exit(1);
            }
            ok = termcvt_stream(input_stream, stdout, inend, outend) && ok;
            fclose(input_stream);
        }
    }
    return ok ? 0 : 1;
}
// ================================================================
// Copyright (c) 1998 John Kerl.
// ================================================================
// This is a simple hex dump with hex offsets to the left, hex data in the
// middle, and ASCII at the right. This is a subset of the functionality of
// Unix od; I wrote it in my NT days.
//
// Example:
//
// $ d2h $(jot 0 128) | unhex | hex
// 00000000: 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f |................|
// 00000010: 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f |................|
// 00000020: 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f | !"#$%&'()*+,-./|
// 00000030: 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f |0123456789:;<=>?|
// 00000040: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f |@ABCDEFGHIJKLMNO|
// 00000050: 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f |PQRSTUVWXYZ[\]^_|
// 00000060: 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f |`abcdefghijklmno|
// 00000070: 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f |pqrstuvwxyz{|}~.|
// ================================================================
#define LINE_LENGTH_MAX 8192
// ----------------------------------------------------------------
// Prints the help text for 'mlr hex' to the stream o, then exits the process
// with exit_code. argv0 and argv1 are the program name and the subcommand
// name, echoed in the usage line. Does not return.
static void hex_usage(char* argv0, char* argv1, FILE* o, int exit_code) {
    static const char* const body_lines[] = {
        "Simple hex-dump.\n",
        "If zero file names are supplied, standard input is read.\n",
        "Options:\n",
        "-r: print only raw hex without leading offset indicators or trailing ASCII dump.\n",
        "-h or --help: print this message\n",
    };
    size_t num_lines = sizeof(body_lines) / sizeof(body_lines[0]);

    fprintf(o, "Usage: %s %s [options] {zero or more file names}\n", argv0, argv1);
    for (size_t i = 0; i < num_lines; i++)
        fputs(body_lines[i], o);
    exit(exit_code);
}
//----------------------------------------------------------------------
// 'mlr' and 'hex' are already argv[0] and argv[1].
// Entry point for 'mlr hex': hex-dumps each named file, or standard input
// when no file names are given. An optional leading "-r" selects raw output;
// "-h"/"--help" prints usage and exits.
//
// When more than one file is dumped in non-raw mode, each dump is preceded by
// a "<name>:" line and followed by a blank line. Files that cannot be opened
// are reported on stderr and skipped.
//
// Returns 0 if every input was dumped, 1 if any file could not be opened
// (matching unhex_main).
static int hex_main(int argc, char **argv) {
    int do_raw = 0;
    int argi = 2;
    int exit_code = 0;

    if (argc >= 3) {
        if (streq(argv[2], "-r")) {
            do_raw = 1;
            argi++;
        } else if (streq(argv[2], "-h") || streq(argv[2], "--help")) {
            hex_usage(argv[0], argv[1], stdout, 0);
        }
    }

    int num_file_names = argc - argi;
    if (num_file_names == 0) {
#ifdef WINDOWS
        setmode(fileno(stdin), O_BINARY);
#endif //WINDOWS
        hex_dump_fp(stdin, stdout, do_raw);
        return exit_code;
    }

    // File-name banners are only useful when several dumps share one output.
    int show_names = !do_raw && (num_file_names > 1);

    for ( ; argi < argc; argi++) {
        char* filename = argv[argi];

        if (show_names)
            printf("%s:\n", filename);

        FILE* in_fp = fopen(filename, "rb");
        if (in_fp == NULL) {
            fprintf(stderr, "Couldn't open \"%s\"; skipping.\n",
                filename);
            // Keep going, but make the failure visible in the exit status.
            exit_code = 1;
        } else {
            hex_dump_fp(in_fp, stdout, do_raw);
            fclose(in_fp);
        }

        if (show_names)
            printf("\n");
    }
    return exit_code;
}
// ----------------------------------------------------------------
// Hex-dumps everything readable from in_fp to out_fp, 16 bytes per output
// line, grouped in clumps of 4 bytes separated by an extra space.
//
// in_fp:  stream to read; consumed until fread returns 0.
// out_fp: stream that receives the dump.
// do_raw: when zero, each line starts with an 8-digit hex offset and ends
//         with an ASCII rendering between '|' characters (non-printable
//         bytes shown as '.'); when nonzero only the hex bytes are written.
//
// A short final line is padded with three spaces per missing byte (the width
// of one "%02x " cell) so that the ASCII column lines up with full lines.
static void hex_dump_fp(FILE *in_fp, FILE *out_fp, int do_raw) {
    enum {
        BYTES_PER_CLUMP = 4,
        CLUMPS_PER_LINE = 4,
        BYTES_PER_LINE = BYTES_PER_CLUMP * CLUMPS_PER_LINE
    };
    unsigned char buf[BYTES_PER_LINE];
    unsigned long num_bytes_total = 0;
    size_t num_bytes_read;

    while ((num_bytes_read = fread(buf, sizeof buf[0], sizeof buf, in_fp)) > 0) {
        size_t byteno;

        if (!do_raw)
            fprintf(out_fp, "%08lx: ", num_bytes_total);

        for (byteno = 0; byteno < sizeof buf; byteno++) {
            if (byteno < num_bytes_read)
                fprintf(out_fp, "%02x ", (unsigned int)buf[byteno]);
            else
                fputs("   ", out_fp); // same width as one "%02x " cell

            // Extra space after each clump except the last one on the line.
            if ((byteno % BYTES_PER_CLUMP) == (BYTES_PER_CLUMP - 1)
                && byteno < sizeof buf - 1)
            {
                fputc(' ', out_fp);
            }
        }

        if (!do_raw) {
            fputc('|', out_fp);
            for (byteno = 0; byteno < num_bytes_read; byteno++) {
                unsigned char ch = buf[byteno];
                fputc(isprint(ch) ? ch : '.', out_fp);
            }
            fputc('|', out_fp);
        }

        fputc('\n', out_fp);
        num_bytes_total += (unsigned long)num_bytes_read;
    }
}
// ----------------------------------------------------------------
// Prints the help text for 'mlr unhex' to the stream o, then exits the
// process with exit_code. argv0 and argv1 are the program name and the
// subcommand name, echoed in the usage line. Does not return.
static void unhex_usage(char* argv0, char* argv1, FILE* o, int exit_code) {
    static const char* const body_lines[] = {
        "Options:\n",
        "-h or --help: print this message\n",
        "Zero file names means read from standard input.\n",
        "Output is always to standard output; files are not written in-place.\n",
    };
    size_t num_lines = sizeof(body_lines) / sizeof(body_lines[0]);

    fprintf(o, "Usage: %s %s [option] {zero or more file names}\n", argv0, argv1);
    for (size_t i = 0; i < num_lines; i++)
        fputs(body_lines[i], o);
    exit(exit_code);
}
// ----------------------------------------------------------------
// Entry point for 'mlr unhex'; 'mlr' and 'unhex' are argv[0] and argv[1].
// Converts hex text back to bytes on stdout, reading each named file in turn
// or standard input when no file names are given. "-h"/"--help" as the first
// argument prints usage and exits.
// Returns 0 normally, or 1 if any named file could not be opened (such files
// are reported on stderr and skipped).
int unhex_main(int argc, char ** argv) {
    if (argc >= 3 && (streq(argv[2], "-h") || streq(argv[2], "--help")))
        unhex_usage(argv[0], argv[1], stdout, 0);

    if (argc == 2) {
        unhex_fp(stdin, stdout);
        return 0;
    }

    int exit_code = 0;
    int argi = 2;
    while (argi < argc) {
        char* filename = argv[argi++];
        FILE* infp = fopen(filename, "rb");
        if (infp != NULL) {
            unhex_fp(infp, stdout);
            fclose(infp);
            continue;
        }
        fprintf(stderr, "%s %s: Couldn't open \"%s\"; skipping.\n",
            argv[0], argv[1], filename);
        exit_code = 1;
    }
    return exit_code;
}
// ----------------------------------------------------------------
// Reads whitespace-separated hexadecimal numbers from infp and writes the low
// eight bits of each one to outfp as a single byte. Stops at end of input or
// at the first token that does not parse as hex.
static void unhex_fp(FILE *infp, FILE *outfp) {
    unsigned value;
    while (fscanf(infp, "%x", &value) >= 1) {
        unsigned char octet = value; // keeps only the low byte
        fwrite(&octet, sizeof octet, 1, outfp);
    }
}
// ================================================================
// Prints the help text for the standalone strptime driver to the stream o,
// then exits the process with exit_code. argv0 and argv1 are the program name
// and the subcommand name, echoed in the usage line. Does not return.
static void netbsd_strptime_usage(char* argv0, char* argv1, FILE* o, int exit_code) {
    fprintf(o, "Usage: %s %s {string value} {format}\n", argv0, argv1);
    fputs("Standalone driver for replacement strptime for MSYS2.\n", o);
    fputs("Example string value: 2012-03-04T05:06:07Z\n", o);
    // Doubled percent signs: this line goes through fprintf's formatter.
    fprintf(o, "Example format: %%Y-%%m-%%dT%%H:%%M:%%SZ\n");
    exit(exit_code);
}
//----------------------------------------------------------------------
#define MYBUFLEN 256
// Standalone driver for netbsd_strptime(); 'mlr' and 'netbsd_strptime' are
// argv[0] and argv[1]. Expects exactly two further arguments: the string to
// parse (argv[2]) and the strptime format (argv[3]).
//
// Prints the fields of the resulting struct tm plus the unparsed remainder,
// or a "Could not strptime" message if parsing fails. Returns 0.
// "-h"/"--help" prints usage to stdout and exits 0; a wrong argument count
// prints usage to stderr and exits 1.
static int netbsd_strptime_main(int argc, char **argv) {
    // Check argc before touching argv[2]: with no arguments argv[2] is not a
    // valid string and must not be passed to streq.
    if (argc >= 3 && (streq(argv[2], "-h") || streq(argv[2], "--help"))) {
        netbsd_strptime_usage(argv[0], argv[1], stdout, 0);
    }
    if (argc != 4) {
        netbsd_strptime_usage(argv[0], argv[1], stderr, 1);
    }

    struct tm tm;
    char* strptime_input = argv[2];
    char* format = argv[3];
    // Start from all-zero fields so that anything the format does not set
    // prints as 0 rather than as indeterminate data.
    memset(&tm, 0, sizeof(tm));

    char* strptime_output = netbsd_strptime(strptime_input, format, &tm);
    if (strptime_output == NULL) {
        printf("Could not strptime(\"%s\", \"%s\").\n", strptime_input, format);
    } else {
        printf("strptime: %s ->\n", strptime_input);
        printf(" tm_sec = %d\n", tm.tm_sec);
        printf(" tm_min = %d\n", tm.tm_min);
        printf(" tm_hour = %d\n", tm.tm_hour);
        printf(" tm_mday = %d\n", tm.tm_mday);
        printf(" tm_mon = %d\n", tm.tm_mon);
        printf(" tm_year = %d\n", tm.tm_year);
        printf(" tm_wday = %d\n", tm.tm_wday);
        printf(" tm_yday = %d\n", tm.tm_yday);
        printf(" tm_isdst = %d\n", tm.tm_isdst);
        printf(" remainder = \"%s\"\n", strptime_output);
    }
    return 0;
}

10
c/auxents/aux_entries.h Normal file
View file

@ -0,0 +1,10 @@
#ifndef AUX_ENTRIES_H
#define AUX_ENTRIES_H

// FILE is used in the prototype of show_aux_entries(); include it here so this
// header compiles regardless of what the including file pulled in first.
#include <stdio.h>

// Handles 'mlr lecat' and any other one-off tool things which don't go through the record-streaming logic.
// If the argument after the basename (i.e. argv[1]) is recognized then this function doesn't return,
// invoking the code for that argument instead and exiting.
void do_aux_entries(int argc, char** argv);

// Writes to fp; presumably a listing of the available auxiliary entry names
// (NOTE(review): confirm against the definition in aux_entries.c).
void show_aux_entries(FILE* fp);

#endif // AUX_ENTRIES_H

11
c/camake Executable file
View file

@ -0,0 +1,11 @@
#!/bin/bash
# Builds the 'mlr' and 'mlrg' targets with clang and AddressSanitizer
# (-fsanitize=address) using the non-autoconf makefile.
#
# The compiler command lines are passed through the environment; 'make -e'
# makes environment variables take precedence over assignments inside the
# makefile. CCOPT is the optimized (-O3) compile command and CCDEBUG the
# debug (-g) one; DSLCC/DSLCFLAGS are presumably consumed by the makefile in
# the 'parsing' subdirectory (confirm there).
export CCOPT="clang -fsanitize=address -std=gnu99 -I. -I.. -Wall -Werror -O3 -lm"
export CCDEBUG="clang -fsanitize=address -std=gnu99 -I. -I.. -Wall -Werror -g -lm"
export DSLCC="clang"
export DSLCFLAGS="-I.. -Wall -O2 -fsanitize=address"
# Clean the 'parsing' subdirectory first so its objects are rebuilt with the
# flags above, then build both targets.
make -e -f Makefile.no-autoconfig -C parsing clean
make -e -f Makefile.no-autoconfig mlr mlrg

18
c/cli/Makefile.am Normal file
View file

@ -0,0 +1,18 @@
# Automake input for the command-line-interface sources. They are built into
# libcli.la, a libtool library that is not installed (noinst_).
noinst_LTLIBRARIES= libcli.la
# Sources and headers belonging to libcli.la.
libcli_la_SOURCES= \
argparse.c \
argparse.h \
comment_handling.h \
json_array_ingest.h \
mlrcli.c \
mlrcli.h \
quoting.h
# TODO: causes circular dependency
# (The disabled lines below would build 'ap' from argparse.c compiled with
# -D__AP_MAIN__, linked against the containers and lib libraries.)
#noinst_PROGRAMS= ap
#ap_CPPFLAGS= -D__AP_MAIN__ ${AM_CPPFLAGS}
#ap_LDADD= ../containers/libcontainers.la ../lib/libmlr.la
#ap_SOURCES= argparse.c
# Add the parent source directory to the include path, so that includes such
# as "lib/mlrutil.h" and "cli/argparse.h" resolve.
AM_CPPFLAGS= -I${srcdir}/../
AM_CFLAGS= -std=gnu99

604
c/cli/Makefile.in Normal file
View file

@ -0,0 +1,604 @@
# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = c/cli
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libcli_la_LIBADD =
am_libcli_la_OBJECTS = argparse.lo mlrcli.lo
libcli_la_OBJECTS = $(am_libcli_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/autotools/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libcli_la_SOURCES)
DIST_SOURCES = $(libcli_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in \
$(top_srcdir)/autotools/depcomp
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDFLAGS = @LDFLAGS@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
noinst_LTLIBRARIES = libcli.la
libcli_la_SOURCES = \
argparse.c \
argparse.h \
comment_handling.h \
json_array_ingest.h \
mlrcli.c \
mlrcli.h \
quoting.h
# TODO: causes circular dependency
#noinst_PROGRAMS= ap
#ap_CPPFLAGS= -D__AP_MAIN__ ${AM_CPPFLAGS}
#ap_LDADD= ../containers/libcontainers.la ../lib/libmlr.la
#ap_SOURCES= argparse.c
AM_CPPFLAGS = -I${srcdir}/../
AM_CFLAGS = -std=gnu99
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu c/cli/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu c/cli/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libcli.la: $(libcli_la_OBJECTS) $(libcli_la_DEPENDENCIES) $(EXTRA_libcli_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(libcli_la_OBJECTS) $(libcli_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/argparse.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mlrcli.Plo@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

6
c/cli/README.md Normal file
View file

@ -0,0 +1,6 @@
# Miller command-line interface
mlrcli.h/c bundles up all of the command-line options.
I use argparse.h/c in place of getopt in order not to depend on GNU-isms; yet
elsewhere in Miller I readily depend on GNU-isms such as getdelim().

213
c/cli/argparse.c Normal file
View file

@ -0,0 +1,213 @@
#include <stdio.h>
#include "lib/mlrutil.h"
#include "cli/argparse.h"
// ================================================================
// Kind of command-line flag; selects how ap_parse_aux stores the flag's value
// through the destination pointer in the flag definition.
typedef enum _ap_flag_t {
// No argument: a preset int (the definition's intval) is stored into an int.
AP_INT_VALUE_FLAG,
// One argument, parsed with sscanf "%d" into an int.
AP_INT_FLAG,
// One argument, parsed with sscanf "%lld" into a long long.
AP_LONG_LONG_FLAG,
// One argument, parsed with mlr_try_float_from_string into a double.
AP_DOUBLE_FLAG,
// One argument; the destination char* is pointed at the argv string itself.
AP_STRING_FLAG,
// One argument, appended to a string list (allocated on first use), so
// repeating the flag accumulates values.
AP_STRING_BUILD_LIST_FLAG,
// One argument, split on ',' into a string list which replaces (and frees)
// any previous list.
AP_STRING_LIST_FLAG,
// One argument, split on ',' into a string array which replaces (and frees)
// any previous array.
AP_STRING_ARRAY_FLAG
} ap_flag_t;
// Definition of one recognized command-line flag.
typedef struct _ap_flag_def_t {
// Flag text as it appears on the command line; compared against argv entries
// with streq. The pointer is stored as given, not copied.
char* flag_name;
// How the flag's value is parsed and stored.
ap_flag_t type;
// Value stored for AP_INT_VALUE_FLAG; passed as 0 for the other types.
int intval;
// Destination for the parsed value; its real pointer type depends on 'type'.
void* pval;
// Number of argv slots the flag occupies, itself included. ap_parse_aux
// reports a missing argument when fewer than this many slots remain.
int count; // 1 for bool flags; 2 for the rest
} ap_flag_def_t;
// Looks up a flag definition by exact name among those registered in pstate.
// Returns the first matching definition, or NULL if flag_name is not defined.
static ap_flag_def_t* ap_find(ap_state_t* pstate, char* flag_name) {
    sllve_t* pnode = pstate->pflag_defs->phead;
    while (pnode != NULL) {
        ap_flag_def_t* pcandidate = pnode->pvvalue;
        if (streq(pcandidate->flag_name, flag_name))
            return pcandidate;
        pnode = pnode->pnext;
    }
    return NULL;
}
// Allocates a flag definition and fills in all of its fields from the
// arguments. flag_name and pval are stored as pointers, not copied. The
// caller owns the result; ap_free releases definitions held by a parser state.
static ap_flag_def_t* ap_flag_def_alloc(char* flag_name, ap_flag_t ap_type, int intval, void* pval, int count) {
    ap_flag_def_t* pnew = mlr_malloc_or_die(sizeof(ap_flag_def_t));
    *pnew = (ap_flag_def_t) {
        .flag_name = flag_name,
        .type = ap_type,
        .intval = intval,
        .pval = pval,
        .count = count,
    };
    return pnew;
}
// ================================================================
ap_state_t* ap_alloc() {
ap_state_t* pstate = mlr_malloc_or_die(sizeof(ap_state_t));
pstate->pflag_defs = sllv_alloc();
return pstate;
}
// Releases a parser state together with every flag definition registered in
// it. A NULL pstate is ignored.
//
// The values the flags point at are deliberately not freed here: linked lists
// filled in by list-valued flags are pointed to by mappers and freed by the
// mappers' own free methods. If a mapper misses on that contract, it shows up
// under valgrind --leak-check=full (e.g. reg_test/run --valgrind).
void ap_free(ap_state_t* pstate) {
    if (pstate != NULL) {
        sllve_t* pnode = pstate->pflag_defs->phead;
        while (pnode != NULL) {
            ap_flag_def_t* pdef = pnode->pvvalue;
            free(pdef);
            pnode = pnode->pnext;
        }
        sllv_free(pstate->pflag_defs);
        free(pstate);
    }
}
// ----------------------------------------------------------------
// Registers flag_name as an argument-less switch: when it is parsed, TRUE is
// stored into *pintval.
void ap_define_true_flag(ap_state_t* pstate, char* flag_name, int* pintval) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_INT_VALUE_FLAG, TRUE, pintval, 1);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as an argument-less switch: when it is parsed, FALSE is
// stored into *pintval.
void ap_define_false_flag(ap_state_t* pstate, char* flag_name, int* pintval) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_INT_VALUE_FLAG, FALSE, pintval, 1);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as an argument-less switch: when it is parsed, the
// preset value intval is stored into *pintval.
void ap_define_int_value_flag(ap_state_t* pstate, char* flag_name, int intval, int* pintval) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_INT_VALUE_FLAG, intval, pintval, 1);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as a flag taking one argument, which is parsed as a
// decimal int into *pintval.
void ap_define_int_flag(ap_state_t* pstate, char* flag_name, int* pintval) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_INT_FLAG, 0, pintval, 2);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as a flag taking one argument, which is parsed as a
// decimal long long into *pintval.
void ap_define_long_long_flag(ap_state_t* pstate, char* flag_name, long long* pintval) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_LONG_LONG_FLAG, 0, pintval, 2);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as a flag taking one argument, which is parsed as a
// floating-point number into the double at *pdoubleval.
void ap_define_float_flag(ap_state_t* pstate, char* flag_name, double* pdoubleval) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_DOUBLE_FLAG, 0, pdoubleval, 2);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as a flag taking one argument; when parsed, *pstring is
// pointed at that argv entry (the string is not copied).
void ap_define_string_flag(ap_state_t* pstate, char* flag_name, char** pstring) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_STRING_FLAG, 0, pstring, 2);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as a repeatable flag taking one argument; each
// occurrence appends its argument to the list at *pplist, which is allocated
// on first use if it is NULL.
void ap_define_string_build_list_flag(ap_state_t* pstate, char* flag_name, slls_t** pplist) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_STRING_BUILD_LIST_FLAG, 0, pplist, 2);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as a flag taking one comma-separated argument; when
// parsed, any list already at *pplist is freed and replaced by the list of
// the argument's comma-delimited pieces.
void ap_define_string_list_flag(ap_state_t* pstate, char* flag_name, slls_t** pplist) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_STRING_LIST_FLAG, 0, pplist, 2);
    sllv_append(pstate->pflag_defs, pdef);
}
// Registers flag_name as a flag taking one comma-separated argument; when
// parsed, any array already at *pparray is freed and replaced by the array of
// the argument's comma-delimited pieces.
void ap_define_string_array_flag(ap_state_t* pstate, char* flag_name, string_array_t** pparray) {
    ap_flag_def_t* pdef = ap_flag_def_alloc(flag_name, AP_STRING_ARRAY_FLAG, 0, pparray, 2);
    sllv_append(pstate->pflag_defs, pdef);
}
// ----------------------------------------------------------------
// Parses flags from argv starting at index *pargi. This is ap_parse_aux with
// error_on_unrecognized set to TRUE, so an undefined flag is reported as a
// parse failure. Returns whatever ap_parse_aux returns.
int ap_parse(ap_state_t* pstate, char* verb, int* pargi, int argc, char** argv) {
    const int error_on_unrecognized = TRUE;
    return ap_parse_aux(pstate, verb, pargi, argc, argv, error_on_unrecognized);
}
// Parses flags from argv starting at index *pargi, storing values through the
// pointers held in the matching flag definitions.
//
// Parsing stops at the first argument that does not begin with '-' or '+', at
// "-h"/"--help", at a flag with no definition, or at the first error. On
// return, *pargi is the index of the argument at which parsing stopped.
//
// Returns TRUE on success. Returns FALSE for "-h"/"--help", for an
// unrecognized flag when error_on_unrecognized is true, for a flag whose
// required argument is missing, for an int/long long/double flag whose
// argument cannot be parsed, and for a flag definition of unknown type.
// When error_on_unrecognized is false, an unrecognized flag simply ends
// parsing with the current status.
int ap_parse_aux(ap_state_t* pstate, char* verb, int* pargi, int argc, char** argv,
	int error_on_unrecognized)
{
	int argi = *pargi;
	int ok = TRUE;

	while (argi < argc) {
		if (argv[argi][0] != '-' && argv[argi][0] != '+') {
			break;
		}
		if (streq(argv[argi], "-h") || streq(argv[argi], "--help")) {
			ok = FALSE;
			break;
		}

		ap_flag_def_t* pdef = ap_find(pstate, argv[argi]);
		if (pdef == NULL) {
			if (error_on_unrecognized) {
				ok = FALSE;
			}
			break;
		}

		// count includes the flag itself, so this also guards every
		// argv[argi+1] access below.
		if ((argc-argi) < pdef->count) {
			fprintf(stderr, "%s %s: option %s requires an argument.\n",
				argv[0], verb, argv[argi]);
			fprintf(stderr, "\n");
			ok = FALSE;
			break;
		}

		if (pdef->type == AP_INT_VALUE_FLAG) {
			*(int *)pdef->pval = pdef->intval;

		} else if (pdef->type == AP_INT_FLAG) {
			if (sscanf(argv[argi+1], "%d", (int *)pdef->pval) != 1) {
				fprintf(stderr, "%s %s: couldn't parse \"%s\" after \"%s\" as integer.\n",
					argv[0], verb, argv[argi+1], argv[argi]);
				fprintf(stderr, "\n");
				// A malformed number is a parse failure, not a warning:
				// report it to the caller instead of continuing.
				ok = FALSE;
				break;
			}

		} else if (pdef->type == AP_LONG_LONG_FLAG) {
			if (sscanf(argv[argi+1], "%lld", (long long *)pdef->pval) != 1) {
				fprintf(stderr, "%s %s: couldn't parse \"%s\" after \"%s\" as integer.\n",
					argv[0], verb, argv[argi+1], argv[argi]);
				fprintf(stderr, "\n");
				ok = FALSE;
				break;
			}

		} else if (pdef->type == AP_DOUBLE_FLAG) {
			if (!mlr_try_float_from_string(argv[argi+1], (double *)pdef->pval)) {
				fprintf(stderr, "%s %s: couldn't parse \"%s\" after \"%s\" as double.\n",
					argv[0], verb, argv[argi+1], argv[argi]);
				fprintf(stderr, "\n");
				ok = FALSE;
				break;
			}

		} else if (pdef->type == AP_STRING_FLAG) {
			// The target points into argv; no copy is made.
			char** pstring = pdef->pval;
			*pstring = argv[argi+1];

		} else if (pdef->type == AP_STRING_BUILD_LIST_FLAG) {
			// Repeated occurrences accumulate into one list.
			slls_t** pplist = pdef->pval;
			if (*pplist == NULL) {
				*pplist = slls_alloc();
			}
			slls_append_no_free(*pplist, argv[argi+1]);

		} else if (pdef->type == AP_STRING_LIST_FLAG) {
			// A later occurrence replaces the list from an earlier one.
			slls_t** pplist = pdef->pval;
			if (*pplist != NULL) {
				slls_free(*pplist);
			}
			*pplist = slls_from_line(argv[argi+1], ',', FALSE);

		} else if (pdef->type == AP_STRING_ARRAY_FLAG) {
			// A later occurrence replaces the array from an earlier one.
			string_array_t** pparray = pdef->pval;
			if (*pparray != NULL) {
				string_array_free(*pparray);
			}
			*pparray = string_array_from_line(argv[argi+1], ',');

		} else {
			ok = FALSE;
			fprintf(stderr, "argparse.c: internal coding error: flag-def type %x not recognized.\n", pdef->type);
			fprintf(stderr, "\n");
			break;
		}

		argi += pdef->count;
	}

	*pargi = argi;
	return ok;
}

33
c/cli/argparse.h Normal file
View file

@ -0,0 +1,33 @@
// ================================================================
// Argument-parsing library, with non-getopt semantics.
// ================================================================
#ifndef ARGPARSE_H
#define ARGPARSE_H
#include "containers/slls.h"
#include "containers/sllv.h"
#include "lib/string_array.h"
// Argument-parser state: the list of flag definitions that the
// ap_define_*_flag functions append to.
typedef struct _ap_state_t {
sllv_t* pflag_defs;
} ap_state_t;
// Lifecycle of the parser state.
ap_state_t* ap_alloc();
void ap_free(ap_state_t* pstate);
// Flags taking no argument: seeing the flag stores a fixed int into *pintval.
// NOTE(review): ap_define_true_flag presumably stores TRUE, mirroring
// ap_define_false_flag -- confirm in argparse.c.
void ap_define_true_flag(ap_state_t* pstate, char* flag_name, int* pintval);
void ap_define_false_flag(ap_state_t* pstate, char* flag_name, int* pintval);
void ap_define_int_value_flag(ap_state_t* pstate, char* flag_name, int value, int* pintval);
// Flags taking one argument, which is parsed into the pointed-to variable.
void ap_define_int_flag(ap_state_t* pstate, char* flag_name, int* pintval);
void ap_define_long_long_flag(ap_state_t* pstate, char* flag_name, long long* pintval);
void ap_define_float_flag(ap_state_t* pstate, char* flag_name, double* pdoubleval);
// The string is not copied: *pstring is pointed at the argv entry.
void ap_define_string_flag(ap_state_t* pstate, char* flag_name, char** pstring);
// Each occurrence appends its argument to *pplist (allocated on first use).
void ap_define_string_build_list_flag(ap_state_t* pstate, char* flag_name, slls_t** pplist);
// Each occurrence replaces *pplist / *pparray with its comma-split argument.
void ap_define_string_list_flag(ap_state_t* pstate, char* flag_name, slls_t** pplist);
void ap_define_string_array_flag(ap_state_t* pstate, char* flag_name, string_array_t** pparray);
// Parses flags starting at argv[*pargi] and advances *pargi past the ones
// consumed. ap_parse is ap_parse_aux with error_on_unrecognized set to TRUE.
int ap_parse(ap_state_t* pstate, char* verb, int* pargi, int argc, char** argv);
int ap_parse_aux(ap_state_t* pstate, char* verb, int* pargi, int argc, char** argv,
int error_on_unrecognized);
#endif // ARGPARSE_H

10
c/cli/comment_handling.h Normal file
View file

@ -0,0 +1,10 @@
#ifndef COMMENT_HANDLING_H
#define COMMENT_HANDLING_H
// Comment-handling mode; carried in cli_reader_opts_t alongside the
// comment string.
// NOTE(review): the names suggest pass-through, skip, and treat-as-data
// respectively -- confirm against the readers that consume this.
typedef enum _comment_handling_t {
PASS_COMMENTS,
SKIP_COMMENTS,
COMMENTS_ARE_DATA,
} comment_handling_t;
#endif // COMMENT_HANDLING_H

11
c/cli/json_array_ingest.h Normal file
View file

@ -0,0 +1,11 @@
#ifndef JSON_ARRAY_INGEST_H
#define JSON_ARRAY_INGEST_H
// JSON-array ingest mode; carried in cli_reader_opts_t.
// NOTE(review): the names suggest the choices are to fail, to skip, or to
// convert arrays to maps, with UNSPECIFIED meaning "not set on the command
// line" -- confirm against the JSON reader.
typedef enum _json_array_ingest_t {
JSON_ARRAY_INGEST_UNSPECIFIED,
JSON_ARRAY_INGEST_FATAL,
JSON_ARRAY_INGEST_SKIP,
JSON_ARRAY_INGEST_AS_MAP,
} json_array_ingest_t;
#endif // JSON_ARRAY_INGEST_H

2474
c/cli/mlrcli.c Normal file

File diff suppressed because it is too large Load diff

139
c/cli/mlrcli.h Normal file
View file

@ -0,0 +1,139 @@
// ================================================================
// Miller command-line parsing
// ================================================================
#ifndef MLRCLI_H
#define MLRCLI_H
#include "lib/context.h"
#include "containers/slls.h"
#include "containers/sllv.h"
#include "cli/quoting.h"
#include "cli/comment_handling.h"
#include "cli/json_array_ingest.h"
#include "containers/lhmsll.h"
#include "containers/lhmss.h"
#include <unistd.h>
// ----------------------------------------------------------------
// Parameters for the internal data-generator 'reader' (embedded in
// cli_reader_opts_t as generator_opts).
typedef struct _generator_opts_t {
char* field_name;
// TODO: convert start/stop/step to mv_t
long long start;
long long stop;
long long step;
} generator_opts_t;
// Input-side (reader) options gathered from the command line.
// NOTE(review): irs/ifs/ips are presumably the input record, field, and pair
// separators -- confirm against mlrcli.c.
typedef struct _cli_reader_opts_t {
char* ifile_fmt;
char* irs;
char* ifs;
char* ips;
char* input_json_flatten_separator;
json_array_ingest_t json_array_ingest;
int allow_repeat_ifs;
int allow_repeat_ips;
int use_implicit_csv_header;
int allow_ragged_csv_input;
// Command for popen on input, e.g. "zcat -cf <". Can be null, in which case
// files are read directly rather than through a pipe.
char* prepipe;
comment_handling_t comment_handling;
char* comment_string;
// Fake internal-data-generator 'reader'
generator_opts_t generator_opts;
} cli_reader_opts_t;
// ----------------------------------------------------------------
// Output-side (writer) options gathered from the command line.
// NOTE(review): ors/ofs/ops are presumably the output record, field, and pair
// separators -- confirm against mlrcli.c.
typedef struct _cli_writer_opts_t {
char* ofile_fmt;
char* ors;
char* ofs;
char* ops;
int headerless_csv_output;
int right_justify_xtab_value;
int right_align_pprint;
int pprint_barred;
int stack_json_output_vertically;
int wrap_json_output_in_outer_list;
int json_quote_int_keys;
int json_quote_non_string_values;
char* output_json_flatten_separator;
char* oosvar_flatten_separator;
quoting_t oquoting;
} cli_writer_opts_t;
// ----------------------------------------------------------------
// Top-level result of command-line parsing: the reader and writer options
// plus the remaining main-flag settings.
typedef struct _cli_opts_t {
cli_reader_opts_t reader_opts;
cli_writer_opts_t writer_opts;
// These are used to construct the mapper list. In particular, for in-place mode
// they're reconstructed for each file. We make copies since each pass through a
// CLI-parser operates destructively, principally by running strtok over
// comma-delimited field-name lists.
char** original_argv;
char** non_in_place_argv;
int argc;
int mapper_argb;
slls_t* filenames;
char* ofmt;
long long nr_progress_mod;
int do_in_place;
int no_input;
// rand_seed is presumably meaningful only when have_rand_seed is set -- TODO confirm
int have_rand_seed;
unsigned rand_seed;
} cli_opts_t;
// ----------------------------------------------------------------
// Main entry point: parses the full command line. The mapper list is handed
// back through ppmapper_list.
cli_opts_t* parse_command_line(int argc, char** argv, sllv_t** ppmapper_list);
// See stream.c. The idea is that the mapper-chain is constructed once for normal stream-over-all-files
// mode, but per-file for in-place mode.
sllv_t* cli_parse_mappers(char** argv, int* pargi, int argc, cli_opts_t* popts);
// Handlers for reader and/or writer flags at argv[*pargi].
// NOTE(review): presumably each returns nonzero and advances *pargi when it
// consumes a flag -- confirm in mlrcli.c.
int cli_handle_reader_options(char** argv, int argc, int *pargi, cli_reader_opts_t* preader_opts);
int cli_handle_writer_options(char** argv, int argc, int *pargi, cli_writer_opts_t* pwriter_opts);
int cli_handle_reader_writer_options(char** argv, int argc, int *pargi,
cli_reader_opts_t* preader_opts, cli_writer_opts_t* pwriter_opts);
// Initializers for the option structs.
void cli_opts_init(cli_opts_t* popts);
void cli_reader_opts_init(cli_reader_opts_t* preader_opts);
void cli_writer_opts_init(cli_writer_opts_t* pwriter_opts);
// Default-appliers for the option structs.
void cli_apply_defaults(cli_opts_t* popts);
void cli_apply_reader_defaults(cli_reader_opts_t* preader_opts);
void cli_apply_writer_defaults(cli_writer_opts_t* pwriter_opts);
// For mapper join which has its separate input-format overrides:
void cli_merge_reader_opts(cli_reader_opts_t* pfunc_opts, cli_reader_opts_t* pmain_opts);
// For mapper tee & mapper put which have their separate output-format overrides:
void cli_merge_writer_opts(cli_writer_opts_t* pfunc_opts, cli_writer_opts_t* pmain_opts);
// Stream context is for lrec-writer drain on tee et al. when using aggregated
// output. E.g. pretty-print output has column widths which are only computable
// after all output records have been retained. The free methods are used as
// drain triggers.
void cli_opts_free(cli_opts_t* popts);
// The caller can unconditionally free the return value
char* cli_sep_from_arg(char* arg);
#endif // MLRCLI_H

13
c/cli/quoting.h Normal file
View file

@ -0,0 +1,13 @@
#ifndef QUOTING_H
#define QUOTING_H
// Output-quoting mode; carried in cli_writer_opts_t as oquoting.
// NOTE(review): QUOTE_UNSPECIFIED presumably means "not set on the command
// line" -- confirm against mlrcli.c.
typedef enum _quoting_t {
QUOTE_ALL,
QUOTE_NONE,
QUOTE_MINIMAL,
QUOTE_NUMERIC,
QUOTE_ORIGINAL,
QUOTE_UNSPECIFIED,
} quoting_t;
#endif // QUOTING_H

12
c/cmake Executable file
View file

@ -0,0 +1,12 @@
#!/bin/bash
# Builds the mlr and mlrg targets with clang via Makefile.no-autoconfig.
# make is run with -e so the variables exported here take precedence over
# assignments inside the makefiles.

# Flags common to the optimized and debug compiler command lines.
common_cflags="-std=gnu99 -I. -I.. -Wall -Werror"

export CCOPT="clang ${common_cflags} -O3 -lm"
export CCDEBUG="clang ${common_cflags} -g -lm"

export DSLCC="clang"
# Alternative without debug symbols: DSLCFLAGS="-I.. -Wall -O2"
export DSLCFLAGS="-I.. -Wall -O2 -g"

# Clean the parsing subdirectory first, then build from this directory.
make -e -f Makefile.no-autoconfig -C parsing clean
make -e -f Makefile.no-autoconfig mlr mlrg

62
c/containers/Makefile.am Normal file
View file

@ -0,0 +1,62 @@
# libcontainers.la is a libtool library that is built but not installed
# (noinst_ prefix).
noinst_LTLIBRARIES= libcontainers.la
# Sources and headers that make up libcontainers.la. boxed_xval.h is
# header-only; every other entry is a .c/.h pair.
libcontainers_la_SOURCES= \
boxed_xval.h \
dheap.c \
dheap.h \
dvector.c \
dvector.h \
header_keeper.c \
header_keeper.h \
hss.c \
hss.h \
join_bucket_keeper.c \
join_bucket_keeper.h \
lhms2v.c \
lhms2v.h \
lhmsi.c \
lhmsi.h \
lhmsll.c \
lhmsll.h \
lhmslv.c \
lhmslv.h \
lhmsmv.c \
lhmsmv.h \
lhmss.c \
lhmss.h \
lhmsv.c \
lhmsv.h \
local_stack.c \
local_stack.h \
loop_stack.c \
loop_stack.h \
lrec.c \
lrec.h \
mixutil.c \
mixutil.h \
mlhmmv.c \
mlhmmv.h \
parse_trie.c \
parse_trie.h \
percentile_keeper.c \
percentile_keeper.h \
rslls.c \
rslls.h \
sllmv.c \
sllmv.h \
slls.c \
slls.h \
sllv.c \
sllv.h \
top_keeper.c \
top_keeper.h \
type_decl.c \
type_decl.h \
xvfuncs.c \
xvfuncs.h
# Sibling libtool libraries added to libcontainers.la at link time.
libcontainers_la_LIBADD= \
../lib/libmlr.la \
../mapping/libmapping.la
# Put the parent source directory on the include path and compile as gnu99.
AM_CPPFLAGS= -I${srcdir}/../
AM_CFLAGS= -std=gnu99

678
c/containers/Makefile.in Normal file
View file

@ -0,0 +1,678 @@
# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = c/containers
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libcontainers_la_DEPENDENCIES = ../lib/libmlr.la \
../mapping/libmapping.la
am_libcontainers_la_OBJECTS = dheap.lo dvector.lo header_keeper.lo \
hss.lo join_bucket_keeper.lo lhms2v.lo lhmsi.lo lhmsll.lo \
lhmslv.lo lhmsmv.lo lhmss.lo lhmsv.lo local_stack.lo \
loop_stack.lo lrec.lo mixutil.lo mlhmmv.lo parse_trie.lo \
percentile_keeper.lo rslls.lo sllmv.lo slls.lo sllv.lo \
top_keeper.lo type_decl.lo xvfuncs.lo
libcontainers_la_OBJECTS = $(am_libcontainers_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/autotools/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libcontainers_la_SOURCES)
DIST_SOURCES = $(libcontainers_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in \
$(top_srcdir)/autotools/depcomp
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDFLAGS = @LDFLAGS@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
noinst_LTLIBRARIES = libcontainers.la
libcontainers_la_SOURCES = \
boxed_xval.h \
dheap.c \
dheap.h \
dvector.c \
dvector.h \
header_keeper.c \
header_keeper.h \
hss.c \
hss.h \
join_bucket_keeper.c \
join_bucket_keeper.h \
lhms2v.c \
lhms2v.h \
lhmsi.c \
lhmsi.h \
lhmsll.c \
lhmsll.h \
lhmslv.c \
lhmslv.h \
lhmsmv.c \
lhmsmv.h \
lhmss.c \
lhmss.h \
lhmsv.c \
lhmsv.h \
local_stack.c \
local_stack.h \
loop_stack.c \
loop_stack.h \
lrec.c \
lrec.h \
mixutil.c \
mixutil.h \
mlhmmv.c \
mlhmmv.h \
parse_trie.c \
parse_trie.h \
percentile_keeper.c \
percentile_keeper.h \
rslls.c \
rslls.h \
sllmv.c \
sllmv.h \
slls.c \
slls.h \
sllv.c \
sllv.h \
top_keeper.c \
top_keeper.h \
type_decl.c \
type_decl.h \
xvfuncs.c \
xvfuncs.h
libcontainers_la_LIBADD = \
../lib/libmlr.la \
../mapping/libmapping.la
AM_CPPFLAGS = -I${srcdir}/../
AM_CFLAGS = -std=gnu99
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu c/containers/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu c/containers/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libcontainers.la: $(libcontainers_la_OBJECTS) $(libcontainers_la_DEPENDENCIES) $(EXTRA_libcontainers_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(libcontainers_la_OBJECTS) $(libcontainers_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dheap.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dvector.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/header_keeper.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hss.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/join_bucket_keeper.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lhms2v.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lhmsi.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lhmsll.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lhmslv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lhmsmv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lhmss.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lhmsv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/local_stack.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_stack.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lrec.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mixutil.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mlhmmv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parse_trie.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/percentile_keeper.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rslls.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sllmv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slls.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sllv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/top_keeper.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/type_decl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xvfuncs.Plo@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

6
c/containers/README.md Normal file
View file

@ -0,0 +1,6 @@
# Miller containers
These are generally very standard and simple programming-exercise kinds of
things. There is some duplication of simple hash-map logic by key and value
types: this is plain C with no templates or generics and I didn't want to do
preprocessor tricks to reduce line count at the expense of code clarity.

51
c/containers/boxed_xval.h Normal file
View file

@ -0,0 +1,51 @@
// ================================================================
// This is for map-valued contexts: LHS/RHS of assignments,
// UDF/subroutine arguments, and UDF return values.
// The is_ephemeral flag is TRUE for map-literals, function return values, and
// data copied out of srecs. It is FALSE when the pointer is into an existing
// data structure's memory (e.g. oosvars or locals).
// ================================================================
#ifndef BOXED_XVAL_H
#define BOXED_XVAL_H
#include "../lib/mlrval.h"
#include "../containers/mlhmmv.h"
// ----------------------------------------------------------------
// An mlhmmv_xvalue_t paired with its ephemerality flag.
typedef struct _boxed_xval_t {
mlhmmv_xvalue_t xval;
char is_ephemeral; // TRUE or FALSE, as set by the box_* helpers
} boxed_xval_t;
// ----------------------------------------------------------------
// Wraps a terminal mlrval as an xvalue and boxes it with is_ephemeral TRUE.
static inline boxed_xval_t box_ephemeral_val(mv_t val) {
	boxed_xval_t boxed;
	boxed.xval = mlhmmv_xvalue_wrap_terminal(val);
	boxed.is_ephemeral = TRUE;
	return boxed;
}
// Wraps a terminal mlrval as an xvalue and boxes it with is_ephemeral FALSE.
static inline boxed_xval_t box_non_ephemeral_val(mv_t val) {
	boxed_xval_t boxed;
	boxed.xval = mlhmmv_xvalue_wrap_terminal(val);
	boxed.is_ephemeral = FALSE;
	return boxed;
}
// Boxes an existing xvalue with is_ephemeral TRUE.
static inline boxed_xval_t box_ephemeral_xval(mlhmmv_xvalue_t xval) {
	boxed_xval_t boxed;
	boxed.xval = xval;
	boxed.is_ephemeral = TRUE;
	return boxed;
}
// Boxes an existing xvalue with is_ephemeral FALSE.
static inline boxed_xval_t box_non_ephemeral_xval(mlhmmv_xvalue_t xval) {
	boxed_xval_t boxed;
	boxed.xval = xval;
	boxed.is_ephemeral = FALSE;
	return boxed;
}
#endif // BOXED_XVAL_H

290
c/containers/dheap.c Normal file
View file

@ -0,0 +1,290 @@
// ================================================================
// Zero-indexed max-heap of double.
// John Kerl
// 2012-06-02
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include "lib/mlrutil.h"
#include "lib/mlr_globals.h"
#include "containers/dheap.h"
// ----------------------------------------------------------------
// 1-up: left child 2*i
// right child 2*i+1
// parent i/2
// 0-up: left child 2*i+1
// right child 2*i+2
// parent (i-1)/2
// Why: Example of 1-up i=10 l=20 r=21; 0-up i=9 l=19 r=20.
// Or: 0-up i |-> 1-up i+1 |-> 1-up 2*i+2 |-> 0-up 2*i+1.
// And likewise for right child & parent.
// Returns the zero-up index of the left child of node i in a heap of n
// elements, or -1 if that child would lie at or beyond n.
static inline int dheap_left_child_index(int i, int n)
{
	int child = 2*i + 1;
	return (child < n) ? child : -1;
}
// Returns the zero-up index of the right child of node i in a heap of n
// elements, or -1 if that child would lie at or beyond n.
static inline int dheap_right_child_index(int i, int n)
{
	int child = 2*i + 2;
	return (child < n) ? child : -1;
}
// Returns the zero-up index of the parent of node i, or -1 for the root.
// The element count n is accepted but not consulted.
static inline int dheap_parent_index(int i, int n)
{
	return (i == 0) ? -1 : (i-1)/2;
}
// Exchanges the two doubles that pa and pb point to.
static inline void ptr_swap(double *pa, double *pb)
{
	double first = *pa;
	double second = *pb;
	*pa = second;
	*pb = first;
}
// ================================================================
dheap_t *dheap_alloc()
{
dheap_t *pdheap = mlr_malloc_or_die(sizeof(dheap_t));
pdheap->n = 0;
pdheap->alloc_size = DHEAP_INIT_ALLOC_SIZE;
pdheap->elements = mlr_malloc_or_die(pdheap->alloc_size*sizeof(double));
pdheap->is_malloced = 1;
return pdheap;
}
// ----------------------------------------------------------------
dheap_t *dheap_from_array(double *array, int n)
{
dheap_t *pdheap = mlr_malloc_or_die(sizeof(dheap_t));
pdheap->n = 0;
pdheap->alloc_size = n;
pdheap->elements = array;
pdheap->is_malloced = 0;
return pdheap;
}
// ----------------------------------------------------------------
// Releases a heap. The element storage is freed only when the heap owns it
// (is_malloced); a NULL heap pointer is tolerated.
void dheap_free(dheap_t *pdheap)
{
	if (pdheap != NULL) {
		// free(NULL) is a no-op, so no separate null check on elements is needed.
		if (pdheap->is_malloced)
			free(pdheap->elements);
		pdheap->elements = NULL;
		pdheap->alloc_size = 0;
		pdheap->n = 0;
		free(pdheap);
	}
}
// ================================================================
// Prints the subtree rooted at index i in pre-order, one element per line,
// indenting each line by its depth in the tree.
static void dheap_print_aux(dheap_t *pdheap, int i, int depth)
{
	int count = pdheap->n;
	if (i >= count)
		return;

	printf("[%04d] ", i);
	int remaining = depth;
	while (remaining-- > 0)
		printf(" ");
	printf("%.8lf\n", pdheap->elements[i]);

	int left = dheap_left_child_index (i, count);
	int right = dheap_right_child_index(i, count);
	if (left != -1)
		dheap_print_aux(pdheap, left, depth+1);
	if (right != -1)
		dheap_print_aux(pdheap, right, depth+1);
}
// Debug dump of the whole heap to stdout, framed by BEGIN/END marker lines.
void dheap_print(dheap_t *pdheap)
{
	int count = pdheap->n;
	printf("BEGIN DHEAP (n=%d):\n", count);
	dheap_print_aux(pdheap, 0, 0);
	printf("END DHEAP\n");
}
// ----------------------------------------------------------------
// 1
// 2 3
// 4 5 6 7
// 8 9 10 11 12 13 14 15
// Recursively verifies the max-heap property on the subtree rooted at index i:
// every node must be >= each of its children.
//
// pdheap: heap to check.
// i:      zero-up index of the subtree root.
// file, line: caller location, echoed in the diagnostic message.
//
// Returns TRUE when the entire subtree is valid. On the first violation found
// a message is written to stderr and FALSE is returned.
static int dheap_check_aux(dheap_t *pdheap, int i, char *file, int line)
{
	int n = pdheap->n;
	double *pe = pdheap->elements;
	if (i >= n)
		return TRUE;
	int li = dheap_left_child_index (i, n);
	int ri = dheap_right_child_index(i, n);
	if (li != -1) {
		if (pe[i] < pe[li]) {
			fprintf(stderr, "dheap check fail %s:%d pe[%d]=%lf < pe[%d]=%lf\n",
				file, line, i, pe[i], li, pe[li]);
			return FALSE;
		}
		// A violation deeper in the left subtree must fail the whole check.
		if (!dheap_check_aux(pdheap, li, file, line))
			return FALSE;
	}
	if (ri != -1) {
		if (pe[i] < pe[ri]) {
			fprintf(stderr, "dheap check fail %s:%d pe[%d]=%lf < pe[%d]=%lf\n",
				file, line, i, pe[i], ri, pe[ri]);
			return FALSE;
		}
		// Likewise for the right subtree.
		if (!dheap_check_aux(pdheap, ri, file, line))
			return FALSE;
	}
	return TRUE;
}
// Unit-test hook: checks the max-heap property over the whole heap.
// file and line identify the call site for diagnostics.
// Returns the result of dheap_check_aux for the root.
int dheap_check(dheap_t *pdheap, char *file, int line)
{
	// The heap is zero-indexed, so the root lives at index 0. Starting at 1
	// would skip the root and its entire right subtree.
	return dheap_check_aux(pdheap, 0, file, line);
}
// ----------------------------------------------------------------
// Restores the heap property upward from index i: while the element is larger
// than its parent, swap the two and continue from the parent's slot.
static void dheap_bubble_up(dheap_t *pdheap, int i)
{
	double *pe = pdheap->elements;
	int child = i;
	for (;;) {
		int parent = dheap_parent_index(child, pdheap->n);
		if (parent == -1)
			return; // reached the root
		if (!(pe[parent] < pe[child]))
			return; // already in order
		ptr_swap(&pe[parent], &pe[child]);
		child = parent;
	}
}
// ----------------------------------------------------------------
// 1
// 2 3
// 4 5 6 7
// 8 9 10 11 12 (n=13)
// Inserts v into the heap. Storage owned by the heap is doubled when full;
// a full heap built over caller-supplied storage is a fatal error.
void dheap_add(dheap_t *pdheap, double v)
{
	int is_full = (pdheap->n >= pdheap->alloc_size);
	if (is_full && !pdheap->is_malloced) {
		fprintf(stderr, "extension of non-malloced dheap!\n");
		exit(1);
	}
	if (is_full) {
		pdheap->alloc_size *= 2;
		pdheap->elements = (double *)mlr_realloc_or_die(pdheap->elements,
			pdheap->alloc_size*sizeof(double));
	}

	// Append at the end, then sift the new element up into place.
	int slot = pdheap->n;
	pdheap->elements[slot] = v;
	pdheap->n = slot + 1;
	dheap_bubble_up(pdheap, slot);
}
// ----------------------------------------------------------------
// 1. Replace the root of the dheap with the last element on the last level.
// 2. Compare the new root with its children; if they are in the correct order,
// stop.
// 3. If not, swap the element with one of its children and return to the
// previous step. (Swap with its smaller child in a min-dheap and its larger
// child in a max-dheap.)
// Restores the max-heap property downward from index i: if the element is
// smaller than a child, it is swapped with its larger child and the process
// repeats from that child's slot.
static void dheap_bubble_down(dheap_t *pdheap, int i)
{
	int li = dheap_left_child_index(i, pdheap->n);
	if (li == -1) {
		// We add left to right, so this means left and right are both nil.
		return;
	}
	int ri = dheap_right_child_index(i, pdheap->n);
	double *pe = pdheap->elements;
	if (ri == -1) {
		// Right is nil, left is non-nil.
		if (pe[li] > pe[i]) {
			ptr_swap(&pe[li], &pe[i]);
			dheap_bubble_down(pdheap, li);
		}
		return;
	}
	// Now left and right are both non-nil.
	//
	//        P               3
	//     L     R         9     7
	//    a b   c d       1 2   4 6
	//
	// Cases:
	double *L = &pe[li];
	double *P = &pe[i];
	double *R = &pe[ri];
	if (*L <= *P) {
		if (*R <= *P) {
			// 1. L <= R <= P: done.
			// 2. R <= L <= P: done.
			return;
		}
		else if (*P <= *R) {
			// 3. L <= P <= R: swap P&R; bubble down R.
			ptr_swap(R, P);
			dheap_bubble_down(pdheap, ri);
		}
	}
	else if (*R <= *P && *P <= *L) {
		// 4. R <= P <= L: swap P&L; bubble down L.
		ptr_swap(L, P);
		dheap_bubble_down(pdheap, li);
	}
	else if (*P <= *R && *R <= *L) {
		// 5. P <= R <= L: swap P&L; bubble down L.
		ptr_swap(L, P);
		dheap_bubble_down(pdheap, li);
	}
	else if (*P <= *L && *L <= *R) {
		// 6. P <= L <= R: swap P&R; bubble down R.
		// The values must be compared here, not the addresses: P, L and R
		// point into the same array in increasing order, so comparing the
		// pointers themselves is always true regardless of the data.
		ptr_swap(R, P);
		dheap_bubble_down(pdheap, ri);
	}
}
// Removes and returns the largest element. Removing from an empty heap is a
// fatal error.
double dheap_remove(dheap_t *pdheap)
{
	if (pdheap->n <= 0) {
		fprintf(stderr, "remove from empty dheap!\n");
		exit(1);
	}

	double *pe = pdheap->elements;
	double top = pe[0];

	// Move the last element to the root, shrink, then sift it down.
	int last = pdheap->n - 1;
	pe[0] = pe[last];
	pdheap->n = last;
	dheap_bubble_down(pdheap, 0);

	return top;
}
// In-place heap sort of array[0..n-1] into ascending order. The heap is built
// directly over the caller's array, so no element storage is allocated.
void dheap_sort(double *array, int n)
{
	dheap_t *pheap = dheap_from_array(array, n);

	// Phase 1: grow the heap over the array one element at a time.
	for (int k = 0; k < n; k++)
		dheap_add(pheap, pheap->elements[k]);

	// Phase 2: repeatedly pull the maximum into the slot just vacated at the end.
	for (int k = n; k > 0; k--)
		pheap->elements[k-1] = dheap_remove(pheap);

	dheap_free(pheap);
}

30
c/containers/dheap.h Normal file
View file

@ -0,0 +1,30 @@
// ================================================================
// Zero-indexed max-heap of doubles.
// John Kerl
// 2012-06-02
// ================================================================
#ifndef DHEAP_H
#define DHEAP_H
#define DHEAP_INIT_ALLOC_SIZE 1024 // Power of two
// State of one zero-indexed max-heap of doubles.
typedef struct _dheap_t {
int n; // number of elements currently in the heap
int alloc_size; // number of doubles the elements array has room for
char is_malloced; // 1 when elements was allocated by dheap_alloc; 0 when supplied via dheap_from_array
double *elements; // heap storage; the root is at index 0
} dheap_t;
dheap_t *dheap_alloc();
dheap_t *dheap_from_array(double *array, int n);
void dheap_free(dheap_t *pheap);
void dheap_add(dheap_t *pdheap, double v);
double dheap_remove(dheap_t *pdheap);
// For debug
void dheap_print(dheap_t *pdheap);
// For unit test
int dheap_check(dheap_t *pdheap, char *file, int line);
#endif // DHEAP_H

34
c/containers/dvector.c Normal file
View file

@ -0,0 +1,34 @@
#include <string.h>
#include <stdlib.h>
#include "lib/mlrutil.h"
#include "containers/dvector.h"
// ----------------------------------------------------------------
dvector_t* dvector_alloc(unsigned long long initial_capacity) {
unsigned long long capacity = initial_capacity;
dvector_t* pdvector = mlr_malloc_or_die(sizeof(dvector_t));
pdvector->data = mlr_malloc_or_die(capacity*sizeof(double));
pdvector->size = 0;
pdvector->capacity = capacity;
return pdvector;
}
// ----------------------------------------------------------------
// Releases the vector and its element storage. A NULL vector is tolerated.
void dvector_free(dvector_t* pdvector) {
	if (pdvector != NULL) {
		free(pdvector->data);
		pdvector->data = NULL;
		pdvector->capacity = 0;
		pdvector->size = 0;
		free(pdvector);
	}
}
// Appends value to the end of the vector, growing the storage geometrically
// when it is full.
//
// pdvector: vector to append to; must be non-NULL.
// value:    element to store at index size, after which size is incremented.
void dvector_append(dvector_t* pdvector, double value) {
	if (pdvector->size >= pdvector->capacity) {
		// Doubling a zero capacity would leave it at zero and the store below
		// would write out of bounds, so start from one element in that case.
		unsigned long long new_capacity =
			(pdvector->capacity == 0) ? 1 : pdvector->capacity * 2;
		pdvector->data = (double*)mlr_realloc_or_die(pdvector->data,
			new_capacity*sizeof(double));
		pdvector->capacity = new_capacity;
	}
	pdvector->data[pdvector->size++] = value;
}

14
c/containers/dvector.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef DVECTOR_H
#define DVECTOR_H
// Growable array of doubles.
typedef struct _dvector_t {
double* data; // element storage; released by dvector_free
unsigned long long size; // number of elements currently stored
unsigned long long capacity; // number of elements data has room for
} dvector_t;
dvector_t* dvector_alloc(unsigned long long initial_capacity);
void dvector_free(dvector_t* pdvector);
void dvector_append(dvector_t* pdvector, double value);
#endif // DVECTOR_H

View file

@ -0,0 +1,19 @@
#include <stdlib.h>
#include "lib/mlrutil.h"
#include "containers/header_keeper.h"
// Creates a header keeper holding the given header line and key list. The
// pointers are stored as-is (no copies are made); header_keeper_free later
// releases both.
header_keeper_t* header_keeper_alloc(char* line, slls_t* pkeys) {
	header_keeper_t* pkeeper = mlr_malloc_or_die(sizeof(header_keeper_t));
	pkeeper->pkeys = pkeys;
	pkeeper->line = line;
	return pkeeper;
}
// Releases the header keeper together with the line and key list it holds.
// A NULL keeper is tolerated.
void header_keeper_free(header_keeper_t* pheader_keeper) {
	if (pheader_keeper != NULL) {
		free(pheader_keeper->line);
		slls_free(pheader_keeper->pkeys);
		free(pheader_keeper);
	}
}

View file

@ -0,0 +1,19 @@
// ================================================================
// Retains field names from CSV header lines across record reads.
// See also c/README.md.
// ================================================================
#ifndef HEADER_KEEPER_H
#define HEADER_KEEPER_H
#include "containers/slls.h"
// Holds a header line and the list of keys associated with it. Both members
// are released by header_keeper_free.
typedef struct _header_keeper_t {
char* line; // the header line, as passed to header_keeper_alloc
slls_t* pkeys; // the key list, as passed to header_keeper_alloc
} header_keeper_t;
header_keeper_t* header_keeper_alloc(char* line, slls_t* pkeys);
void header_keeper_free(header_keeper_t* pheader_keeper);
#endif // HEADER_KEEPER_H

249
c/containers/hss.c Normal file
View file

@ -0,0 +1,249 @@
// ================================================================
// Array-only (open addressing) string-valued hash set with linear probing for
// collisions.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "containers/hss.h"
// ----------------------------------------------------------------
#define INITIAL_ARRAY_LENGTH 128
#define LOAD_FACTOR 0.7
#define OCCUPIED 444
#define DELETED 555
#define EMPTY 666
// ================================================================
// Resets one slot to the EMPTY state with no key and no ideal index.
static void hsse_clear(hsse_t *pentry) {
	pentry->state = EMPTY;
	pentry->ideal_index = -1;
	pentry->key = NULL;
}
// ----------------------------------------------------------------
// Allocates a slot array of the given length with every slot cleared to EMPTY.
static hsse_t* hss_make_alloc_array(int length) {
	hsse_t* slots = (hsse_t*)mlr_malloc_or_die(sizeof(hsse_t) * length);
	int i = length;
	while (i-- > 0)
		hsse_clear(&slots[i]);
	return slots;
}
// (Re)initializes the set with a freshly allocated, all-empty slot array of
// the given length. Any previous array pointer is overwritten, not freed.
static void hss_init(hss_t *pset, int length) {
	pset->array = hss_make_alloc_array(length);
	pset->array_length = length;
	pset->num_freed = 0;
	pset->num_occupied = 0;
}
// Allocates an empty set with INITIAL_ARRAY_LENGTH slots.
hss_t* hss_alloc() {
	hss_t* pnew = mlr_malloc_or_die(sizeof(hss_t));
	hss_init(pnew, INITIAL_ARRAY_LENGTH);
	return pnew;
}
// Releases the set and its slot array. The key strings themselves are not
// freed here. A NULL set is tolerated.
void hss_free(hss_t* pset) {
	if (pset != NULL) {
		free(pset->array);
		pset->array = NULL;
		pset->array_length = 0;
		pset->num_freed = 0;
		pset->num_occupied = 0;
		free(pset);
	}
}
// ----------------------------------------------------------------
// Used by get() and remove().
// Returns >=0 for where the key is *or* should go (end of chain).
// Probes linearly from the key's hashed ("ideal") slot and returns the index
// of either the OCCUPIED slot holding the key or the first EMPTY slot, which
// is where the key would be inserted. DELETED slots are skipped. The ideal
// slot is reported through *pideal_index. Exits if every slot has been probed
// without success.
static int hss_find_index_for_key(hss_t* pset, char* key, int* pideal_index) {
	int hash = mlr_string_hash_func(key);
	int probe = mlr_canonical_mod(hash, pset->array_length);
	*pideal_index = probe;

	for (int num_tries = 1; ; num_tries++) {
		hsse_t* pe = &pset->array[probe];

		// End of chain: the key is absent and would go here.
		if (pe->state == EMPTY)
			return probe;
		// Existing key found in chain.
		if (pe->state == OCCUPIED && streq(key, pe->key))
			return probe;

		// Otherwise the slot holds another key or has been freed; the sought
		// key may still be further down the chain.
		if (num_tries >= pset->array_length) {
			fprintf(stderr,
				"%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
			exit(1);
		}

		// Linear probing with wraparound.
		probe++;
		if (probe >= pset->array_length)
			probe = 0;
	}
	MLR_INTERNAL_CODING_ERROR();
}
// ----------------------------------------------------------------
static void hss_enlarge(hss_t* pset);
// Adds key to the set if it is not already present. The key pointer is stored
// as-is, not copied. The table is enlarged first when occupied plus freed
// slots reach the load factor.
void hss_add(hss_t* pset, char* key) {
	if ((pset->num_occupied + pset->num_freed) >= (pset->array_length*LOAD_FACTOR))
		hss_enlarge(pset);

	int ideal_index = 0;
	int index = hss_find_index_for_key(pset, key, &ideal_index);
	hsse_t* pe = &pset->array[index];

	switch (pe->state) {
	case OCCUPIED:
		// Key already present; nothing to do.
		return;
	case EMPTY:
		// End of chain: claim the slot.
		pe->state = OCCUPIED;
		pe->key = key;
		pe->ideal_index = ideal_index;
		pset->num_occupied++;
		return;
	default:
		fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
		exit(1);
	}
}
// ----------------------------------------------------------------
// Doubles the slot array and re-inserts every occupied key from the old array.
// Deleted slots are dropped in the process.
static void hss_enlarge(hss_t* pset) {
	hsse_t* old_slots = pset->array;
	int old_length = pset->array_length;

	hss_init(pset, old_length*2);

	for (int i = 0; i < old_length; i++) {
		hsse_t* pold = &old_slots[i];
		if (pold->state == OCCUPIED)
			hss_add(pset, pold->key);
	}

	free(old_slots);
}
// ----------------------------------------------------------------
// Returns TRUE if key is in the set, FALSE otherwise.
int hss_has(hss_t* pset, char* key) {
	int ideal_index = 0;
	int index = hss_find_index_for_key(pset, key, &ideal_index);
	int state = pset->array[index].state;

	if (state == OCCUPIED)
		return TRUE;
	if (state == EMPTY)
		return FALSE;

	fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
	exit(1);
}
// ----------------------------------------------------------------
// Removes key from the set if present, marking its slot DELETED so that probe
// chains running through it stay intact. Removing an absent key is a no-op.
void hss_remove(hss_t* pset, char* key) {
	int ideal_index = 0;
	int index = hss_find_index_for_key(pset, key, &ideal_index);
	hsse_t* pe = &pset->array[index];

	switch (pe->state) {
	case OCCUPIED:
		pe->state = DELETED;
		pe->key = NULL;
		pe->ideal_index = -1;
		pset->num_occupied--;
		pset->num_freed++;
		break;
	case EMPTY:
		// Key not present.
		break;
	default:
		fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
		exit(1);
	}
}
// ----------------------------------------------------------------
// Empties the set in place, keeping the current slot array and its length.
void hss_clear(hss_t* pset) {
	pset->num_freed = 0;
	pset->num_occupied = 0;
	int i = pset->array_length;
	while (i-- > 0)
		hsse_clear(&pset->array[i]);
}
// Returns the number of keys currently in the set.
int hss_size(hss_t* pset) {
	int count = pset->num_occupied;
	return count;
}
// ----------------------------------------------------------------
// Unit-test hook: recounts the OCCUPIED and DELETED slots and compares them
// with the cached counters. Returns TRUE when both agree; otherwise reports
// the first mismatch on stderr and returns FALSE.
int hss_check_counts(hss_t* pset) {
	int nocc = 0;
	int ndel = 0;
	for (int index = 0; index < pset->array_length; index++) {
		int state = pset->array[index].state;
		if (state == OCCUPIED)
			nocc++;
		else if (state == DELETED)
			ndel++;
	}

	int ok = TRUE;
	if (nocc != pset->num_occupied) {
		fprintf(stderr,
			"occupancy-count mismatch: actual %d != cached %d.\n",
			nocc, pset->num_occupied);
		ok = FALSE;
	} else if (ndel != pset->num_freed) {
		fprintf(stderr,
			"freed-count mismatch: actual %d != cached %d.\n",
			ndel, pset->num_freed);
		ok = FALSE;
	}
	return ok;
}
// ----------------------------------------------------------------
// Maps a slot-state code to a printable name; unknown codes yield "?????".
static char* get_state_name(int state) {
	if (state == OCCUPIED)
		return "occupied";
	if (state == DELETED)
		return "freed";
	if (state == EMPTY)
		return "empty";
	return "?????";
}
// Debug dump: prints one line per slot with its state, index, ideal index and key.
void hss_print(hss_t* pset) {
	int index = 0;
	while (index < pset->array_length) {
		hsse_t* pe = &pset->array[index];

		const char* key_string;
		if (pe == NULL)
			key_string = "none";
		else if (pe->key == NULL)
			key_string = "null";
		else
			key_string = pe->key;

		printf(
			"| stt: %-8s | idx: %6d | nidx: %6d | key: %12s |\n",
			get_state_name(pe->state), index, pe->ideal_index, key_string);
		index++;
	}
}

43
c/containers/hss.h Normal file
View file

@ -0,0 +1,43 @@
// ================================================================
// Array-only (open addressing) string-valued hash set with linear probing for
// collisions.
//
// Notes:
// * null key is not supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef HSS_H
#define HSS_H
// ----------------------------------------------------------------
// One slot of the open-addressing table.
typedef struct _hsse_t {
char* key; // key held by an occupied slot; NULL otherwise
int state; // slot state code (occupied / deleted / empty), defined in hss.c
int ideal_index; // slot the key hashes to before probing; -1 when the slot holds no key
} hsse_t;
// ----------------------------------------------------------------
// The hash set: a slot array plus cached slot counts.
typedef struct _hss_t {
int num_occupied; // number of slots currently holding a key
int num_freed; // number of slots marked deleted by hss_remove
int array_length; // number of slots in array
hsse_t* array; // the slot array
} hss_t;
// ----------------------------------------------------------------
hss_t* hss_alloc();
void hss_free(hss_t* pset);
void hss_add(hss_t* pset, char* key);
int hss_has(hss_t* pset, char* key);
void hss_remove(hss_t* pset, char* key);
void hss_clear(hss_t* pset);
int hss_size(hss_t* pset);
// Unit-test hook
int hss_check_counts(hss_t* pset);
#endif // HSS_H

View file

@ -0,0 +1,504 @@
#include <stdio.h>
#include <stdlib.h>
#include "lib/mlrutil.h"
#include "lib/mlr_globals.h"
#include "lib/context.h"
#include "containers/mixutil.h"
#include "containers/join_bucket_keeper.h"
#include "input/lrec_readers.h"
// ================================================================
// JOIN_BUCKET_KEEPER
//
// This data structure supports Miller's sorted (double-streaming) join. It is
// perhaps best explained by first comparing with the unsorted (half-streaming)
// case.
//
// In both cases, we have left and right join keys. Suppose the left file has
// data with field name "L" to be joined with right-file(s) data with field
// name "R". For the unsorted case (see mapper_join.c) the entire left file is
// first loaded into buckets of record-lists, one for each distinct value of L.
// E.g. given the following:
//
// +-----+-----+
// | L | R |
// + --- + --- +
// | a | a |
// | c | b |
// | a | f |
// | b | |
// | c | |
// | d | |
// | a | |
// +-----+-----+
//
// the left file is bucketed as
//
// +-----+ +-----+ +-----+ +-----+
// | L | | L | | L | | L |
// + --- + + --- + + --- + + --- +
// | a | | c | | b | | d |
// | a | | c | +-----+ +-----+
// | a | + --- +
// + --- +
//
// Then the right file is processed one record at a time (hence
// "half-streaming"). The pairings are easy:
// * the right record with R=a is paired with the L=a bucket,
// * the right record with R=b is paired with the L=b bucket,
// * the right record with R=f is unpaired, and
// * the left records with L=c and L=d are unpaired.
//
// ----------------------------------------------------------------
// Now for the sorted (doubly-streaming) case. Here we require that the left
// and right files be already sorted (lexically ascending) by the join fields.
// Then the example inputs look like this:
//
// +-----+-----+
// | L | R |
// + --- + --- +
// | a | a |
// | a | b |
// | a | f |
// | b | |
// | c | |
// | c | |
// | d | |
// +-----+-----+
//
// The right file is still read one record at a time. It's the job of this
// join_bucket_keeper class to keep track of the left-file buckets, one bucket
// at a time. This includes all records with same values for the join
// field(s), e.g. the three L=a records, as well as a "peek" record which is
// either the next record with a different join value (e.g. the L=b record), or
// an end-of-file indicator.
//
// If a right-file record has join field matching the current left-file bucket,
// then it's paired with all records in that bucket. Otherwise the
// join_bucket_keeper needs to either stay with the current bucket or advance
// to the next one, depending on how the current right-file record's
// join-field values compare lexically with the left-file bucket's
// join-field values.
//
// Examples:
//
// +-----------+-----------+-----------+-----------+-----------+-----------+
// | L R | L R | L R | L R | L R | L R |
// + --- --- + --- --- + --- --- + --- --- + --- --- + --- --- +
// | a | a | e | a | e e | e e |
// | b | e | e | e e | e | e e |
// | e | e | e | e | e | e |
// | e | e | f | e | f | g g |
// | e | f | g | g | g | g |
// | g | g | g | g | g | |
// | g | g | h | | | |
// +-----------+-----------+-----------+-----------+-----------+-----------+
//
// In all these examples, the join_bucket_keeper goes through these steps:
// * bucket is empty, peek rec has L=e
// * bucket is L=e records, peek rec has L=g
// * bucket is L=g records, peek rec is null (due to EOF)
// * bucket is empty, peek rec is null (due to EOF)
//
// Example 1:
// * left-bucket is empty and left-peek has L=e
// * right record has R=a; join_bucket_keeper does not advance
// * right record has R=b; join_bucket_keeper does not advance
// * right end of file; all left records are unpaired.
//
// Example 2:
// * left-bucket is empty and left-peek has L=e
// * right record has R=a; join_bucket_keeper does not advance
// * right record has R=f; left records with L=e are unpaired.
// * etc.
//
// ================================================================
// ----------------------------------------------------------------
#define LEFT_STATE_0_PREFILL 0
#define LEFT_STATE_1_FULL 1
#define LEFT_STATE_2_LAST_BUCKET 2
#define LEFT_STATE_3_EOF 3
// ----------------------------------------------------------------
// (0) pre-fill: Lv == null, peek == null, leof = false
// (1) midstream: Lv != null, peek != null, leof = false
// (2) last bucket: Lv != null, peek == null, leof = true
// (3) leof: Lv == null, peek == null, leof = true
// ----------------------------------------------------------------
// Private methods
static int join_bucket_keeper_get_state(join_bucket_keeper_t* pkeeper);
static void join_bucket_keeper_initial_fill(join_bucket_keeper_t* pkeeper,
sllv_t** pprecords_left_unpaired);
static void join_bucket_keeper_advance_to(join_bucket_keeper_t* pkeeper, slls_t* pright_field_values,
sllv_t** pprecords_paired, sllv_t** pprecords_left_unpaired);
static void join_bucket_keeper_fill(join_bucket_keeper_t* pkeeper, sllv_t** pprecords_left_unpaired);
static void join_bucket_keeper_drain(join_bucket_keeper_t* pkeeper, slls_t* pright_field_values,
sllv_t** pprecords_paired, sllv_t** pprecords_left_unpaired);
static char* describe_state(int state);
// ----------------------------------------------------------------
// Convenience constructor: builds a record reader from the CLI reader options
// and hands it to join_bucket_keeper_alloc_from_reader.
join_bucket_keeper_t* join_bucket_keeper_alloc(
	char* prepipe,
	char* left_file_name,
	cli_reader_opts_t* popts,
	slls_t* pleft_field_names)
{
	return join_bucket_keeper_alloc_from_reader(
		lrec_reader_alloc(popts), prepipe, left_file_name, pleft_field_names);
}
// ----------------------------------------------------------------
// Builds a keeper around an existing record reader: opens the left file
// through the reader, signals start-of-file to it, sets up a context for that
// file, and starts in the pre-fill state with an empty bucket and no peek
// record.
join_bucket_keeper_t* join_bucket_keeper_alloc_from_reader(
	lrec_reader_t* plrec_reader,
	char* prepipe,
	char* left_file_name,
	slls_t* pleft_field_names)
{
	join_bucket_keeper_t* pkeeper = mlr_malloc_or_die(sizeof(join_bucket_keeper_t));

	// Left-file input stream.
	pkeeper->plrec_reader = plrec_reader;
	pkeeper->pvhandle = plrec_reader->popen_func(plrec_reader->pvstate, prepipe, left_file_name);
	plrec_reader->psof_func(plrec_reader->pvstate, pkeeper->pvhandle);

	// Reader context for the left file.
	pkeeper->pctx = mlr_malloc_or_die(sizeof(context_t));
	context_init_from_first_file_name(pkeeper->pctx, left_file_name);

	pkeeper->pleft_field_names = pleft_field_names;

	// Empty bucket.
	pkeeper->pbucket = mlr_malloc_or_die(sizeof(join_bucket_t));
	pkeeper->pbucket->precords = sllv_alloc();
	pkeeper->pbucket->pleft_field_values = NULL;
	pkeeper->pbucket->was_paired = FALSE;

	pkeeper->state = LEFT_STATE_0_PREFILL;
	pkeeper->leof = FALSE;
	pkeeper->prec_peek = NULL;

	return pkeeper;
}
// ----------------------------------------------------------------
// Tears down the keeper: frees the bucket and its lists, closes and frees the
// reader, and frees the peek record and the context. A NULL keeper is
// tolerated.
void join_bucket_keeper_free(join_bucket_keeper_t* pkeeper, char* prepipe) {
	if (pkeeper != NULL) {
		// Bucket.
		slls_free(pkeeper->pbucket->pleft_field_values);
		sllv_free(pkeeper->pbucket->precords);
		free(pkeeper->pbucket);

		// Reader: close the input handle, then free the reader itself.
		pkeeper->plrec_reader->pclose_func(pkeeper->plrec_reader->pvstate, pkeeper->pvhandle, prepipe);
		pkeeper->plrec_reader->pfree_func(pkeeper->plrec_reader);

		lrec_free(pkeeper->prec_peek);
		free(pkeeper->pctx);
		free(pkeeper);
	}
}
// ----------------------------------------------------------------
// Main entry point, called once per right-file record and once more at right
// EOF.
//
// pright_field_values: join-field values of the current right record, or NULL
//   to signal right EOF.
// pprecords_paired: output; set to the current bucket's record list when the
//   bucket's join values equal the right record's, else left NULL.
// pprecords_left_unpaired: output; set to a list of left records now known to
//   be unpaired, or left NULL if there are none.
//
// Both outputs are reset to NULL on entry. The keeper's state is recomputed
// from its fields before returning.
void join_bucket_keeper_emit(join_bucket_keeper_t* pkeeper, slls_t* pright_field_values,
sllv_t** pprecords_paired, sllv_t** pprecords_left_unpaired)
{
*pprecords_paired = NULL;
*pprecords_left_unpaired = NULL;
int cmp = 0;
// First call: read the first bucket (and peek record) from the left file.
if (pkeeper->state == LEFT_STATE_0_PREFILL) {
join_bucket_keeper_initial_fill(pkeeper, pprecords_left_unpaired);
pkeeper->state = join_bucket_keeper_get_state(pkeeper);
}
if (pright_field_values != NULL) { // Not right EOF
if (pkeeper->state == LEFT_STATE_1_FULL || pkeeper->state == LEFT_STATE_2_LAST_BUCKET) {
// There is a current bucket: compare its join values with the right record's.
cmp = slls_compare_lexically(pkeeper->pbucket->pleft_field_values, pright_field_values);
if (cmp < 0) {
// Advance left until match or left EOF.
join_bucket_keeper_advance_to(pkeeper, pright_field_values, pprecords_paired, pprecords_left_unpaired);
} else if (cmp == 0) {
// Match: the bucket stays owned by the keeper and is remembered as paired.
pkeeper->pbucket->was_paired = TRUE;
*pprecords_paired = pkeeper->pbucket->precords;
} else {
// No match and no need to advance left; return null lists.
}
} else if (pkeeper->state != LEFT_STATE_3_EOF) {
// Still in the pre-fill state after the initial fill: should not happen.
fprintf(stderr, "%s: internal coding error: failed transition from prefill state.\n",
MLR_GLOBALS.bargv0);
exit(1);
}
} else { // Right EOF: return the final left-unpaireds.
join_bucket_keeper_drain(pkeeper, pright_field_values, pprecords_paired, pprecords_left_unpaired);
}
pkeeper->state = join_bucket_keeper_get_state(pkeeper);
}
// ----------------------------------------------------------------
// Derives the left-side state from the keeper's fields:
// * no bucket values: EOF if the left file is exhausted, else pre-fill;
// * bucket values present: last bucket if there is no peek record, else full.
static int join_bucket_keeper_get_state(join_bucket_keeper_t* pkeeper) {
	int has_bucket = (pkeeper->pbucket->pleft_field_values != NULL);
	if (has_bucket)
		return (pkeeper->prec_peek == NULL) ? LEFT_STATE_2_LAST_BUCKET : LEFT_STATE_1_FULL;
	return pkeeper->leof ? LEFT_STATE_3_EOF : LEFT_STATE_0_PREFILL;
}
// Reads left records until the first one having all the join keys; records
// lacking them go straight to the left-unpaired list. If the left file ends
// first, marks left EOF and returns; otherwise fills the first bucket starting
// from that record.
static void join_bucket_keeper_initial_fill(join_bucket_keeper_t* pkeeper,
	sllv_t** pprecords_left_unpaired)
{
	for (;;) {
		pkeeper->prec_peek = pkeeper->plrec_reader->pprocess_func(pkeeper->plrec_reader->pvstate,
			pkeeper->pvhandle, pkeeper->pctx);

		if (pkeeper->prec_peek == NULL) {
			// Left file exhausted before any keyed record was seen.
			pkeeper->leof = TRUE;
			return;
		}

		if (record_has_all_keys(pkeeper->prec_peek, pkeeper->pleft_field_names))
			break;

		// Record lacks the join keys: it can never pair.
		if (*pprecords_left_unpaired == NULL)
			*pprecords_left_unpaired = sllv_alloc();
		sllv_append(*pprecords_left_unpaired, pkeeper->prec_peek);
	}

	join_bucket_keeper_fill(pkeeper, pprecords_left_unpaired);
}
// Preconditions:
// * prec_peek != NULL
// * prec_peek has the join keys
// Starts a new bucket from the current peek record and keeps reading left
// records into it for as long as their join-field values match. On return,
// prec_peek is either the first keyed record with different join values, or
// NULL with leof set when the left file is exhausted. Records lacking the join
// keys are appended to *pprecords_left_unpaired (allocated on demand).
static void join_bucket_keeper_fill(join_bucket_keeper_t* pkeeper, sllv_t** pprecords_left_unpaired) {
slls_t* pleft_field_values = mlr_reference_selected_values_from_record(pkeeper->prec_peek,
pkeeper->pleft_field_names);
if (pleft_field_values == NULL) {
fprintf(stderr, "%s: internal coding error: peek record should have had join keys.\n",
MLR_GLOBALS.bargv0);
exit(1);
}
// The bucket keeps its own copy of the join values; the temporary list is released.
pkeeper->pbucket->pleft_field_values = slls_copy(pleft_field_values);
slls_free(pleft_field_values);
// The peek record becomes the first record of the new bucket.
sllv_append(pkeeper->pbucket->precords, pkeeper->prec_peek);
pkeeper->pbucket->was_paired = FALSE;
pkeeper->prec_peek = NULL;
while (TRUE) {
// Skip over records not having the join keys. These go straight to the
// left-unpaired list.
pkeeper->prec_peek = pkeeper->plrec_reader->pprocess_func(pkeeper->plrec_reader->pvstate,
pkeeper->pvhandle, pkeeper->pctx);
if (pkeeper->prec_peek == NULL) {
pkeeper->leof = TRUE;
break;
}
if (record_has_all_keys(pkeeper->prec_peek, pkeeper->pleft_field_names)) {
int cmp = slls_lrec_compare_lexically(
pkeeper->pbucket->pleft_field_values,
pkeeper->prec_peek,
pkeeper->pleft_field_names);
if (cmp != 0) {
// Different join values: leave this record as the peek record.
break;
}
sllv_append(pkeeper->pbucket->precords, pkeeper->prec_peek);
} else {
if (*pprecords_left_unpaired == NULL)
*pprecords_left_unpaired = sllv_alloc();
sllv_append(*pprecords_left_unpaired, pkeeper->prec_peek);
}
// The record has been handed to a list; it is no longer the peek record.
pkeeper->prec_peek = NULL;
}
}
// Pre-conditions:
// * pkeeper->pleft_field_values < pright_field_values.
// * currently in state 1 or 2 so there is a bucket but there may or may not be a peek-record
// * current bucket was/wasn't paired on previous emits but is not paired on this emit.
// Actions:
// * if bucket was never paired, return it to the caller; else discard.
// * consume left input stream, feeding into unpaired, for as long as leftvals < rightvals && !eof.
// * if there is leftrec with vals == rightvals: parallel initial_fill.
// else, mimic initial_fill.
// Moves the left side forward until its join values are >= the right
// record's, or until left EOF.
//
// pright_field_values: join-field values of the current right record.
// pprecords_paired: output; set to the new bucket's records if the new bucket
//   matches the right record.
// pprecords_left_unpaired: output; receives the old bucket's records if that
//   bucket was never paired, plus every left record skipped on the way.
static void join_bucket_keeper_advance_to(join_bucket_keeper_t* pkeeper, slls_t* pright_field_values,
	sllv_t** pprecords_paired, sllv_t** pprecords_left_unpaired)
{
	// Dispose of the current bucket: discard it if it was ever paired, else
	// hand its records to the caller as left-unpaired.
	if (pkeeper->pbucket->was_paired) {
		while (pkeeper->pbucket->precords->phead)
			lrec_free(sllv_pop(pkeeper->pbucket->precords));
		sllv_free(pkeeper->pbucket->precords);
		pkeeper->pbucket->precords = NULL;
	} else {
		if (*pprecords_left_unpaired == NULL) {
			*pprecords_left_unpaired = pkeeper->pbucket->precords;
		} else {
			sllv_transfer(*pprecords_left_unpaired, pkeeper->pbucket->precords);
			// The emptied list is about to be replaced below; free it here, as
			// join_bucket_keeper_drain does, so that it is not leaked.
			sllv_free(pkeeper->pbucket->precords);
		}
	}

	// Start over with an empty bucket.
	pkeeper->pbucket->precords = sllv_alloc();
	if (pkeeper->pbucket->pleft_field_values != NULL) {
		slls_free(pkeeper->pbucket->pleft_field_values);
		pkeeper->pbucket->pleft_field_values = NULL;
	}
	pkeeper->pbucket->was_paired = FALSE;

	if (pkeeper->prec_peek == NULL) { // left EOF
		return;
	}

	// Need a double condition here ... the peek record is either het or hom.
	// (Or, change that: -> ensure elsewhere the peek record is hom.)
	// The former is destined for lunp and shouldn't be lexcmped. The latter
	// should be.
	int cmp = lrec_slls_compare_lexically(pkeeper->prec_peek, pkeeper->pleft_field_names, pright_field_values);
	if (cmp < 0) {
		// keep seeking & filling the bucket until = or >; this may or may not end up being a match.
		if (*pprecords_left_unpaired == NULL)
			*pprecords_left_unpaired = sllv_alloc();

		while (TRUE) {
			// The peek record sorts before the right record: it is unpaired.
			sllv_append(*pprecords_left_unpaired, pkeeper->prec_peek);
			pkeeper->prec_peek = NULL;

			while (TRUE) {
				// Skip over records not having the join keys. These go straight to the
				// left-unpaired list.
				pkeeper->prec_peek = pkeeper->plrec_reader->pprocess_func(pkeeper->plrec_reader->pvstate,
					pkeeper->pvhandle, pkeeper->pctx);
				if (pkeeper->prec_peek == NULL)
					break;
				if (record_has_all_keys(pkeeper->prec_peek, pkeeper->pleft_field_names)) {
					break;
				} else {
					if (*pprecords_left_unpaired == NULL)
						*pprecords_left_unpaired = sllv_alloc();
					sllv_append(*pprecords_left_unpaired, pkeeper->prec_peek);
				}
			}

			if (pkeeper->prec_peek == NULL) {
				pkeeper->leof = TRUE;
				break;
			}

			cmp = lrec_slls_compare_lexically(pkeeper->prec_peek, pkeeper->pleft_field_names, pright_field_values);
			if (cmp >= 0)
				break;
		}
	}

	if (cmp == 0) {
		// New bucket matches the right record: fill it and return it as paired.
		join_bucket_keeper_fill(pkeeper, pprecords_left_unpaired);
		pkeeper->pbucket->was_paired = TRUE;
		*pprecords_paired = pkeeper->pbucket->precords;
	} else if (cmp > 0) {
		// Left is now ahead of right: fill the bucket for later right records.
		join_bucket_keeper_fill(pkeeper, pprecords_left_unpaired);
	}
}
// Called at right EOF: collects every remaining left record into
// *pprecords_left_unpaired, namely the current bucket (unless it was paired),
// the peek record, and the rest of the left input stream. pright_field_values
// and pprecords_paired are not used here.
static void join_bucket_keeper_drain(join_bucket_keeper_t* pkeeper, slls_t* pright_field_values,
sllv_t** pprecords_paired, sllv_t** pprecords_left_unpaired)
{
// 1. Any records already in pkeeper->pbucket->precords (current bucket)
if (pkeeper->pbucket->was_paired) {
// A paired bucket contributes nothing; just make sure the output list exists.
if (*pprecords_left_unpaired == NULL)
*pprecords_left_unpaired = sllv_alloc();
} else {
if (*pprecords_left_unpaired == NULL) {
// Hand the bucket's list itself to the caller.
*pprecords_left_unpaired = pkeeper->pbucket->precords;
} else {
// Move the bucket's records onto the existing list, then free the bucket's list.
sllv_transfer(*pprecords_left_unpaired, pkeeper->pbucket->precords);
sllv_free(pkeeper->pbucket->precords);
}
}
// 2. Peek-record, if any
if (pkeeper->prec_peek != NULL) {
sllv_append(*pprecords_left_unpaired, pkeeper->prec_peek);
pkeeper->prec_peek = NULL;
}
// 3. Remainder of left input stream
while (TRUE) {
lrec_t* prec = pkeeper->plrec_reader->pprocess_func(pkeeper->plrec_reader->pvstate,
pkeeper->pvhandle, pkeeper->pctx);
if (prec == NULL)
break;
sllv_append(*pprecords_left_unpaired, prec);
}
// The keeper no longer refers to the bucket's list.
pkeeper->pbucket->precords = NULL;
}
// ----------------------------------------------------------------
// Debug helper: dumps the keeper's fields to stdout -- the reader handle, the
// context, the left join-field names, the current bucket, the peek record
// (or "null"), the left-EOF flag and the state name.
void join_bucket_keeper_print(join_bucket_keeper_t* pkeeper) {
	// The %p conversion requires a void* argument, so the struct pointer is
	// cast explicitly; pvhandle is already a void*.
	printf("pbucket at %p:\n", (void*)pkeeper);
	printf(" pvhandle = %p\n", pkeeper->pvhandle);
	context_print(pkeeper->pctx, " ");
	printf(" pleft_field_names = ");
	slls_print(pkeeper->pleft_field_names);
	printf("\n");
	join_bucket_print(pkeeper->pbucket, " ");
	printf(" prec_peek = ");
	if (pkeeper->prec_peek == NULL) {
		printf("null\n");
	} else {
		lrec_print(pkeeper->prec_peek);
	}
	printf(" leof = %d\n", pkeeper->leof);
	printf(" state = %s\n", describe_state(pkeeper->state));
}
// Debug helper: prints the keeper itself (via join_bucket_keeper_print), then
// the right-side field values and the two record lists passed in by pointer.
void join_bucket_keeper_print_aux(join_bucket_keeper_t* pkeeper, slls_t* pright_field_values,
	sllv_t** pprecords_paired, sllv_t** pprecords_left_unpaired)
{
	join_bucket_keeper_print(pkeeper);

	fputs(" pright_field_values = ", stdout);
	slls_print(pright_field_values);
	fputs("\n", stdout);

	fputs(" precords_paired =\n", stdout);
	lrec_print_list_with_prefix(*pprecords_paired, " ");
	fputs("\n", stdout);

	fputs(" precords_left_unpaired =\n", stdout);
	lrec_print_list_with_prefix(*pprecords_left_unpaired, " ");
	fputs("\n", stdout);
}
// Debug helper: prints one bucket to stdout, each line prefixed with `indent`:
// its address, its left field values, its records (or "(null)" when the list
// pointer is NULL) and the was_paired flag.
void join_bucket_print(join_bucket_t* pbucket, char* indent) {
	// The %p conversion requires a void* argument, hence the explicit cast.
	printf("%spbucket at %p:\n", indent, (void*)pbucket);
	printf("%s pleft_field_values = ", indent);
	slls_print(pbucket->pleft_field_values);
	printf("\n");
	if (pbucket->precords == NULL) {
		printf("%s precords:\n", indent);
		printf("%s (null)\n", indent);
	} else {
		// NOTE(review): %llu assumes sllv_t.length is unsigned long long;
		// the sllv_t declaration is not visible here -- confirm.
		printf("%s precords (length=%llu):\n", indent, pbucket->precords->length);
		lrec_print_list_with_prefix(pbucket->precords, " ");
	}
	printf("%s was_paired = %d\n", indent, pbucket->was_paired);
}
// Maps a LEFT_STATE_* code to its symbolic name for debug output; any other
// value yields "???".
static char* describe_state(int state) {
	if (state == LEFT_STATE_0_PREFILL)
		return "LEFT_STATE_0_PREFILL";
	if (state == LEFT_STATE_1_FULL)
		return "LEFT_STATE_1_FULL";
	if (state == LEFT_STATE_2_LAST_BUCKET)
		return "LEFT_STATE_2_LAST_BUCKET";
	if (state == LEFT_STATE_3_EOF)
		return "LEFT_STATE_3_EOF";
	return "???";
}

View file

@ -0,0 +1,65 @@
// ================================================================
// Data structures for mlr join, sorted case. Please see descriptive
// comments at the top of join_bucket_keeper.c.
// ================================================================
#ifndef JOIN_BUCKET_KEEPER_H
#define JOIN_BUCKET_KEEPER_H
#include "lib/context.h"
#include "cli/mlrcli.h"
#include "containers/lrec.h"
#include "containers/slls.h"
#include "containers/sllv.h"
#include "input/lrec_reader.h"
// ----------------------------------------------------------------
// One bucket of left-file records held by the keeper.
typedef struct _join_bucket_t {
// Left-side field values for this bucket (presumably the join-key values
// shared by all records in it -- confirm against the .c file).
slls_t* pleft_field_values;
// Records currently in the bucket. May be NULL; join_bucket_print handles
// that case explicitly.
sllv_t* precords;
// Set to TRUE when the bucket's records have been handed out as paired.
int was_paired;
} join_bucket_t;
// State for reading the left file bucket by bucket.
typedef struct _join_bucket_keeper_t {
// Reader for the left file; its pprocess_func is called with its pvstate,
// plus pvhandle and pctx below, and returns NULL when no record is left.
lrec_reader_t* plrec_reader;
// Opaque handle passed through to the reader's pprocess_func.
void* pvhandle;
// Context passed through to the reader's pprocess_func.
context_t* pctx;
// Names of the left-file join fields.
slls_t* pleft_field_names;
// The current bucket.
join_bucket_t* pbucket;
// One-record read-ahead from the left file; NULL when none is pending.
lrec_t* prec_peek;
// Set to TRUE once the left reader has returned NULL.
int leof;
// One of the LEFT_STATE_* codes.
int state;
} join_bucket_keeper_t;
// ----------------------------------------------------------------
// Constructs a keeper for the given left file and left join-field names.
// NOTE(review): the definition is not visible from this header; presumably the
// reader is built from prepipe/left_file_name/popts -- confirm in the .c file.
join_bucket_keeper_t* join_bucket_keeper_alloc(
char* prepipe,
char* left_file_name,
cli_reader_opts_t* popts,
slls_t* pleft_field_names);
// As above, but with a caller-supplied record reader.
join_bucket_keeper_t* join_bucket_keeper_alloc_from_reader(
lrec_reader_t* plrec_reader,
char* prepipe,
char* left_file_name,
slls_t* pleft_field_names);
// Releases a keeper obtained from one of the allocators above.
void join_bucket_keeper_free(join_bucket_keeper_t* pkeeper, char* prepipe);
// Given the right-side field values, sets *pprecords_paired and/or
// *pprecords_left_unpaired from the left file.
// *pprecords_paired should not be freed by the caller.
// *pprecords_left_unpaired should be freed by the caller.
void join_bucket_keeper_emit(
join_bucket_keeper_t* pkeeper,
slls_t* pright_field_values,
sllv_t** pprecords_paired,
sllv_t** pprecords_left_unpaired);
// Debug printers; all write to stdout.
void join_bucket_print(join_bucket_t* pbucket, char* indent);
void join_bucket_keeper_print(join_bucket_keeper_t* pkeeper);
void join_bucket_keeper_print_aux(join_bucket_keeper_t* pkeeper, slls_t* pright_field_values,
sllv_t** pprecords_paired, sllv_t** pprecords_left_unpaired);
#endif // JOIN_BUCKET_KEEPER_H

300
c/containers/lhms2v.c Normal file
View file

@ -0,0 +1,300 @@
// ================================================================
// Array-only (open addressing) string-pair-to-void-star linked hash map with
// linear probing for collisions.
//
// John Kerl 2014-12-22
//
// Notes:
// * null key is not supported.
// * null value is supported (use lhms2v_has_key to tell it from an absent key).
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "containers/lhms2v.h"
// ----------------------------------------------------------------
// Allow compile-time override, e.g using gcc -D.
// Number of slots in a map created by lhms2v_alloc.
#ifndef INITIAL_ARRAY_LENGTH
#define INITIAL_ARRAY_LENGTH 16
#endif
// lhms2v_put enlarges the table once (occupied + freed) slots reach
// array_length * LOAD_FACTOR.
#ifndef LOAD_FACTOR
#define LOAD_FACTOR 0.7
#endif
// Multiplier applied to array_length on each enlargement.
#ifndef ENLARGEMENT_FACTOR
#define ENLARGEMENT_FACTOR 2
#endif
// ----------------------------------------------------------------
// Per-slot state bytes, stored in pmap->states (one unsigned char per slot).
#define OCCUPIED 0xa4
#define DELETED 0xb8
#define EMPTY 0xce
// ----------------------------------------------------------------
// Forward declarations: both are used by lhms2v_put before they are defined.
static void* lhms2v_put_no_enlarge(lhms2v_t* pmap, char* key1, char* key2, void* pvvalue, char free_flags);
static void lhms2v_enlarge(lhms2v_t* pmap);
// ================================================================
// Resets *pmap to an empty table of `length` slots, allocating fresh entry and
// state arrays. Any arrays previously referenced by *pmap are not freed here.
static void lhms2v_init(lhms2v_t *pmap, int length) {
	pmap->array_length = length;
	pmap->num_occupied = 0;
	pmap->num_freed = 0;
	pmap->phead = NULL;
	pmap->ptail = NULL;

	// Entry contents are intentionally left uninitialized: clearing them
	// makes constructing an empty map drastically slower, and miller builds
	// a great many maps. An entry's fields are don't-cares while its state
	// is EMPTY; they are set on put, and mutated on remove.
	pmap->entries = mlr_malloc_or_die(sizeof(lhms2ve_t) * length);

	// Only the one-byte-per-slot state array is filled in.
	pmap->states = mlr_malloc_or_die(sizeof(lhms2ve_state_t) * length);
	memset(pmap->states, EMPTY, length);
}
// Allocates and returns an empty map with INITIAL_ARRAY_LENGTH slots.
lhms2v_t* lhms2v_alloc() {
	lhms2v_t* pnew = mlr_malloc_or_die(sizeof(*pnew));
	lhms2v_init(pnew, INITIAL_ARRAY_LENGTH);
	return pnew;
}
// Releases the map and, for entries flagged FREE_ENTRY_KEY, both of their key
// strings. A NULL map is ignored.
// void-star payloads should first be freed by the caller.
void lhms2v_free(lhms2v_t* pmap) {
	if (pmap != NULL) {
		// Walk the insertion-order list; keys must go before the entry
		// array that holds the pointers to them.
		lhms2ve_t* pe = pmap->phead;
		while (pe != NULL) {
			if (pe->free_flags & FREE_ENTRY_KEY) {
				free(pe->key1);
				free(pe->key2);
			}
			pe = pe->pnext;
		}

		free(pmap->entries);
		free(pmap->states);
		pmap->entries = NULL;
		pmap->array_length = 0;
		pmap->num_occupied = 0;
		pmap->num_freed = 0;
		free(pmap);
	}
}
// ----------------------------------------------------------------
// Probe for the key pair (key1, key2).
// Returns the slot index where the pair already lives, or else the first EMPTY
// slot at the end of its probe chain (where it would be inserted). The slot the
// pair hashes to is reported through *pideal_index. Exits the process if every
// slot has been probed without finding either.
static int lhms2v_find_index_for_key(lhms2v_t* pmap, char* key1, char* key2, int* pideal_index) {
	int hash = mlr_string_pair_hash_func(key1, key2);
	int slot = mlr_canonical_mod(hash, pmap->array_length);
	*pideal_index = slot;

	for (int probes = 1; ; probes++) {
		lhms2ve_state_t state = pmap->states[slot];

		if (state == EMPTY)
			return slot;

		if (state == OCCUPIED) {
			lhms2ve_t* pe = &pmap->entries[slot];
			// Existing key pair found in chain.
			if (streq(key1, pe->key1) && streq(key2, pe->key2))
				return slot;
		}

		// A deleted (previously occupied) slot does not end the chain: the
		// sought pair may lie further along, so keep probing.
		if (probes >= pmap->array_length) {
			fprintf(stderr,
				"%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
			exit(1);
		}

		// Linear probing with wrap-around.
		slot++;
		if (slot >= pmap->array_length)
			slot = 0;
	}

	MLR_INTERNAL_CODING_ERROR();
	return -1; // not reached
}
// ----------------------------------------------------------------
// Stores pvvalue under (key1, key2), enlarging the table first when the used
// slots (occupied plus freed) have reached the load-factor threshold.
// Returns pvvalue.
void* lhms2v_put(lhms2v_t* pmap, char* key1, char* key2, void* pvvalue, char free_flags) {
	int num_used = pmap->num_occupied + pmap->num_freed;
	if (num_used >= (pmap->array_length*LOAD_FACTOR)) {
		lhms2v_enlarge(pmap);
	}
	return lhms2v_put_no_enlarge(pmap, key1, key2, pvvalue, free_flags);
}
// Inserts or overwrites without checking the load factor. When the key pair is
// already present only the value is replaced (the stored keys and free flags
// are kept). Otherwise the EMPTY slot is filled in and appended to the
// insertion-order list. Returns pvvalue.
static void* lhms2v_put_no_enlarge(lhms2v_t* pmap, char* key1, char* key2, void* pvvalue, char free_flags) {
	int ideal_index = 0;
	int slot = lhms2v_find_index_for_key(pmap, key1, key2, &ideal_index);
	lhms2ve_t* pe = &pmap->entries[slot];
	lhms2ve_state_t state = pmap->states[slot];

	if (state == OCCUPIED) {
		// Existing key found in chain; put value.
		pe->pvvalue = pvvalue;
		return pvvalue;
	}

	if (state != EMPTY) {
		fprintf(stderr, "%s: lhms2v_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
		exit(1);
	}

	// End of chain: claim the slot.
	pe->ideal_index = ideal_index;
	pe->key1 = key1;
	pe->key2 = key2;
	pe->pvvalue = pvvalue;
	pe->free_flags = free_flags;
	pmap->states[slot] = OCCUPIED;

	// Append to the tail of the doubly linked list.
	pe->pnext = NULL;
	if (pmap->phead != NULL) {
		pe->pprev = pmap->ptail;
		pmap->ptail->pnext = pe;
	} else {
		pe->pprev = NULL;
		pmap->phead = pe;
	}
	pmap->ptail = pe;

	pmap->num_occupied++;
	return pvvalue;
}
// ----------------------------------------------------------------
// Returns the value stored under (key1, key2), or NULL when the pair is absent.
void* lhms2v_get(lhms2v_t* pmap, char* key1, char* key2) {
	int ideal_index = 0;
	int slot = lhms2v_find_index_for_key(pmap, key1, key2, &ideal_index);

	switch (pmap->states[slot]) {
	case OCCUPIED:
		return pmap->entries[slot].pvvalue;
	case EMPTY:
		return NULL;
	default:
		fprintf(stderr, "%s: lhms2v_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Returns TRUE when (key1, key2) is present in the map, FALSE otherwise.
int lhms2v_has_key(lhms2v_t* pmap, char* key1, char* key2) {
	int ideal_index = 0;
	int slot = lhms2v_find_index_for_key(pmap, key1, key2, &ideal_index);

	switch (pmap->states[slot]) {
	case OCCUPIED:
		return TRUE;
	case EMPTY:
		return FALSE;
	default:
		fprintf(stderr, "%s: lhms2v_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Number of key pairs currently stored in the map.
int lhms2v_size(lhms2v_t* pmap) {
	int num_entries = pmap->num_occupied;
	return num_entries;
}
// ----------------------------------------------------------------
// Grows the table by ENLARGEMENT_FACTOR: a fresh, larger table is set up in
// *pmap and every entry of the old one is re-inserted in its original
// insertion order, after which the old arrays are released.
static void lhms2v_enlarge(lhms2v_t* pmap) {
	// Keep hold of the old storage; lhms2v_init overwrites these fields.
	lhms2ve_t* pold_entries = pmap->entries;
	lhms2ve_state_t* pold_states = pmap->states;
	lhms2ve_t* pe = pmap->phead;

	lhms2v_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);

	while (pe != NULL) {
		lhms2v_put_no_enlarge(pmap, pe->key1, pe->key2, pe->pvvalue, pe->free_flags);
		pe = pe->pnext;
	}

	free(pold_entries);
	free(pold_states);
}
// ----------------------------------------------------------------
// Unit-test hook: recounts OCCUPIED and DELETED slots and compares them with
// the cached num_occupied / num_freed. Returns TRUE when both agree; otherwise
// reports the first mismatch on stderr and returns FALSE.
int lhms2v_check_counts(lhms2v_t* pmap) {
	int occupied_seen = 0;
	int deleted_seen = 0;

	for (int i = 0; i < pmap->array_length; i++) {
		switch (pmap->states[i]) {
		case OCCUPIED:
			occupied_seen++;
			break;
		case DELETED:
			deleted_seen++;
			break;
		default:
			break;
		}
	}

	if (occupied_seen != pmap->num_occupied) {
		fprintf(stderr,
			"occupancy-count mismatch: actual %d != cached %d\n",
			occupied_seen, pmap->num_occupied);
		return FALSE;
	}
	if (deleted_seen != pmap->num_freed) {
		fprintf(stderr,
			"deleted-count mismatch: actual %d != cached %d\n",
			deleted_seen, pmap->num_freed);
		return FALSE;
	}
	return TRUE;
}
// ----------------------------------------------------------------
// Human-readable name of a slot state, for lhms2v_print; "?????" for any value
// that is not one of the three state codes.
static char* get_state_name(int state) {
	if (state == OCCUPIED)
		return "occupied";
	if (state == DELETED)
		return "deleted";
	if (state == EMPTY)
		return "empty";
	return "?????";
}
// Debug helper: prints every slot of the table, then the head/tail pointers,
// then the entries in insertion order, all to stdout.
//
// lhms2v_init deliberately leaves the entry array uninitialized, so the fields
// of a slot whose state is EMPTY are never read here: such slots print as
// "none" with an ideal index of -1.
//
// NOTE(review): values are printed with %s, i.e. this assumes every non-NULL
// pvvalue points to a NUL-terminated string. That is only safe when the caller
// stores strings -- confirm before using this on maps with other payloads.
void lhms2v_print(lhms2v_t* pmap) {
	for (int index = 0; index < pmap->array_length; index++) {
		lhms2ve_state_t state = pmap->states[index];
		int ideal_index = -1;
		const char* key1_string = "none";
		const char* key2_string = "none";
		const char* value_string = "none";

		if (state != EMPTY) {
			lhms2ve_t* pe = &pmap->entries[index];
			ideal_index = pe->ideal_index;
			key1_string = (pe->key1 == NULL) ? "null" : pe->key1;
			key2_string = (pe->key2 == NULL) ? "null" : pe->key2;
			value_string = (pe->pvvalue == NULL) ? "null" : (const char*)pe->pvvalue;
		}

		printf(
			"| stt: %-8s | idx: %6d | nidx: %6d | key1: %12s | key2: %12s | pvvalue: %12s |\n",
			get_state_name(state), index, ideal_index,
			key1_string, key2_string, value_string);
	}
	printf("+\n");
	// The %p conversion requires void* arguments.
	printf("| phead: %p | ptail %p\n", (void*)pmap->phead, (void*)pmap->ptail);
	printf("+\n");
	// Entries on the linked list were all filled in by put.
	for (lhms2ve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
		const char* key1_string = (pe->key1 == NULL) ? "null" : pe->key1;
		const char* key2_string = (pe->key2 == NULL) ? "null" : pe->key2;
		const char* value_string = (pe->pvvalue == NULL) ? "null" : (const char*)pe->pvvalue;
		printf(
			"| prev: %p curr: %p next: %p | nidx: %6d | key1: %12s | key2: %12s | pvvalue: %12s |\n",
			(void*)pe->pprev, (void*)pe, (void*)pe->pnext,
			pe->ideal_index, key1_string, key2_string, value_string);
	}
}

55
c/containers/lhms2v.h Normal file
View file

@ -0,0 +1,55 @@
// ================================================================
// Array-only (open addressing) string-pair-to-void-star linked hash map with
// linear probing for collisions.
//
// John Kerl 2014-12-22
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef LHMS2V_H
#define LHMS2V_H
#include "lib/free_flags.h"
// ----------------------------------------------------------------
// One slot of the table; also a node of the insertion-order linked list.
typedef struct _lhms2ve_t {
// Slot index the key pair hashes to, before any linear probing.
int ideal_index;
// The two key strings; the pointers are stored as given, not copied.
char* key1;
char* key2;
// Caller-owned payload.
void* pvvalue;
// If the FREE_ENTRY_KEY bit is set, lhms2v_free frees key1 and key2.
char free_flags;
// Neighbours in insertion order; NULL at the ends of the list.
struct _lhms2ve_t *pprev;
struct _lhms2ve_t *pnext;
} lhms2ve_t;
// Per-slot state byte, kept in a separate array parallel to the entries.
typedef unsigned char lhms2ve_state_t;
// ----------------------------------------------------------------
// The map itself.
typedef struct _lhms2v_t {
// Number of slots currently holding a key pair.
int num_occupied;
// Number of slots in the deleted state (checked by lhms2v_check_counts).
int num_freed;
// Number of slots in entries[] and states[].
int array_length;
// Slot storage; an entry's fields are meaningful only when its state is not empty.
lhms2ve_t* entries;
// One state byte per slot, same indexing as entries[].
lhms2ve_state_t* states;
// First and last entries in insertion order; both NULL for an empty map.
lhms2ve_t* phead;
lhms2ve_t* ptail;
} lhms2v_t;
// Returns a new, empty map.
lhms2v_t* lhms2v_alloc();
// Frees the map, plus both keys of every entry flagged FREE_ENTRY_KEY.
// Payloads are not freed. NULL is accepted.
void lhms2v_free(lhms2v_t* pmap);
// Stores pvvalue under (key1, key2) and returns pvvalue. If the pair is already
// present only the value is replaced.
void* lhms2v_put(lhms2v_t* pmap, char* key1, char* key2, void* pvvalue, char free_flags);
// Returns the stored value, or NULL if the pair is absent.
void* lhms2v_get(lhms2v_t* pmap, char* key1, char* key2);
// Returns TRUE if the pair is present, else FALSE.
int lhms2v_has_key(lhms2v_t* pmap, char* key1, char* key2);
// Returns the number of stored pairs.
int lhms2v_size(lhms2v_t* pmap);
// Unit-test hook
int lhms2v_check_counts(lhms2v_t* pmap);
#endif // LHMS2V_H

322
c/containers/lhmsi.c Normal file
View file

@ -0,0 +1,322 @@
// ================================================================
// Array-only (open addressing) string-to-int linked hash map with linear
// probing for collisions.
//
// Keys are not strduped.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "containers/lhmsi.h"
#include "lib/free_flags.h"
// ----------------------------------------------------------------
// Allow compile-time override, e.g using gcc -D.
// Number of slots in a map created by lhmsi_alloc.
#ifndef INITIAL_ARRAY_LENGTH
#define INITIAL_ARRAY_LENGTH 16
#endif
// lhmsi_put enlarges the table once (occupied + freed) slots reach
// array_length * LOAD_FACTOR.
#ifndef LOAD_FACTOR
#define LOAD_FACTOR 0.7
#endif
// Multiplier applied to array_length on each enlargement.
#ifndef ENLARGEMENT_FACTOR
#define ENLARGEMENT_FACTOR 2
#endif
// ----------------------------------------------------------------
// Per-slot state bytes, stored in pmap->states (one unsigned char per slot).
#define OCCUPIED 0xa4
#define DELETED 0xb8
#define EMPTY 0xce
// ----------------------------------------------------------------
// Forward declarations: both are used by lhmsi_put before they are defined.
static void lhmsi_put_no_enlarge(lhmsi_t* pmap, char* key, int value, char free_flags);
static void lhmsi_enlarge(lhmsi_t* pmap);
// ================================================================
// Resets *pmap to an empty table of `length` slots, allocating fresh entry and
// state arrays. Any arrays previously referenced by *pmap are not freed here.
static void lhmsi_init(lhmsi_t *pmap, int length) {
	pmap->array_length = length;
	pmap->num_occupied = 0;
	pmap->num_freed = 0;
	pmap->phead = NULL;
	pmap->ptail = NULL;

	// Entry contents are intentionally left uninitialized: clearing them
	// makes constructing an empty map drastically slower, and miller builds
	// a great many maps. An entry's fields are don't-cares while its state
	// is EMPTY; they are set on put, and mutated on remove.
	pmap->entries = mlr_malloc_or_die(sizeof(lhmsie_t) * length);

	// Only the one-byte-per-slot state array is filled in.
	pmap->states = mlr_malloc_or_die(sizeof(lhmsie_state_t) * length);
	memset(pmap->states, EMPTY, length);
}
// Allocates and returns an empty map with INITIAL_ARRAY_LENGTH slots.
lhmsi_t* lhmsi_alloc() {
	lhmsi_t* pnew = mlr_malloc_or_die(sizeof(*pnew));
	lhmsi_init(pnew, INITIAL_ARRAY_LENGTH);
	return pnew;
}
// Returns a new map with the same key/value pairs in the same insertion order.
// Every key is duplicated, and the copy is flagged to free its keys.
lhmsi_t* lhmsi_copy(lhmsi_t* pmap) {
	lhmsi_t* pcopy = lhmsi_alloc();
	lhmsie_t* pe = pmap->phead;
	while (pe != NULL) {
		char* key_dup = mlr_strdup_or_die(pe->key);
		lhmsi_put(pcopy, key_dup, pe->value, FREE_ENTRY_KEY);
		pe = pe->pnext;
	}
	return pcopy;
}
// Releases the map and the key of every entry flagged FREE_ENTRY_KEY.
// A NULL map is ignored.
void lhmsi_free(lhmsi_t* pmap) {
	if (pmap != NULL) {
		// Keys must go before the entry array that holds the pointers to them.
		lhmsie_t* pe = pmap->phead;
		while (pe != NULL) {
			if (pe->free_flags & FREE_ENTRY_KEY) {
				free(pe->key);
			}
			pe = pe->pnext;
		}

		free(pmap->entries);
		free(pmap->states);
		pmap->entries = NULL;
		pmap->array_length = 0;
		pmap->num_occupied = 0;
		pmap->num_freed = 0;
		free(pmap);
	}
}
// ----------------------------------------------------------------
// Probe for `key`; shared by the put and lookup functions in this file.
// Returns >=0 for where the key is *or* should go (end of chain): either the
// OCCUPIED slot holding an equal key, or the first EMPTY slot on its probe
// chain. The slot the key hashes to is reported through *pideal_index. Exits
// the process if every slot has been probed without finding either.
static int lhmsi_find_index_for_key(lhmsi_t* pmap, char* key, int* pideal_index) {
	int hash = mlr_string_hash_func(key);
	int index = mlr_canonical_mod(hash, pmap->array_length);
	*pideal_index = index;
	int num_tries = 0;
	while (TRUE) {
		lhmsie_t* pe = &pmap->entries[index];
		if (pmap->states[index] == OCCUPIED) {
			char* ekey = pe->key;
			// Existing key found in chain.
			if (streq(key, ekey))
				return index;
		}
		else if (pmap->states[index] == EMPTY) {
			return index;
		}
		// If the current entry has been deleted, i.e. previously occupied,
		// the sought index may be further down the chain. So we must
		// continue looking.
		if (++num_tries >= pmap->array_length) {
			fprintf(stderr,
				"%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
			exit(1);
		}
		// Linear probing.
		if (++index >= pmap->array_length)
			index = 0;
	}
	MLR_INTERNAL_CODING_ERROR();
	// Explicit return so this non-void function never falls off its end,
	// matching the sibling find-index functions of the other hash maps.
	return -1; // not reached
}
// ----------------------------------------------------------------
// Stores `value` under `key`, enlarging the table first when the used slots
// (occupied plus freed) have reached the load-factor threshold.
void lhmsi_put(lhmsi_t* pmap, char* key, int value, char free_flags) {
	int num_used = pmap->num_occupied + pmap->num_freed;
	if (num_used >= (pmap->array_length*LOAD_FACTOR)) {
		lhmsi_enlarge(pmap);
	}
	lhmsi_put_no_enlarge(pmap, key, value, free_flags);
}
// Inserts or overwrites without checking the load factor. When the key is
// already present only the value is replaced (the stored key pointer and free
// flags are kept). Otherwise the EMPTY slot is filled in and appended to the
// insertion-order list.
static void lhmsi_put_no_enlarge(lhmsi_t* pmap, char* key, int value, char free_flags) {
	int ideal_index = 0;
	int slot = lhmsi_find_index_for_key(pmap, key, &ideal_index);
	lhmsie_t* pe = &pmap->entries[slot];
	lhmsie_state_t state = pmap->states[slot];

	if (state == OCCUPIED) {
		// Existing key found in chain; put value.
		pe->value = value;
		return;
	}

	if (state != EMPTY) {
		fprintf(stderr, "%s: lhmsi_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}

	// End of chain: claim the slot.
	pe->ideal_index = ideal_index;
	pe->key = key;
	pe->value = value;
	pe->free_flags = free_flags;
	pmap->states[slot] = OCCUPIED;

	// Append to the tail of the doubly linked list.
	pe->pnext = NULL;
	if (pmap->phead != NULL) {
		pe->pprev = pmap->ptail;
		pmap->ptail->pnext = pe;
	} else {
		pe->pprev = NULL;
		pmap->phead = pe;
	}
	pmap->ptail = pe;

	pmap->num_occupied++;
}
// ----------------------------------------------------------------
// Returns the value stored under `key`. For an absent key the sentinel -999 is
// returned, which is indistinguishable from a stored -999.
int lhmsi_get(lhmsi_t* pmap, char* key) {
	int ideal_index = 0;
	int slot = lhmsi_find_index_for_key(pmap, key, &ideal_index);

	switch (pmap->states[slot]) {
	case OCCUPIED:
		return pmap->entries[slot].value;
	case EMPTY:
		return -999; // caller must do lhmsi_has_key to check validity
	default:
		fprintf(stderr, "%s: lhmsi_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Single-probe lookup: when `key` is present, writes its value to *pval and
// returns TRUE; otherwise returns FALSE and leaves *pval untouched.
int lhmsi_test_and_get(lhmsi_t* pmap, char* key, int* pval) {
	int ideal_index = 0;
	int slot = lhmsi_find_index_for_key(pmap, key, &ideal_index);
	lhmsie_state_t state = pmap->states[slot];

	if (state == EMPTY)
		return FALSE;

	if (state != OCCUPIED) {
		fprintf(stderr, "%s: lhmsi_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}

	*pval = pmap->entries[slot].value;
	return TRUE;
}
// Returns a pointer to the entry holding `key`, or NULL when the key is absent.
// The pointer refers into the map's own entry array.
lhmsie_t* lhmsi_get_entry(lhmsi_t* pmap, char* key) {
	int ideal_index = 0;
	int slot = lhmsi_find_index_for_key(pmap, key, &ideal_index);

	switch (pmap->states[slot]) {
	case OCCUPIED:
		return &pmap->entries[slot];
	case EMPTY:
		return NULL;
	default:
		fprintf(stderr, "%s: lhmsi_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Returns TRUE when `key` is present in the map, FALSE otherwise.
int lhmsi_has_key(lhmsi_t* pmap, char* key) {
	int ideal_index = 0;
	int slot = lhmsi_find_index_for_key(pmap, key, &ideal_index);

	switch (pmap->states[slot]) {
	case OCCUPIED:
		return TRUE;
	case EMPTY:
		return FALSE;
	default:
		fprintf(stderr, "%s: lhmsi_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Unsupported operation: reports that on stderr and exits with status 1.
// None of the arguments is examined.
void lhmsi_rename(lhmsi_t* pmap, char* old_key, char* new_key) {
	fputs("rename is not supported in the hashed-record impl.\n", stderr);
	exit(1);
}
// ----------------------------------------------------------------
// Grows the table by ENLARGEMENT_FACTOR: a fresh, larger table is set up in
// *pmap and every entry of the old one is re-inserted in its original
// insertion order, after which the old arrays are released.
static void lhmsi_enlarge(lhmsi_t* pmap) {
	// Keep hold of the old storage; lhmsi_init overwrites these fields.
	lhmsie_t* pold_entries = pmap->entries;
	lhmsie_state_t* pold_states = pmap->states;
	lhmsie_t* pe = pmap->phead;

	lhmsi_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);

	while (pe != NULL) {
		lhmsi_put_no_enlarge(pmap, pe->key, pe->value, pe->free_flags);
		pe = pe->pnext;
	}

	free(pold_entries);
	free(pold_states);
}
// ----------------------------------------------------------------
// Unit-test hook: recounts OCCUPIED and DELETED slots and compares them with
// the cached num_occupied / num_freed. Returns TRUE when both agree; otherwise
// reports the first mismatch on stderr and returns FALSE.
int lhmsi_check_counts(lhmsi_t* pmap) {
	int occupied_seen = 0;
	int deleted_seen = 0;

	for (int i = 0; i < pmap->array_length; i++) {
		switch (pmap->states[i]) {
		case OCCUPIED:
			occupied_seen++;
			break;
		case DELETED:
			deleted_seen++;
			break;
		default:
			break;
		}
	}

	if (occupied_seen != pmap->num_occupied) {
		fprintf(stderr,
			"occupancy-count mismatch: actual %d != cached %d.\n",
			occupied_seen, pmap->num_occupied);
		return FALSE;
	}
	if (deleted_seen != pmap->num_freed) {
		fprintf(stderr,
			"deleted-count mismatch: actual %d != cached %d.\n",
			deleted_seen, pmap->num_freed);
		return FALSE;
	}
	return TRUE;
}
// ----------------------------------------------------------------
// Human-readable name of a slot state, for lhmsi_print; "?????" for any value
// that is not one of the three state codes.
static char* get_state_name(int state) {
	if (state == OCCUPIED)
		return "occupied";
	if (state == DELETED)
		return "deleted";
	if (state == EMPTY)
		return "empty";
	return "?????";
}
// Debug helper: prints every slot of the table, then the head/tail pointers,
// then the entries in insertion order, all to stdout.
//
// lhmsi_init deliberately leaves the entry array uninitialized, so the fields
// of a slot whose state is EMPTY are never read here: such slots print with key
// "none", an ideal index of -1 and a value of 0.
void lhmsi_print(lhmsi_t* pmap) {
	for (int index = 0; index < pmap->array_length; index++) {
		lhmsie_state_t state = pmap->states[index];
		int ideal_index = -1;
		int value = 0;
		const char* key_string = "none";

		if (state != EMPTY) {
			lhmsie_t* pe = &pmap->entries[index];
			ideal_index = pe->ideal_index;
			value = pe->value;
			key_string = (pe->key == NULL) ? "null" : pe->key;
		}

		printf(
			"| stt: %-8s | idx: %6d | nidx: %6d | key: %12s | value: %8d |\n",
			get_state_name(state), index, ideal_index, key_string, value);
	}
	printf("+\n");
	// The %p conversion requires void* arguments.
	printf("| phead: %p | ptail %p\n", (void*)pmap->phead, (void*)pmap->ptail);
	printf("+\n");
	// Entries on the linked list were all filled in by put.
	for (lhmsie_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
		const char* key_string = (pe->key == NULL) ? "null" : pe->key;
		printf(
			"| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | value: %8d |\n",
			(void*)pe->pprev, (void*)pe, (void*)pe->pnext,
			pe->ideal_index, key_string, pe->value);
	}
}

54
c/containers/lhmsi.h Normal file
View file

@ -0,0 +1,54 @@
// ================================================================
// Array-only (open addressing) string-to-int linked hash map with linear
// probing for collisions.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef LHMSI_H
#define LHMSI_H
// ----------------------------------------------------------------
// One slot of the table; also a node of the insertion-order linked list.
typedef struct _lhmsie_t {
// Slot index the key hashes to, before any linear probing.
int ideal_index;
// Key string; the pointer is stored as given, not copied.
char* key;
// The integer stored under the key.
int value;
// If the FREE_ENTRY_KEY bit is set, lhmsi_free frees the key.
char free_flags;
// Neighbours in insertion order; NULL at the ends of the list.
struct _lhmsie_t *pprev;
struct _lhmsie_t *pnext;
} lhmsie_t;
// Per-slot state byte, kept in a separate array parallel to the entries.
typedef unsigned char lhmsie_state_t;
// The map itself.
typedef struct _lhmsi_t {
// Number of slots currently holding a key.
int num_occupied;
// Number of slots in the deleted state (checked by lhmsi_check_counts).
int num_freed;
// Number of slots in entries[] and states[].
int array_length;
// Slot storage; an entry's fields are meaningful only when its state is not empty.
lhmsie_t* entries;
// One state byte per slot, same indexing as entries[].
lhmsie_state_t* states;
// First and last entries in insertion order; both NULL for an empty map.
lhmsie_t* phead;
lhmsie_t* ptail;
} lhmsi_t;
// ----------------------------------------------------------------
// Returns a new, empty map.
lhmsi_t* lhmsi_alloc();
// Returns a new map with the same pairs in the same order; keys are duplicated
// and owned by the copy.
lhmsi_t* lhmsi_copy(lhmsi_t* pmap);
// Frees the map, plus the key of every entry flagged FREE_ENTRY_KEY. NULL is accepted.
void lhmsi_free(lhmsi_t* pmap);
// Stores value under key. If the key is already present only the value is replaced.
void lhmsi_put(lhmsi_t* pmap, char* key, int value, char free_flags);
// Returns -999 when the key is absent.
int lhmsi_get(lhmsi_t* pmap, char* key); // caller must do lhmsi_has_key to check validity
// Returns TRUE and sets *pval when the key is present; returns FALSE otherwise.
int lhmsi_test_and_get(lhmsi_t* pmap, char* key, int* pval); // *pval undefined if return is FALSE
// Returns a pointer to the entry for key, or NULL when the key is absent.
lhmsie_t* lhmsi_get_entry(lhmsi_t* pmap, char* key);
// Returns TRUE if the key is present, else FALSE.
int lhmsi_has_key(lhmsi_t* pmap, char* key);
// Unit-test hook
int lhmsi_check_counts(lhmsi_t* pmap);
#endif // LHMSI_H

339
c/containers/lhmsll.c Normal file
View file

@ -0,0 +1,339 @@
// ================================================================
// Array-only (open addressing) string-to-long-long linked hash map with linear
// probing for collisions.
//
// Keys are not strduped.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "containers/lhmsll.h"
#include "lib/free_flags.h"
// ----------------------------------------------------------------
// Allow compile-time override, e.g using gcc -D.
// Number of slots in a map created by lhmsll_alloc.
#ifndef INITIAL_ARRAY_LENGTH
#define INITIAL_ARRAY_LENGTH 16
#endif
// lhmsll_put enlarges the table once (occupied + freed) slots reach
// array_length * LOAD_FACTOR.
#ifndef LOAD_FACTOR
#define LOAD_FACTOR 0.7
#endif
// Multiplier applied to array_length on each enlargement.
#ifndef ENLARGEMENT_FACTOR
#define ENLARGEMENT_FACTOR 2
#endif
// ----------------------------------------------------------------
// Per-slot state bytes, stored in pmap->states (one unsigned char per slot).
#define OCCUPIED 0xa4
#define DELETED 0xb8
#define EMPTY 0xce
// ----------------------------------------------------------------
// Forward declarations for the internal insertion path. These carry the value
// as long long: elsewhere in this file entry values are returned as long long
// and printed with %lld, so passing them through an int parameter (as the
// rehash in lhmsll_enlarge and the copy in lhmsll_copy used to) would truncate
// anything outside the int range.
static void lhmsll_put_no_enlarge(lhmsll_t* pmap, char* key, long long value, char free_flags);
static void lhmsll_put_wide(lhmsll_t* pmap, char* key, long long value, char free_flags);
static void lhmsll_enlarge(lhmsll_t* pmap);
// ================================================================
// Resets *pmap to an empty table of `length` slots, allocating fresh entry and
// state arrays. Any arrays previously referenced by *pmap are not freed here.
static void lhmsll_init(lhmsll_t *pmap, int length) {
	pmap->num_occupied = 0;
	pmap->num_freed = 0;
	pmap->array_length = length;
	pmap->entries = (lhmslle_t*)mlr_malloc_or_die(sizeof(lhmslle_t) * length);
	// Don't do lhmslle_clear() of all entries at init time, since this has a
	// drastic effect on the time needed to construct an empty map (and miller
	// constructs an awful lot of those). The attributes there are don't-cares
	// if the corresponding entry state is EMPTY. They are set on put, and
	// mutated on remove.
	pmap->states = (lhmslle_state_t*)mlr_malloc_or_die(sizeof(lhmslle_state_t) * length);
	memset(pmap->states, EMPTY, length);
	pmap->phead = NULL;
	pmap->ptail = NULL;
}
// Allocates and returns an empty map with INITIAL_ARRAY_LENGTH slots.
lhmsll_t* lhmsll_alloc() {
	lhmsll_t* pmap = mlr_malloc_or_die(sizeof(lhmsll_t));
	lhmsll_init(pmap, INITIAL_ARRAY_LENGTH);
	return pmap;
}
// Returns a new map with the same key/value pairs in the same insertion order.
// Every key is duplicated, and the copy is flagged to free its keys. Values go
// through the long-long insertion path so they are copied at full width.
lhmsll_t* lhmsll_copy(lhmsll_t* pmap) {
	lhmsll_t* pnew = lhmsll_alloc();
	for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext)
		lhmsll_put_wide(pnew, mlr_strdup_or_die(pe->key), pe->value, FREE_ENTRY_KEY);
	return pnew;
}
// Releases the map and the key of every entry flagged FREE_ENTRY_KEY.
// A NULL map is ignored.
void lhmsll_free(lhmsll_t* pmap) {
	if (pmap == NULL)
		return;
	for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
		if (pe->free_flags & FREE_ENTRY_KEY)
			free(pe->key);
	}
	free(pmap->entries);
	free(pmap->states);
	pmap->entries = NULL;
	pmap->num_occupied = 0;
	pmap->num_freed = 0;
	pmap->array_length = 0;
	free(pmap);
}
// ----------------------------------------------------------------
// Probe for `key`; shared by the put and lookup functions in this file.
// Returns >=0 for where the key is *or* should go (end of chain). The slot the
// key hashes to is reported through *pideal_index. Exits the process if every
// slot has been probed without finding the key or an EMPTY slot.
static int lhmsll_find_index_for_key(lhmsll_t* pmap, char* key, int* pideal_index) {
	int hash = mlr_string_hash_func(key);
	int index = mlr_canonical_mod(hash, pmap->array_length);
	*pideal_index = index;
	int num_tries = 0;
	while (TRUE) {
		lhmslle_t* pe = &pmap->entries[index];
		if (pmap->states[index] == OCCUPIED) {
			char* ekey = pe->key;
			// Existing key found in chain.
			if (streq(key, ekey))
				return index;
		}
		else if (pmap->states[index] == EMPTY) {
			return index;
		}
		// If the current entry has been deleted, i.e. previously occupied,
		// the sought index may be further down the chain. So we must
		// continue looking.
		if (++num_tries >= pmap->array_length) {
			fprintf(stderr,
				"%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
			exit(1);
		}
		// Linear probing.
		if (++index >= pmap->array_length)
			index = 0;
	}
	MLR_INTERNAL_CODING_ERROR();
	return -1; // not reached
}
// ----------------------------------------------------------------
// Full-width insertion: enlarges the table first when the used slots (occupied
// plus freed) have reached the load-factor threshold, then stores the value.
static void lhmsll_put_wide(lhmsll_t* pmap, char* key, long long value, char free_flags) {
	if ((pmap->num_occupied + pmap->num_freed) >= (pmap->array_length*LOAD_FACTOR))
		lhmsll_enlarge(pmap);
	lhmsll_put_no_enlarge(pmap, key, value, free_flags);
}
// Public insertion entry point. If the key is already present only the value is
// replaced.
// NOTE(review): the value parameter is kept as int so that this definition
// still matches the prototype in the header, which is not visible from here.
// Widening it to long long would need the header changed in the same commit.
void lhmsll_put(lhmsll_t* pmap, char* key, int value, char free_flags) {
	lhmsll_put_wide(pmap, key, value, free_flags);
}
// Inserts or overwrites without checking the load factor. When the key is
// already present only the value is replaced (the stored key pointer and free
// flags are kept). Otherwise the EMPTY slot is filled in and appended to the
// insertion-order list.
static void lhmsll_put_no_enlarge(lhmsll_t* pmap, char* key, long long value, char free_flags) {
	int ideal_index = 0;
	int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
	lhmslle_t* pe = &pmap->entries[index];
	if (pmap->states[index] == OCCUPIED) {
		// Existing key found in chain; put value.
		pe->value = value;
	} else if (pmap->states[index] == EMPTY) {
		// End of chain.
		pe->ideal_index = ideal_index;
		pe->key = key;
		pe->value = value;
		pe->free_flags = free_flags;
		pmap->states[index] = OCCUPIED;
		if (pmap->phead == NULL) {
			pe->pprev = NULL;
			pe->pnext = NULL;
			pmap->phead = pe;
			pmap->ptail = pe;
		} else {
			pe->pprev = pmap->ptail;
			pe->pnext = NULL;
			pmap->ptail->pnext = pe;
			pmap->ptail = pe;
		}
		pmap->num_occupied++;
	} else {
		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Returns the value stored under `key`. For an absent key the sentinel -999 is
// returned, which is indistinguishable from a stored -999.
long long lhmsll_get(lhmsll_t* pmap, char* key) {
	int ideal_index = 0;
	int slot = lhmsll_find_index_for_key(pmap, key, &ideal_index);

	switch (pmap->states[slot]) {
	case OCCUPIED:
		return pmap->entries[slot].value;
	case EMPTY:
		return -999; // caller must do lhmsll_has_key to check validity
	default:
		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Single-probe lookup: when `key` is present, writes its value to *pval and
// returns TRUE; otherwise returns FALSE and leaves *pval untouched.
int lhmsll_test_and_get(lhmsll_t* pmap, char* key, long long* pval) {
	int ideal_index = 0;
	int slot = lhmsll_find_index_for_key(pmap, key, &ideal_index);
	lhmslle_state_t state = pmap->states[slot];

	if (state == EMPTY)
		return FALSE;

	if (state != OCCUPIED) {
		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}

	*pval = pmap->entries[slot].value;
	return TRUE;
}
// When `key` is present, adds one to its stored value and returns TRUE;
// otherwise returns FALSE and leaves the map unchanged.
int lhmsll_test_and_increment(lhmsll_t* pmap, char* key) {
	int ideal_index = 0;
	int slot = lhmsll_find_index_for_key(pmap, key, &ideal_index);
	lhmslle_state_t state = pmap->states[slot];

	if (state == EMPTY)
		return FALSE;

	if (state != OCCUPIED) {
		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}

	pmap->entries[slot].value++;
	return TRUE;
}
// Returns a pointer to the entry holding `key`, or NULL when the key is absent.
// The pointer refers into the map's own entry array.
lhmslle_t* lhmsll_get_entry(lhmsll_t* pmap, char* key) {
	int ideal_index = 0;
	int slot = lhmsll_find_index_for_key(pmap, key, &ideal_index);

	switch (pmap->states[slot]) {
	case OCCUPIED:
		return &pmap->entries[slot];
	case EMPTY:
		return NULL;
	default:
		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Returns TRUE when `key` is present in the map, FALSE otherwise.
int lhmsll_has_key(lhmsll_t* pmap, char* key) {
	int ideal_index = 0;
	int slot = lhmsll_find_index_for_key(pmap, key, &ideal_index);

	switch (pmap->states[slot]) {
	case OCCUPIED:
		return TRUE;
	case EMPTY:
		return FALSE;
	default:
		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
		exit(1);
	}
}
// ----------------------------------------------------------------
// Renaming is not implemented for this map: reports that on stderr and exits
// with status 1. None of the arguments are examined.
void lhmsll_rename(lhmsll_t* pmap, char* old_key, char* new_key) {
    (void)pmap;
    (void)old_key;
    (void)new_key;
    fputs("rename is not supported in the hashed-record impl.\n", stderr);
    exit(1);
}
// ----------------------------------------------------------------
// Grows the table by ENLARGEMENT_FACTOR. The old arrays are set aside, the
// map is re-initialized at the new length, and every entry is re-inserted by
// walking the old linked list from its head. The old arrays are released last
// because the list being walked lives inside them.
static void lhmsll_enlarge(lhmsll_t* pmap) {
    lhmslle_t*       prev_entries = pmap->entries;
    lhmslle_state_t* prev_states  = pmap->states;
    lhmslle_t*       pcur         = pmap->phead;

    lhmsll_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);

    while (pcur != NULL) {
        lhmsll_put_no_enlarge(pmap, pcur->key, pcur->value, pcur->free_flags);
        pcur = pcur->pnext;
    }

    free(prev_entries);
    free(prev_states);
}
// ----------------------------------------------------------------
// Unit-test hook: recounts OCCUPIED and DELETED slots from the states array
// and compares them with the cached num_occupied / num_freed. Returns TRUE
// when both agree; otherwise prints the first mismatch to stderr and returns
// FALSE.
int lhmsll_check_counts(lhmsll_t* pmap) {
    int tally_occupied = 0;
    int tally_deleted = 0;

    for (int slot = 0; slot < pmap->array_length; slot++) {
        switch (pmap->states[slot]) {
        case OCCUPIED:
            tally_occupied++;
            break;
        case DELETED:
            tally_deleted++;
            break;
        default:
            break;
        }
    }

    if (tally_occupied != pmap->num_occupied) {
        fprintf(stderr,
            "occupancy-count mismatch: actual %d != cached %d.\n",
            tally_occupied, pmap->num_occupied);
        return FALSE;
    }
    if (tally_deleted != pmap->num_freed) {
        fprintf(stderr,
            "deleted-count mismatch: actual %d != cached %d.\n",
            tally_deleted, pmap->num_freed);
        return FALSE;
    }
    return TRUE;
}
// ----------------------------------------------------------------
// Maps a slot-state code to a short printable name; unrecognized codes give
// "?????".
static char* get_state_name(int state) {
    if (state == OCCUPIED)
        return "occupied";
    if (state == DELETED)
        return "deleted";
    if (state == EMPTY)
        return "empty";
    return "?????";
}
// Debug dump to stdout: first one row per array slot, then one row per entry
// in linked-list order starting at phead.
//
// Only OCCUPIED slots have their entry fields read. The other slots are
// printed with placeholder values (nidx -1, key "none", value 0) because
// their entry contents are not guaranteed to be meaningful: the sibling
// maps' init routines leave entries uninitialized, and passing such a key
// pointer to %s is undefined behavior.
void lhmsll_print(lhmsll_t* pmap) {
    for (int index = 0; index < pmap->array_length; index++) {
        int state = pmap->states[index];
        int nidx = -1;
        const char* key_string = "none";
        long long value = 0LL;
        if (state == OCCUPIED) {
            lhmslle_t* pe = &pmap->entries[index];
            nidx = pe->ideal_index;
            key_string = (pe->key == NULL) ? "null" : pe->key;
            value = pe->value;
        }
        printf(
            "| stt: %-8s | idx: %6d | nidx: %6d | key: %12s | value: %8lld |\n",
            get_state_name(state), index, nidx, key_string, value);
    }
    printf("+\n");
    // %p requires void* arguments, hence the casts.
    printf("| phead: %p | ptail %p\n", (void*)pmap->phead, (void*)pmap->ptail);
    printf("+\n");
    for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
        const char* key_string = (pe->key == NULL) ? "null" : pe->key;
        printf(
            "| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | value: %8lld |\n",
            (void*)pe->pprev, (void*)pe, (void*)pe->pnext,
            pe->ideal_index, key_string, pe->value);
    }
}

57
c/containers/lhmsll.h Normal file
View file

@ -0,0 +1,57 @@
// ================================================================
// Array-only (open addressing) string-to-long-long linked hash map with linear
// probing for collisions.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef LHMSLL_H
#define LHMSLL_H
// ----------------------------------------------------------------
// One slot of the map's entry array. Entries also carry prev/next links so
// they can be walked as a list starting from the map's phead.
typedef struct _lhmslle_t {
int ideal_index; // presumably the un-probed hash slot for key -- TODO confirm against the put path in lhmsll.c
char* key; // NUL-terminated string key
long long value; // the mapped value
char free_flags; // ownership flags supplied to put; carried over unchanged when the table is enlarged
struct _lhmslle_t *pprev; // previous entry in the list
struct _lhmslle_t *pnext; // next entry in the list; NULL at the end
} lhmslle_t;
// Per-slot state code, kept in an array parallel to the entries.
typedef unsigned char lhmslle_state_t;
// The map: parallel entries/states arrays of array_length slots, plus the
// head and tail of the linked list threaded through the entries.
typedef struct _lhmsll_t {
int num_occupied; // cached count of OCCUPIED slots (cross-checked by lhmsll_check_counts)
int num_freed; // cached count of DELETED slots (cross-checked by lhmsll_check_counts)
int array_length; // number of slots in entries[] and states[]
lhmslle_t* entries;
lhmslle_state_t* states;
lhmslle_t* phead; // first entry in list order; NULL when the list is empty
lhmslle_t* ptail;
} lhmsll_t;
// ----------------------------------------------------------------
// Public API of the string-to-long-long map.
lhmsll_t* lhmsll_alloc();
lhmsll_t* lhmsll_copy(lhmsll_t* pmap);
void lhmsll_free(lhmsll_t* pmap);
// NOTE(review): value is declared int here although entries store long long --
// confirm against the definition in lhmsll.c.
void lhmsll_put(lhmsll_t* pmap, char* key, int value, char free_flags);
long long lhmsll_get(lhmsll_t* pmap, char* key); // caller must do lhmsll_has_key to check validity
int lhmsll_test_and_get(lhmsll_t* pmap, char* key, long long* pval); // *pval undefined if return is FALSE
int lhmsll_test_and_increment(lhmsll_t* pmap, char* key); // increments value only if mapping exists
// Returns the stored entry for key, or NULL if the key is absent.
lhmslle_t* lhmsll_get_entry(lhmsll_t* pmap, char* key);
// Returns TRUE if key is present, else FALSE.
int lhmsll_has_key(lhmsll_t* pmap, char* key);
// Unit-test hook
// Returns TRUE when the cached occupied/freed counts match a recount of the
// slot states; otherwise prints the mismatch to stderr and returns FALSE.
int lhmsll_check_counts(lhmsll_t* pmap);
#endif // LHMSLL_H

288
c/containers/lhmslv.c Normal file
View file

@ -0,0 +1,288 @@
// ================================================================
// Array-only (open addressing) string-list-to-void-star linked hash map with
// linear probing for collisions.
//
// John Kerl 2014-12-22
//
// Notes:
// * null key is not supported.
// * null value is not supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "containers/lhmslv.h"
// ----------------------------------------------------------------
// Allow compile-time override, e.g. using gcc -D.
// Number of slots a freshly allocated map starts with.
#ifndef INITIAL_ARRAY_LENGTH
#define INITIAL_ARRAY_LENGTH 16
#endif
// lhmslv_put enlarges the table once occupied-plus-freed slots reach this
// fraction of array_length.
#ifndef LOAD_FACTOR
#define LOAD_FACTOR 0.7
#endif
// Multiplier applied to array_length on each enlargement.
#ifndef ENLARGEMENT_FACTOR
#define ENLARGEMENT_FACTOR 2
#endif
// ----------------------------------------------------------------
// Byte values stored in the per-slot states array.
#define OCCUPIED 0xa4
#define DELETED 0xb8
#define EMPTY 0xce
// ----------------------------------------------------------------
// Forward declarations: lhmslv_put calls lhmslv_enlarge, which in turn calls
// lhmslv_put_no_enlarge; both are defined further down.
static void* lhmslv_put_no_enlarge(lhmslv_t* pmap, slls_t* key, void* pvvalue, char free_flags);
static void lhmslv_enlarge(lhmslv_t* pmap);
// ================================================================
// Sets pmap up as an empty table of `length` slots: zero counts, an empty
// linked list, freshly allocated entries and states arrays, and every slot
// marked EMPTY.
static void lhmslv_init(lhmslv_t *pmap, int length) {
    pmap->array_length = length;
    pmap->num_occupied = 0;
    pmap->num_freed = 0;
    pmap->phead = NULL;
    pmap->ptail = NULL;

    // The entry array is intentionally left uninitialized. Clearing it makes
    // constructing an empty map much slower, and Miller constructs a great
    // many of them. Entry fields are don't-cares while the slot's state is
    // EMPTY; they are set on put, and mutated on remove.
    pmap->entries = (lhmslve_t*)mlr_malloc_or_die(length * sizeof(lhmslve_t));

    lhmslve_state_t* slot_states =
        (lhmslve_state_t*)mlr_malloc_or_die(length * sizeof(lhmslve_state_t));
    memset(slot_states, EMPTY, length);
    pmap->states = slot_states;
}
// Allocates a new, empty map with INITIAL_ARRAY_LENGTH slots.
lhmslv_t* lhmslv_alloc() {
    lhmslv_t* pnew = mlr_malloc_or_die(sizeof(lhmslv_t));
    lhmslv_init(pnew, INITIAL_ARRAY_LENGTH);
    return pnew;
}
// Releases the map: frees each key flagged FREE_ENTRY_KEY, then the slot
// arrays, then the map itself. A NULL pmap is a no-op.
// The void-star payloads are not touched here; the caller should free them
// first.
void lhmslv_free(lhmslv_t* pmap) {
    if (pmap != NULL) {
        lhmslve_t* pcur = pmap->phead;
        while (pcur != NULL) {
            if (pcur->free_flags & FREE_ENTRY_KEY)
                slls_free(pcur->key);
            pcur = pcur->pnext;
        }

        free(pmap->entries);
        free(pmap->states);

        pmap->entries = NULL;
        pmap->num_occupied = 0;
        pmap->num_freed = 0;
        pmap->array_length = 0;

        free(pmap);
    }
}
// ----------------------------------------------------------------
// Probe routine behind get/put/has_key.
// Starts at hash(key) mod array_length, which is also reported through
// *pideal_index, and steps forward one slot at a time with wrap-around.
// Returns the index of the OCCUPIED slot holding an equal key, or of the
// first EMPTY slot reached (where the key would go). Exits the process if
// array_length slots are probed without finding either.
static int lhmslv_find_index_for_key(lhmslv_t* pmap, slls_t* key, int* pideal_index) {
    int slot = mlr_canonical_mod(slls_hash_func(key), pmap->array_length);
    *pideal_index = slot;

    for (int probes = 0; ; ) {
        lhmslve_state_t slot_state = pmap->states[slot];

        if (slot_state == EMPTY)
            return slot; // end of chain
        if (slot_state == OCCUPIED && slls_equals(key, pmap->entries[slot].key))
            return slot; // existing key found in chain

        // Either a different key lives here, or the slot was freed after
        // being occupied; in both cases the sought key may be further along.
        probes++;
        if (probes >= pmap->array_length) {
            fprintf(stderr,
                "%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
            exit(1);
        }

        // Linear probing with wrap-around.
        slot++;
        if (slot >= pmap->array_length)
            slot = 0;
    }
    MLR_INTERNAL_CODING_ERROR();
    return -1; // not reached
}
// ----------------------------------------------------------------
// Inserts or overwrites the mapping key -> pvvalue and returns pvvalue.
// The table is enlarged first if occupied-plus-freed slots have reached
// LOAD_FACTOR of the array length.
void* lhmslv_put(lhmslv_t* pmap, slls_t* key, void* pvvalue, char free_flags) {
    int slots_in_use = pmap->num_occupied + pmap->num_freed;
    if (slots_in_use >= (pmap->array_length*LOAD_FACTOR))
        lhmslv_enlarge(pmap);
    return lhmslv_put_no_enlarge(pmap, key, pvvalue, free_flags);
}
// Core insert, without any resizing. If the key already exists only its
// payload pointer is replaced (the stored key and free_flags are kept).
// Otherwise the EMPTY slot found by the probe is filled in and the entry is
// appended to the tail of the linked list. Returns pvvalue in both cases.
// NOTE(review): on overwrite the passed-in key is neither stored nor freed,
// even if free_flags has FREE_ENTRY_KEY -- confirm callers account for that.
static void* lhmslv_put_no_enlarge(lhmslv_t* pmap, slls_t* key, void* pvvalue, char free_flags) {
    int home_slot = 0;
    int slot = lhmslv_find_index_for_key(pmap, key, &home_slot);
    lhmslve_t* pentry = &pmap->entries[slot];
    lhmslve_state_t slot_state = pmap->states[slot];

    if (slot_state == OCCUPIED) {
        // Existing key: replace the payload only.
        pentry->pvvalue = pvvalue;
        return pvvalue;
    }
    if (slot_state != EMPTY) {
        fprintf(stderr, "%s: lhmslv_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
        exit(1);
    }

    // End of chain: claim the slot.
    pentry->ideal_index = home_slot;
    pentry->key = key;
    pentry->free_flags = free_flags;
    pentry->pvvalue = pvvalue;
    pmap->states[slot] = OCCUPIED;

    // Append to the linked list.
    pentry->pnext = NULL;
    if (pmap->phead == NULL) {
        pentry->pprev = NULL;
        pmap->phead = pentry;
    } else {
        pentry->pprev = pmap->ptail;
        pmap->ptail->pnext = pentry;
    }
    pmap->ptail = pentry;
    pmap->num_occupied++;

    return pvvalue;
}
// ----------------------------------------------------------------
// Returns the payload pointer stored for key, or NULL if the key is absent.
// Exits the process on an unexpected slot state.
void* lhmslv_get(lhmslv_t* pmap, slls_t* key) {
    int home_slot = 0; // filled in by the probe; not needed here
    int slot = lhmslv_find_index_for_key(pmap, key, &home_slot);
    switch (pmap->states[slot]) {
    case OCCUPIED:
        return pmap->entries[slot].pvvalue;
    case EMPTY:
        return NULL;
    default:
        fprintf(stderr, "%s: lhmslv_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
        exit(1);
    }
}
// ----------------------------------------------------------------
// Returns TRUE if key is present in the map and FALSE otherwise. Exits the
// process on an unexpected slot state.
int lhmslv_has_key(lhmslv_t* pmap, slls_t* key) {
    int home_slot = 0; // filled in by the probe; not needed here
    int slot = lhmslv_find_index_for_key(pmap, key, &home_slot);
    lhmslve_state_t slot_state = pmap->states[slot];

    if (slot_state == OCCUPIED)
        return TRUE;
    if (slot_state == EMPTY)
        return FALSE;

    fprintf(stderr, "%s: lhmslv_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
    exit(1);
}
// ----------------------------------------------------------------
// Number of key/value pairs currently stored (the cached occupied count).
int lhmslv_size(lhmslv_t* pmap) {
    int pair_count = pmap->num_occupied;
    return pair_count;
}
// ----------------------------------------------------------------
// Grows the table by ENLARGEMENT_FACTOR. The old arrays are set aside, the
// map is re-initialized at the new length, and every entry is re-inserted by
// walking the old linked list from its head, which preserves list order. The
// old arrays are released last because the list being walked lives in them.
static void lhmslv_enlarge(lhmslv_t* pmap) {
    lhmslve_t*       prev_entries = pmap->entries;
    lhmslve_state_t* prev_states  = pmap->states;
    lhmslve_t*       pcur         = pmap->phead;

    lhmslv_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);

    while (pcur != NULL) {
        lhmslv_put_no_enlarge(pmap, pcur->key, pcur->pvvalue, pcur->free_flags);
        pcur = pcur->pnext;
    }

    free(prev_entries);
    free(prev_states);
}
// ----------------------------------------------------------------
// Unit-test hook: recounts OCCUPIED and DELETED slots from the states array
// and compares them with the cached num_occupied / num_freed. Returns TRUE
// when both agree; otherwise prints the first mismatch to stderr and returns
// FALSE.
int lhmslv_check_counts(lhmslv_t* pmap) {
    int tally_occupied = 0;
    int tally_deleted = 0;

    for (int slot = 0; slot < pmap->array_length; slot++) {
        switch (pmap->states[slot]) {
        case OCCUPIED:
            tally_occupied++;
            break;
        case DELETED:
            tally_deleted++;
            break;
        default:
            break;
        }
    }

    if (tally_occupied != pmap->num_occupied) {
        fprintf(stderr,
            "occupancy-count mismatch: actual %d != cached %d\n",
            tally_occupied, pmap->num_occupied);
        return FALSE;
    }
    if (tally_deleted != pmap->num_freed) {
        fprintf(stderr,
            "freed-count mismatch: actual %d != cached %d\n",
            tally_deleted, pmap->num_freed);
        return FALSE;
    }
    return TRUE;
}
// ----------------------------------------------------------------
// Maps a slot-state code to a short printable name; unrecognized codes give
// "?????".
static char* get_state_name(int state) {
    if (state == OCCUPIED)
        return "occupied";
    if (state == DELETED)
        return "freed";
    if (state == EMPTY)
        return "empty";
    return "?????";
}
// Debug dump to stdout: first one row per array slot, then one row per entry
// in linked-list order starting at phead.
//
// Only OCCUPIED slots have their entry fields read. lhmslv_init leaves the
// entry array uninitialized, so for any other slot the key/pvvalue pointers
// are indeterminate and must not be handed to slls_join or %s. Those slots
// are printed with placeholders (nidx -1, key "none", pvvalue "none").
//
// NOTE(review): payloads are printed with %s, which is only valid when the
// stored void* actually points at a C string -- confirm for every caller.
// NOTE(review): slls_join presumably returns a caller-owned string that is
// never released here -- confirm against containers/slls.h.
void lhmslv_print(lhmslv_t* pmap) {
    for (int index = 0; index < pmap->array_length; index++) {
        int state = pmap->states[index];
        int nidx = -1;
        const char* key_string = "none";
        const char* value_string = "none";
        if (state == OCCUPIED) {
            lhmslve_t* pe = &pmap->entries[index];
            nidx = pe->ideal_index;
            key_string = (pe->key == NULL) ? "null" : slls_join(pe->key, ",");
            value_string = (pe->pvvalue == NULL) ? "null" : pe->pvvalue;
        }
        printf(
            "| stt: %-8s | idx: %6d | nidx: %6d | key: %12s | pvvalue: %12s |\n",
            get_state_name(state), index, nidx, key_string, value_string);
    }
    printf("+\n");
    // %p requires void* arguments, hence the casts.
    printf("| phead: %p | ptail %p\n", (void*)pmap->phead, (void*)pmap->ptail);
    printf("+\n");
    for (lhmslve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
        const char* key_string = (pe->key == NULL) ? "null" : slls_join(pe->key, ",");
        const char* value_string = (pe->pvvalue == NULL) ? "null" : pe->pvvalue;
        printf(
            "| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | pvvalue: %12s |\n",
            (void*)pe->pprev, (void*)pe, (void*)pe->pnext,
            pe->ideal_index, key_string, value_string);
    }
}

54
c/containers/lhmslv.h Normal file
View file

@ -0,0 +1,54 @@
// ================================================================
// Array-only (open addressing) string-list-to-void-star linked hash map with
// linear probing for collisions.
//
// John Kerl 2014-12-22
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef LHMSLV_H
#define LHMSLV_H
#include "containers/slls.h"
// ----------------------------------------------------------------
// One slot of the map's entry array. Entries are also chained into a
// doubly-linked list in insertion order (new entries are appended at the
// tail).
typedef struct _lhmslve_t {
int ideal_index; // hash(key) mod array_length, i.e. the slot before any probing
slls_t* key; // string-list key; freed by lhmslv_free only if FREE_ENTRY_KEY is set
void* pvvalue; // opaque payload; never freed by the map
char free_flags; // ownership flags supplied to put
struct _lhmslve_t *pprev; // previous entry in the list; NULL for the head
struct _lhmslve_t *pnext; // next entry in the list; NULL for the tail
} lhmslve_t;
// Per-slot state code, kept in an array parallel to the entries.
typedef unsigned char lhmslve_state_t;
// ----------------------------------------------------------------
// The map: parallel entries/states arrays of array_length slots, plus the
// head and tail of the linked list threaded through the entries.
typedef struct _lhmslv_t {
int num_occupied; // cached count of OCCUPIED slots; this is what lhmslv_size returns
int num_freed; // cached count of DELETED slots
int array_length; // number of slots in entries[] and states[]
lhmslve_t* entries; // slot contents; meaningful only where the state is OCCUPIED
lhmslve_state_t* states;
lhmslve_t* phead; // first entry in list order; NULL when the map is empty
lhmslve_t* ptail; // last entry in list order
} lhmslv_t;
// Allocates an empty map.
lhmslv_t* lhmslv_alloc();
// Frees the map and any keys flagged FREE_ENTRY_KEY. Payloads are not freed;
// the caller should free them first. NULL is accepted.
void lhmslv_free(lhmslv_t* pmap);
// Inserts or overwrites key -> pvvalue; returns pvvalue.
void* lhmslv_put(lhmslv_t* pmap, slls_t* key, void* pvvalue, char free_flags);
// Returns the stored payload, or NULL if the key is absent.
void* lhmslv_get(lhmslv_t* pmap, slls_t* key);
// Returns TRUE if key is present, else FALSE.
int lhmslv_has_key(lhmslv_t* pmap, slls_t* key);
// Returns the number of stored key/value pairs.
int lhmslv_size(lhmslv_t* pmap);
// Unit-test hook
// Returns TRUE when the cached occupied/freed counts match a recount of the
// slot states; otherwise prints the mismatch to stderr and returns FALSE.
int lhmslv_check_counts(lhmslv_t* pmap);
#endif // LHMSLV_H

297
c/containers/lhmsmv.c Normal file
View file

@ -0,0 +1,297 @@
// ================================================================
// Array-only (open addressing) string-to-mlrval linked hash map with linear
// probing for collisions.
//
// Keys and values are not strduped.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "containers/lhmsmv.h"
#include "lib/free_flags.h"
// ----------------------------------------------------------------
// Allow compile-time override, e.g. using gcc -D.
// Number of slots a freshly allocated map starts with.
#ifndef INITIAL_ARRAY_LENGTH
#define INITIAL_ARRAY_LENGTH 32
#endif
// lhmsmv_put enlarges the table once occupied-plus-freed slots reach this
// fraction of array_length.
#ifndef LOAD_FACTOR
#define LOAD_FACTOR 0.7
#endif
// Multiplier applied to array_length on each enlargement.
#ifndef ENLARGEMENT_FACTOR
#define ENLARGEMENT_FACTOR 2
#endif
// ----------------------------------------------------------------
// Byte values stored in the per-slot states array.
#define OCCUPIED 0xa4
#define DELETED 0xb8
#define EMPTY 0xce
// ----------------------------------------------------------------
// Forward declarations: lhmsmv_put calls lhmsmv_enlarge, which in turn calls
// lhmsmv_put_no_enlarge; both are defined further down.
static void lhmsmv_put_no_enlarge(lhmsmv_t* pmap, char* key, mv_t* pvalue, char free_flags);
static void lhmsmv_enlarge(lhmsmv_t* pmap);
// Sets pmap up as an empty table of `length` slots: zero counts, an empty
// linked list, freshly allocated entries and states arrays, and every slot
// marked EMPTY.
static void lhmsmv_init(lhmsmv_t *pmap, int length) {
    pmap->array_length = length;
    pmap->num_occupied = 0;
    pmap->num_freed = 0;
    pmap->phead = NULL;
    pmap->ptail = NULL;

    // The entry array is intentionally not memset. Clearing it makes
    // constructing an empty map much slower, and Miller constructs a great
    // many of them. Entry fields are don't-cares while the slot's state is
    // EMPTY; they are set on put, and mutated on remove.
    pmap->entries = (lhmsmve_t*)mlr_malloc_or_die(length * sizeof(lhmsmve_t));

    lhmsmve_state_t* slot_states =
        (lhmsmve_state_t*)mlr_malloc_or_die(length * sizeof(lhmsmve_state_t));
    memset(slot_states, EMPTY, length);
    pmap->states = slot_states;
}
// Allocates a new, empty map with INITIAL_ARRAY_LENGTH slots.
lhmsmv_t* lhmsmv_alloc() {
    lhmsmv_t* pnew = mlr_malloc_or_die(sizeof(lhmsmv_t));
    lhmsmv_init(pnew, INITIAL_ARRAY_LENGTH);
    return pnew;
}
// ----------------------------------------------------------------
// Builds a new map holding duplicates of every key and value in pold, in
// the same list order. The copy is flagged to free both its keys and its
// values.
lhmsmv_t* lhmsmv_copy(lhmsmv_t* pold) {
    lhmsmv_t* pclone = lhmsmv_alloc();
    lhmsmve_t* psrc = pold->phead;
    while (psrc != NULL) {
        char* key_dup = mlr_strdup_or_die(psrc->key);
        mv_t value_dup = mv_copy(&psrc->value);
        // put stores the mv_t by value, so passing the address of a local is fine.
        lhmsmv_put(pclone, key_dup, &value_dup, FREE_ENTRY_KEY | FREE_ENTRY_VALUE);
        psrc = psrc->pnext;
    }
    return pclone;
}
// ----------------------------------------------------------------
// Empties the map while keeping its current arrays: frees the keys and
// values the map was asked to free, marks every slot EMPTY, and resets the
// counts and the linked list. A NULL pmap is a no-op.
void lhmsmv_clear(lhmsmv_t* pmap) {
    if (pmap == NULL)
        return;

    lhmsmve_t* pcur = pmap->phead;
    while (pcur != NULL) {
        if (pcur->free_flags & FREE_ENTRY_KEY)
            free(pcur->key);
        if (pcur->free_flags & FREE_ENTRY_VALUE)
            mv_free(&pcur->value);
        pcur = pcur->pnext;
    }

    memset(pmap->states, EMPTY, pmap->array_length);
    pmap->phead = NULL;
    pmap->ptail = NULL;
    pmap->num_occupied = 0;
    pmap->num_freed = 0;
}
// ----------------------------------------------------------------
// Releases the map: frees the keys and values the map was asked to free,
// then the slot arrays, then the map itself. A NULL pmap is a no-op.
void lhmsmv_free(lhmsmv_t* pmap) {
    if (pmap != NULL) {
        lhmsmve_t* pcur = pmap->phead;
        while (pcur != NULL) {
            if (pcur->free_flags & FREE_ENTRY_KEY)
                free(pcur->key);
            if (pcur->free_flags & FREE_ENTRY_VALUE)
                mv_free(&pcur->value);
            pcur = pcur->pnext;
        }

        free(pmap->entries);
        free(pmap->states);

        pmap->entries = NULL;
        pmap->num_occupied = 0;
        pmap->num_freed = 0;
        pmap->array_length = 0;

        free(pmap);
    }
}
// ----------------------------------------------------------------
// Probe routine behind get/put/has_key.
// Starts at hash(key) mod array_length, which is also reported through
// *pideal_index, and steps forward one slot at a time with wrap-around.
// Returns the index of the OCCUPIED slot holding an equal key, or of the
// first EMPTY slot reached (where the key would go). Exits the process if
// array_length slots are probed without finding either.
static int lhmsmv_find_index_for_key(lhmsmv_t* pmap, char* key, int* pideal_index) {
    int slot = mlr_canonical_mod(mlr_string_hash_func(key), pmap->array_length);
    *pideal_index = slot;

    for (int probes = 0; ; ) {
        lhmsmve_state_t slot_state = pmap->states[slot];

        if (slot_state == EMPTY)
            return slot; // end of chain
        if (slot_state == OCCUPIED && streq(key, pmap->entries[slot].key))
            return slot; // existing key found in chain

        // Either a different key lives here, or the slot was freed after
        // being occupied; in both cases the sought key may be further along.
        probes++;
        if (probes >= pmap->array_length) {
            fprintf(stderr,
                "%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
            exit(1);
        }

        // Linear probing with wrap-around.
        slot++;
        if (slot >= pmap->array_length)
            slot = 0;
    }
    MLR_INTERNAL_CODING_ERROR();
    return -1; // not reached
}
// ----------------------------------------------------------------
// Inserts or overwrites the mapping key -> *pvalue (the mv_t is stored by
// value). The table is enlarged first if occupied-plus-freed slots have
// reached LOAD_FACTOR of the array length.
void lhmsmv_put(lhmsmv_t* pmap, char* key, mv_t* pvalue, char free_flags) {
    int slots_in_use = pmap->num_occupied + pmap->num_freed;
    if (slots_in_use >= (pmap->array_length*LOAD_FACTOR))
        lhmsmv_enlarge(pmap);
    lhmsmv_put_no_enlarge(pmap, key, pvalue, free_flags);
}
// Core insert, without any resizing.
// * Key already present: the old value is released if the map owned it, the
//   new value is stored, the entry's FREE_ENTRY_VALUE bit is made to match
//   the caller's, and the caller's key is freed if it was handed over with
//   FREE_ENTRY_KEY (the map keeps the key it already has).
// * Key absent: the EMPTY slot found by the probe is filled in and the entry
//   is appended to the tail of the linked list.
static void lhmsmv_put_no_enlarge(lhmsmv_t* pmap, char* key, mv_t* pvalue, char free_flags) {
    int home_slot = 0;
    int slot = lhmsmv_find_index_for_key(pmap, key, &home_slot);
    lhmsmve_t* pentry = &pmap->entries[slot];
    lhmsmve_state_t slot_state = pmap->states[slot];

    if (slot_state == OCCUPIED) {
        if (pentry->free_flags & FREE_ENTRY_VALUE)
            mv_free(&pentry->value);
        pentry->value = *pvalue;
        pentry->free_flags = (free_flags & FREE_ENTRY_VALUE)
            ? (pentry->free_flags | FREE_ENTRY_VALUE)
            : (pentry->free_flags & ~FREE_ENTRY_VALUE);
        // The caller handed over ownership of a key we do not need, since an
        // equal key is already stored. Release the one passed in.
        if (free_flags & FREE_ENTRY_KEY)
            free(key);
        return;
    }

    if (slot_state != EMPTY) {
        fprintf(stderr, "%s: lhmsmv_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
        exit(1);
    }

    // End of chain: claim the slot.
    pentry->ideal_index = home_slot;
    pentry->key = key;
    pentry->value = *pvalue;
    pentry->free_flags = free_flags;
    pmap->states[slot] = OCCUPIED;

    // Append to the linked list.
    pentry->pnext = NULL;
    if (pmap->phead == NULL) {
        pentry->pprev = NULL;
        pmap->phead = pentry;
    } else {
        pentry->pprev = pmap->ptail;
        pmap->ptail->pnext = pentry;
    }
    pmap->ptail = pentry;
    pmap->num_occupied++;
}
// ----------------------------------------------------------------
// Returns a pointer to the value stored inside the map for key, or NULL if
// the key is absent. Exits the process on an unexpected slot state.
mv_t* lhmsmv_get(lhmsmv_t* pmap, char* key) {
    int home_slot = 0; // filled in by the probe; not needed here
    int slot = lhmsmv_find_index_for_key(pmap, key, &home_slot);
    switch (pmap->states[slot]) {
    case OCCUPIED:
        return &pmap->entries[slot].value;
    case EMPTY:
        return NULL;
    default:
        fprintf(stderr, "%s: lhmsmv_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
        exit(1);
    }
}
// ----------------------------------------------------------------
// Returns TRUE if key is present in the map and FALSE otherwise. Exits the
// process on an unexpected slot state.
int lhmsmv_has_key(lhmsmv_t* pmap, char* key) {
    int home_slot = 0; // filled in by the probe; not needed here
    int slot = lhmsmv_find_index_for_key(pmap, key, &home_slot);
    lhmsmve_state_t slot_state = pmap->states[slot];

    if (slot_state == OCCUPIED)
        return TRUE;
    if (slot_state == EMPTY)
        return FALSE;

    fprintf(stderr, "%s: lhmsmv_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
    exit(1);
}
// ----------------------------------------------------------------
// Grows the table by ENLARGEMENT_FACTOR. The old arrays are set aside, the
// map is re-initialized at the new length, and every entry is re-inserted by
// walking the old linked list from its head, which preserves list order. The
// old arrays are released last because the list being walked lives in them.
static void lhmsmv_enlarge(lhmsmv_t* pmap) {
    lhmsmve_t*       prev_entries = pmap->entries;
    lhmsmve_state_t* prev_states  = pmap->states;
    lhmsmve_t*       pcur         = pmap->phead;

    lhmsmv_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);

    while (pcur != NULL) {
        lhmsmv_put_no_enlarge(pmap, pcur->key, &pcur->value, pcur->free_flags);
        pcur = pcur->pnext;
    }

    free(prev_entries);
    free(prev_states);
}
// ----------------------------------------------------------------
// Debug dump to stdout: one row per entry, in linked-list order from phead,
// showing the link pointers, the ideal (un-probed) index, the key and the
// formatted value.
void lhmsmv_dump(lhmsmv_t* pmap) {
    for (lhmsmve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
        const char* key_string = (pe->key == NULL) ? "null" : pe->key;
        // As its name indicates, mv_alloc_format_val hands back an allocated
        // string, so release it once printed rather than leaking one string
        // per entry.
        char* value_string = mv_alloc_format_val(&pe->value);
        // %p requires void* arguments, hence the casts.
        printf("| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | value: %12s |\n",
            (void*)pe->pprev, (void*)pe, (void*)pe->pnext,
            pe->ideal_index, key_string, value_string);
        free(value_string);
    }
}
// ----------------------------------------------------------------
// Unit-test hook: recounts OCCUPIED and DELETED slots from the states array
// and compares them with the cached num_occupied / num_freed. Returns TRUE
// when both agree; otherwise prints the first mismatch to stderr and returns
// FALSE.
int lhmsmv_check_counts(lhmsmv_t* pmap) {
    int tally_occupied = 0;
    int tally_deleted = 0;

    for (int slot = 0; slot < pmap->array_length; slot++) {
        switch (pmap->states[slot]) {
        case OCCUPIED:
            tally_occupied++;
            break;
        case DELETED:
            tally_deleted++;
            break;
        default:
            break;
        }
    }

    if (tally_occupied != pmap->num_occupied) {
        fprintf(stderr,
            "occupancy-count mismatch: actual %d != cached %d.\n",
            tally_occupied, pmap->num_occupied);
        return FALSE;
    }
    if (tally_deleted != pmap->num_freed) {
        fprintf(stderr,
            "deleted-count mismatch: actual %d != cached %d.\n",
            tally_deleted, pmap->num_freed);
        return FALSE;
    }
    return TRUE;
}

57
c/containers/lhmsmv.h Normal file
View file

@ -0,0 +1,57 @@
// ================================================================
// Array-only (open addressing) string-to-mlrval linked hash map with linear
// probing for collisions.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef LHMSMV_H
#define LHMSMV_H
#include "containers/sllv.h"
#include "lib/mlrval.h"
// ----------------------------------------------------------------
// One slot of the map's entry array. Entries are also chained into a
// doubly-linked list in insertion order (new entries are appended at the
// tail).
typedef struct _lhmsmve_t {
int ideal_index; // hash(key) mod array_length, i.e. the slot before any probing
char free_flags; // FREE_ENTRY_KEY / FREE_ENTRY_VALUE bits: what the map frees on clear/free
char* key; // NUL-terminated string key; not duplicated by put
mv_t value; // stored by value (put copies *pvalue into the entry)
struct _lhmsmve_t *pprev; // previous entry in the list; NULL for the head
struct _lhmsmve_t *pnext; // next entry in the list; NULL for the tail
} lhmsmve_t;
// Per-slot state code, kept in an array parallel to the entries.
typedef unsigned char lhmsmve_state_t;
// The map: parallel entries/states arrays of array_length slots, plus the
// head and tail of the linked list threaded through the entries.
typedef struct _lhmsmv_t {
int num_occupied; // cached count of OCCUPIED slots (cross-checked by lhmsmv_check_counts)
int num_freed; // cached count of DELETED slots (cross-checked by lhmsmv_check_counts)
int array_length; // number of slots in entries[] and states[]
lhmsmve_t* entries; // slot contents; meaningful only where the state is OCCUPIED
lhmsmve_state_t* states;
lhmsmve_t* phead; // first entry in list order; NULL when the map is empty
lhmsmve_t* ptail; // last entry in list order
} lhmsmv_t;
// ----------------------------------------------------------------
// Allocates an empty map.
lhmsmv_t* lhmsmv_alloc();
// Returns a new map with duplicated keys and values; the copy owns both.
lhmsmv_t* lhmsmv_copy(lhmsmv_t* pmap);
// Removes all entries (freeing owned keys/values) but keeps the map and its
// arrays. NULL is accepted.
void lhmsmv_clear(lhmsmv_t* pmap);
// Frees owned keys/values, the arrays, and the map itself. NULL is accepted.
void lhmsmv_free(lhmsmv_t* pmap);
// Inserts or overwrites key -> *pvalue; the mv_t is copied into the map by
// value. If the key already exists and free_flags has FREE_ENTRY_KEY, the
// passed-in key is freed.
void lhmsmv_put(lhmsmv_t* pmap, char* key, mv_t* pvalue, char free_flags);
// Returns a pointer to the value stored in the map, or NULL if key is absent.
mv_t* lhmsmv_get(lhmsmv_t* pmap, char* key);
// Returns TRUE if key is present, else FALSE.
int lhmsmv_has_key(lhmsmv_t* pmap, char* key);
// Prints every entry, in list order, to stdout.
void lhmsmv_dump(lhmsmv_t* pmap);
// Returns TRUE when the cached occupied/freed counts match a recount of the
// slot states; otherwise prints the mismatch to stderr and returns FALSE.
int lhmsmv_check_counts(lhmsmv_t* pmap);
#endif // LHMSMV_H

306
c/containers/lhmss.c Normal file
View file

@ -0,0 +1,306 @@
// ================================================================
// Array-only (open addressing) string-to-string linked hash map with linear
// probing for collisions.
//
// Keys and values are not strduped.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "containers/lhmss.h"
#include "lib/free_flags.h"
// ----------------------------------------------------------------
// Allow compile-time override, e.g. using gcc -D.
// Number of slots a freshly allocated map starts with.
#ifndef INITIAL_ARRAY_LENGTH
#define INITIAL_ARRAY_LENGTH 16
#endif
// lhmss_put enlarges the table once occupied-plus-freed slots reach this
// fraction of array_length.
#ifndef LOAD_FACTOR
#define LOAD_FACTOR 0.7
#endif
// Multiplier applied to array_length on each enlargement.
#ifndef ENLARGEMENT_FACTOR
#define ENLARGEMENT_FACTOR 2
#endif
// ----------------------------------------------------------------
// Byte values stored in the per-slot states array.
#define OCCUPIED 0xa4
#define DELETED 0xb8
#define EMPTY 0xce
// ----------------------------------------------------------------
// Forward declarations: lhmss_put calls lhmss_enlarge, which in turn calls
// lhmss_put_no_enlarge; both are defined further down.
static void lhmss_put_no_enlarge(lhmss_t* pmap, char* key, char* value, char free_flags);
static void lhmss_enlarge(lhmss_t* pmap);
// Sets pmap up as an empty table of `length` slots: zero counts, an empty
// linked list, freshly allocated entries and states arrays, and every slot
// marked EMPTY.
static void lhmss_init(lhmss_t *pmap, int length) {
    pmap->array_length = length;
    pmap->num_occupied = 0;
    pmap->num_freed = 0;
    pmap->phead = NULL;
    pmap->ptail = NULL;

    // The entry array is intentionally left uninitialized. Clearing it makes
    // constructing an empty map much slower, and Miller constructs a great
    // many of them. Entry fields are don't-cares while the slot's state is
    // EMPTY; they are set on put, and mutated on remove.
    pmap->entries = (lhmsse_t*)mlr_malloc_or_die(length * sizeof(lhmsse_t));

    lhmsse_state_t* slot_states =
        (lhmsse_state_t*)mlr_malloc_or_die(length * sizeof(lhmsse_state_t));
    memset(slot_states, EMPTY, length);
    pmap->states = slot_states;
}
// Allocates a new, empty map with INITIAL_ARRAY_LENGTH slots.
lhmss_t* lhmss_alloc() {
    lhmss_t* pnew = mlr_malloc_or_die(sizeof(lhmss_t));
    lhmss_init(pnew, INITIAL_ARRAY_LENGTH);
    return pnew;
}
// Builds a new map holding duplicates of every key and value in pmap, in the
// same list order. The copy is flagged to free both its keys and its values.
//
// This map permits NULL values (lhmss_dump checks for them), so a NULL value
// is carried over as NULL instead of being passed to the string duplicator.
// Freeing it later is harmless because free(NULL) is a no-op.
lhmss_t* lhmss_copy(lhmss_t* pmap) {
    lhmss_t* pnew = lhmss_alloc();
    for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
        char* key_dup = mlr_strdup_or_die(pe->key);
        char* value_dup = (pe->value == NULL) ? NULL : mlr_strdup_or_die(pe->value);
        lhmss_put(pnew, key_dup, value_dup, FREE_ENTRY_KEY|FREE_ENTRY_VALUE);
    }
    return pnew;
}
// Releases the map: frees the keys and values the map was asked to free,
// then the slot arrays, then the map itself. A NULL pmap is a no-op.
void lhmss_free(lhmss_t* pmap) {
    if (pmap != NULL) {
        lhmsse_t* pcur = pmap->phead;
        while (pcur != NULL) {
            if (pcur->free_flags & FREE_ENTRY_KEY)
                free(pcur->key);
            if (pcur->free_flags & FREE_ENTRY_VALUE)
                free(pcur->value);
            pcur = pcur->pnext;
        }

        free(pmap->entries);
        free(pmap->states);

        pmap->entries = NULL;
        pmap->num_occupied = 0;
        pmap->num_freed = 0;
        pmap->array_length = 0;

        free(pmap);
    }
}
// ----------------------------------------------------------------
// Probe routine behind get/put/has_key.
// Starts at hash(key) mod array_length, which is also reported through
// *pideal_index, and steps forward one slot at a time with wrap-around.
// Returns the index of the OCCUPIED slot holding an equal key, or of the
// first EMPTY slot reached (where the key would go). Exits the process if
// array_length slots are probed without finding either.
static int lhmss_find_index_for_key(lhmss_t* pmap, char* key, int* pideal_index) {
    int slot = mlr_canonical_mod(mlr_string_hash_func(key), pmap->array_length);
    *pideal_index = slot;

    for (int probes = 0; ; ) {
        lhmsse_state_t slot_state = pmap->states[slot];

        if (slot_state == EMPTY)
            return slot; // end of chain
        if (slot_state == OCCUPIED && streq(key, pmap->entries[slot].key))
            return slot; // existing key found in chain

        // Either a different key lives here, or the slot was freed after
        // being occupied; in both cases the sought key may be further along.
        probes++;
        if (probes >= pmap->array_length) {
            fprintf(stderr,
                "%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
            exit(1);
        }

        // Linear probing with wrap-around.
        slot++;
        if (slot >= pmap->array_length)
            slot = 0;
    }
    MLR_INTERNAL_CODING_ERROR();
    return -1; // not reached
}
// ----------------------------------------------------------------
// Inserts or overwrites the mapping key -> value. Neither string is
// duplicated. The table is enlarged first if occupied-plus-freed slots have
// reached LOAD_FACTOR of the array length.
void lhmss_put(lhmss_t* pmap, char* key, char* value, char free_flags) {
    int slots_in_use = pmap->num_occupied + pmap->num_freed;
    if (slots_in_use >= (pmap->array_length*LOAD_FACTOR))
        lhmss_enlarge(pmap);
    lhmss_put_no_enlarge(pmap, key, value, free_flags);
}
// Core insert, without any resizing.
// * Key already present: the old value is released if the map owned it, the
//   new value is stored, the entry's FREE_ENTRY_VALUE bit is made to match
//   the caller's, and the caller's key is freed if it was handed over with
//   FREE_ENTRY_KEY (the map keeps the key it already has).
// * Key absent: the EMPTY slot found by the probe is filled in and the entry
//   is appended to the tail of the linked list.
static void lhmss_put_no_enlarge(lhmss_t* pmap, char* key, char* value, char free_flags) {
    int ideal_index = 0;
    int index = lhmss_find_index_for_key(pmap, key, &ideal_index);
    lhmsse_t* pe = &pmap->entries[index];

    if (pmap->states[index] == OCCUPIED) {
        // Existing key found in chain; put value.
        // Skip the free when the caller re-puts the very pointer already
        // stored, which would otherwise leave the entry dangling.
        if ((pe->free_flags & FREE_ENTRY_VALUE) && pe->value != value)
            free(pe->value);
        pe->value = value;
        if (free_flags & FREE_ENTRY_VALUE)
            pe->free_flags |= FREE_ENTRY_VALUE;
        else
            pe->free_flags &= ~FREE_ENTRY_VALUE;
        // Whether the passed-in key must be freed depends on what THIS call
        // handed over (free_flags), not on how the stored key was acquired
        // (pe->free_flags). Using the stored flags could free a key the
        // caller still owns (e.g. a string literal) or leak one it gave up.
        // This matches lhmsmv_put_no_enlarge. The pointer-identity guard
        // protects the stored key itself.
        if ((free_flags & FREE_ENTRY_KEY) && key != pe->key)
            free(key);
    } else if (pmap->states[index] == EMPTY) {
        // End of chain.
        pe->ideal_index = ideal_index;
        pe->key = key;
        pe->value = value;
        pe->free_flags = free_flags;
        pmap->states[index] = OCCUPIED;

        // Append to the linked list.
        if (pmap->phead == NULL) {
            pe->pprev = NULL;
            pe->pnext = NULL;
            pmap->phead = pe;
            pmap->ptail = pe;
        } else {
            pe->pprev = pmap->ptail;
            pe->pnext = NULL;
            pmap->ptail->pnext = pe;
            pmap->ptail = pe;
        }
        pmap->num_occupied++;
    } else {
        fprintf(stderr, "%s: lhmss_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
        exit(1);
    }
}
// ----------------------------------------------------------------
// Returns the value stored for key, or NULL if the key is absent. Exits the
// process on an unexpected slot state.
char* lhmss_get(lhmss_t* pmap, char* key) {
    int home_slot = 0; // filled in by the probe; not needed here
    int slot = lhmss_find_index_for_key(pmap, key, &home_slot);
    switch (pmap->states[slot]) {
    case OCCUPIED:
        return pmap->entries[slot].value;
    case EMPTY:
        return NULL;
    default:
        fprintf(stderr, "%s: lhmss_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
        exit(1);
    }
}
// ----------------------------------------------------------------
// Returns TRUE if key is present in the map and FALSE otherwise. Exits the
// process on an unexpected slot state.
int lhmss_has_key(lhmss_t* pmap, char* key) {
    int home_slot = 0; // filled in by the probe; not needed here
    int slot = lhmss_find_index_for_key(pmap, key, &home_slot);
    lhmsse_state_t slot_state = pmap->states[slot];

    if (slot_state == OCCUPIED)
        return TRUE;
    if (slot_state == EMPTY)
        return FALSE;

    fprintf(stderr, "%s: lhmss_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
    exit(1);
}
// ----------------------------------------------------------------
// Renaming is not implemented for this map: reports that on stderr and exits
// with status 1. None of the arguments are examined.
void lhmss_rename(lhmss_t* pmap, char* old_key, char* new_key) {
    (void)pmap;
    (void)old_key;
    (void)new_key;
    fputs("rename is not supported in the hashed-record impl.\n", stderr);
    exit(1);
}
// ----------------------------------------------------------------
// Grows the table by ENLARGEMENT_FACTOR. The old arrays are set aside, the
// map is re-initialized at the new length, and every entry is re-inserted by
// walking the old linked list from its head, which preserves list order. The
// old arrays are released last because the list being walked lives in them.
static void lhmss_enlarge(lhmss_t* pmap) {
    lhmsse_t*       prev_entries = pmap->entries;
    lhmsse_state_t* prev_states  = pmap->states;
    lhmsse_t*       pcur         = pmap->phead;

    lhmss_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);

    while (pcur != NULL) {
        lhmss_put_no_enlarge(pmap, pcur->key, pcur->value, pcur->free_flags);
        pcur = pcur->pnext;
    }

    free(prev_entries);
    free(prev_states);
}
// ----------------------------------------------------------------
// Maps a slot-state code to a short printable name; unrecognized codes give
// "?????".
static char* get_state_name(int state) {
    if (state == OCCUPIED)
        return "occupied";
    if (state == DELETED)
        return "freed";
    if (state == EMPTY)
        return "empty";
    return "?????";
}
// Debug dump to stdout: first one row per array slot, then one row per entry
// in linked-list order starting at phead.
//
// Only OCCUPIED slots have their entry fields read. lhmss_init leaves the
// entry array uninitialized, so for any other slot the key/value pointers
// are indeterminate and must not be handed to %s. Those slots are printed
// with placeholders (nidx -1, key "none", value "none").
void lhmss_dump(lhmss_t* pmap) {
    for (int index = 0; index < pmap->array_length; index++) {
        int state = pmap->states[index];
        int nidx = -1;
        const char* key_string = "none";
        const char* value_string = "none";
        if (state == OCCUPIED) {
            lhmsse_t* pe = &pmap->entries[index];
            nidx = pe->ideal_index;
            key_string = (pe->key == NULL) ? "null" : pe->key;
            value_string = (pe->value == NULL) ? "null" : pe->value;
        }
        printf(
            "| stt: %-8s | idx: %6d | nidx: %6d | key: %12s | value: %12s |\n",
            get_state_name(state), index, nidx, key_string, value_string);
    }
    printf("+\n");
    // %p requires void* arguments, hence the casts.
    printf("| phead: %p | ptail %p\n", (void*)pmap->phead, (void*)pmap->ptail);
    printf("+\n");
    for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
        const char* key_string = (pe->key == NULL) ? "null" : pe->key;
        const char* value_string = (pe->value == NULL) ? "null" : pe->value;
        printf(
            "| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | value: %12s |\n",
            (void*)pe->pprev, (void*)pe, (void*)pe->pnext,
            pe->ideal_index, key_string, value_string);
    }
}
// ----------------------------------------------------------------
// Unit-test hook: recounts OCCUPIED and DELETED slots from the states array
// and compares them with the cached num_occupied / num_freed. Returns TRUE
// when both agree; otherwise prints the first mismatch to stderr and returns
// FALSE.
int lhmss_check_counts(lhmss_t* pmap) {
    int tally_occupied = 0;
    int tally_deleted = 0;

    for (int slot = 0; slot < pmap->array_length; slot++) {
        switch (pmap->states[slot]) {
        case OCCUPIED:
            tally_occupied++;
            break;
        case DELETED:
            tally_deleted++;
            break;
        default:
            break;
        }
    }

    if (tally_occupied != pmap->num_occupied) {
        fprintf(stderr,
            "occupancy-count mismatch: actual %d != cached %d.\n",
            tally_occupied, pmap->num_occupied);
        return FALSE;
    }
    if (tally_deleted != pmap->num_freed) {
        fprintf(stderr,
            "deleted-count mismatch: actual %d != cached %d.\n",
            tally_deleted, pmap->num_freed);
        return FALSE;
    }
    return TRUE;
}

57
c/containers/lhmss.h Normal file
View file

@ -0,0 +1,57 @@
// ================================================================
// Array-only (open addressing) string-to-string linked hash map with linear
// probing for collisions.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef LHMSS_H
#define LHMSS_H
#include "containers/sllv.h"
// ----------------------------------------------------------------
// One slot of the hash array. Each slot is also a node of the doubly linked
// list that lhmss_enlarge and lhmss_dump walk via pnext.
typedef struct _lhmsse_t {
int ideal_index; // printed as "nidx" by lhmss_dump
char free_flags; // carried over unchanged when entries are re-inserted on enlarge
char* key;
char* value; // may be NULL (see the notes at the top of this file)
struct _lhmsse_t *pprev; // previous node in the linked list
struct _lhmsse_t *pnext; // next node in the linked list; NULL ends a traversal
} lhmsse_t;
// State code of one slot; kept in the states array, indexed like entries.
typedef unsigned char lhmsse_state_t;
// The map: a slot array with a parallel state array, plus the ends of the
// linked list of entries.
typedef struct _lhmss_t {
int num_occupied; // cached count of OCCUPIED slots; verified by lhmss_check_counts
int num_freed; // cached count of DELETED slots; verified by lhmss_check_counts
int array_length; // number of slots in entries and in states
lhmsse_t* entries;
lhmsse_state_t* states;
lhmsse_t* phead; // start of the linked list traversed by lhmss_dump / lhmss_enlarge
lhmsse_t* ptail;
} lhmss_t;
// ----------------------------------------------------------------
lhmss_t* lhmss_alloc();
lhmss_t* lhmss_copy(lhmss_t* pmap);
void lhmss_free(lhmss_t* pmap);
void lhmss_put(lhmss_t* pmap, char* key, char* value, char free_flags);
char* lhmss_get(lhmss_t* pmap, char* key);
int lhmss_has_key(lhmss_t* pmap, char* key);
// Not supported by this implementation: prints an error to stderr and exits
// with status 1.
void lhmss_rename(lhmss_t* pmap, char* old_key, char* new_key);
// Debug aid: prints the slot array and then the linked list to stdout.
void lhmss_dump(lhmss_t* pmap);
// Unit-test hook: returns TRUE if the cached occupied/freed counters match a
// recount of the slot states; otherwise reports on stderr and returns FALSE.
int lhmss_check_counts(lhmss_t* pmap);
#endif // LHMSS_H

293
c/containers/lhmsv.c Normal file
View file

@ -0,0 +1,293 @@
// ================================================================
// Array-only (open addressing) string-to-void linked hash map with linear
// probing for collisions.
//
// Keys are not strduped; memory management of the void* values is left to the
// caller.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
// * null value is not supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "containers/lhmsv.h"
#include "lib/free_flags.h"
// ----------------------------------------------------------------
// Tunables. Allow compile-time override, e.g. using gcc -D.
// Number of slots in a newly allocated map (see lhmsv_alloc).
#ifndef INITIAL_ARRAY_LENGTH
#define INITIAL_ARRAY_LENGTH 16
#endif
// lhmsv_put enlarges the table once occupied-plus-freed slots reach this
// fraction of the array length.
#ifndef LOAD_FACTOR
#define LOAD_FACTOR 0.7
#endif
// Multiplier applied to the array length on each enlargement.
#ifndef ENLARGEMENT_FACTOR
#define ENLARGEMENT_FACTOR 2
#endif
// ----------------------------------------------------------------
// Slot states, stored one byte per slot in the states array (which is filled
// with EMPTY via memset at init/clear time).
#define OCCUPIED 0xa4
#define DELETED 0xb8
#define EMPTY 0xce
// ----------------------------------------------------------------
// Forward declarations: lhmsv_put calls both, and lhmsv_enlarge calls
// lhmsv_put_no_enlarge, before their definitions appear below.
static void lhmsv_put_no_enlarge(lhmsv_t* pmap, char* key, void* pvvalue, char free_flags);
static void lhmsv_enlarge(lhmsv_t* pmap);
// (Re)initializes pmap as an empty map with `length` slots. Arrays that pmap
// pointed to beforehand are not freed here; that is up to the caller.
static void lhmsv_init(lhmsv_t *pmap, int length) {
    // The slot attributes are intentionally left uninitialized: clearing every
    // entry has a drastic effect on the time needed to construct an empty map
    // (and miller constructs an awful lot of those). They are don't-cares while
    // the slot state is EMPTY; they are set on put, and mutated on remove.
    lhmsve_t* slots = (lhmsve_t*)mlr_malloc_or_die(sizeof(lhmsve_t) * length);

    lhmsve_state_t* slot_states = (lhmsve_state_t*)mlr_malloc_or_die(sizeof(lhmsve_state_t) * length);
    memset(slot_states, EMPTY, length);

    pmap->array_length = length;
    pmap->entries      = slots;
    pmap->states       = slot_states;

    pmap->num_occupied = 0;
    pmap->num_freed    = 0;

    pmap->phead = NULL;
    pmap->ptail = NULL;
}
// Allocates an empty map with INITIAL_ARRAY_LENGTH slots.
// Release it with lhmsv_free().
lhmsv_t* lhmsv_alloc() {
    lhmsv_t* pnew_map = mlr_malloc_or_die(sizeof(lhmsv_t));

    lhmsv_init(pnew_map, INITIAL_ARRAY_LENGTH);

    return pnew_map;
}
// ----------------------------------------------------------------
// Releases the map: frees each key whose entry carries FREE_ENTRY_KEY, then the
// slot and state arrays, then the map itself. Values are never freed here;
// they belong to the caller. A NULL map is a no-op.
void lhmsv_free(lhmsv_t* pmap) {
    if (pmap != NULL) {
        lhmsve_t* pnode = pmap->phead;
        while (pnode != NULL) {
            lhmsve_t* pfollowing = pnode->pnext;
            if (pnode->free_flags & FREE_ENTRY_KEY) {
                free(pnode->key);
            }
            pnode = pfollowing;
        }

        free(pmap->entries);
        free(pmap->states);

        pmap->entries      = NULL;
        pmap->num_occupied = 0;
        pmap->num_freed    = 0;
        pmap->array_length = 0;

        free(pmap);
    }
}
// Empties the map while keeping its current slot arrays: frees each key whose
// entry carries FREE_ENTRY_KEY, marks every slot EMPTY, and resets the counters
// and the linked list. Values are not freed. A NULL map is a no-op.
void lhmsv_clear(lhmsv_t* pmap) {
    if (pmap != NULL) {
        lhmsve_t* pnode = pmap->phead;
        while (pnode != NULL) {
            lhmsve_t* pfollowing = pnode->pnext;
            if (pnode->free_flags & FREE_ENTRY_KEY) {
                free(pnode->key);
            }
            pnode = pfollowing;
        }

        pmap->num_occupied = 0;
        pmap->num_freed    = 0;
        memset(pmap->states, EMPTY, pmap->array_length);

        pmap->phead = NULL;
        pmap->ptail = NULL;
    }
}
// ----------------------------------------------------------------
// Shared lookup for put, get and has_key.
// Starting at the key's hashed ("ideal") slot, probes linearly with wraparound
// and returns the index of either
//   * the OCCUPIED slot holding an equal key, or
//   * the first EMPTY slot, i.e. where the key should go (end of chain).
// The ideal slot is written to *pideal_index. If every slot has been probed
// without success, an internal error is reported and the process exits.
static int lhmsv_find_index_for_key(lhmsv_t* pmap, char* key, int* pideal_index) {
    int slot = mlr_canonical_mod(mlr_string_hash_func(key), pmap->array_length);
    *pideal_index = slot;

    for (int probes = 0; probes < pmap->array_length; probes++) {
        lhmsve_state_t state = pmap->states[slot];

        if (state == EMPTY) {
            return slot;
        }
        if (state == OCCUPIED && streq(key, pmap->entries[slot].key)) {
            // Existing key found in chain.
            return slot;
        }

        // Either a different key lives here, or the slot was deleted (i.e.
        // previously occupied) and the sought key may be further down the
        // chain: keep going, wrapping at the end of the array.
        slot++;
        if (slot >= pmap->array_length) {
            slot = 0;
        }
    }

    fprintf(stderr,
        "%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
    exit(1);
    return -1; // not reached
}
// ----------------------------------------------------------------
// Inserts key -> pvvalue, or replaces the value if the key is already present.
// The table is enlarged first when occupied-plus-freed slots have reached
// LOAD_FACTOR of the array length.
void lhmsv_put(lhmsv_t* pmap, char* key, void* pvvalue, char free_flags) {
    int needs_growth =
        (pmap->num_occupied + pmap->num_freed) >= (pmap->array_length*LOAD_FACTOR);

    if (needs_growth) {
        lhmsv_enlarge(pmap);
    }

    lhmsv_put_no_enlarge(pmap, key, pvvalue, free_flags);
}
// Insert-or-update without the load-factor check; also used when re-inserting
// entries during enlargement.
//   * Key already present: only the value pointer is replaced; the stored key
//     and free_flags are left as they were.
//   * Key absent: the EMPTY slot found by the probe is filled in and appended
//     to the tail of the linked list.
// Any other probe result is an internal error and terminates the process.
static void lhmsv_put_no_enlarge(lhmsv_t* pmap, char* key, void* pvvalue, char free_flags) {
    int home_slot = 0;
    int slot = lhmsv_find_index_for_key(pmap, key, &home_slot);
    lhmsve_t* pslot = &pmap->entries[slot];

    switch (pmap->states[slot]) {
    case OCCUPIED:
        // Existing key found in chain; put value.
        pslot->pvvalue = pvvalue;
        return;
    case EMPTY:
        // End of chain: fall out of the switch and create the entry.
        break;
    default:
        fprintf(stderr, "%s: lhmsv_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
        exit(1);
    }

    pslot->ideal_index = home_slot;
    pslot->key         = key;
    pslot->pvvalue     = pvvalue;
    pslot->free_flags  = free_flags;
    pmap->states[slot] = OCCUPIED;

    // Append to the linked list.
    pslot->pnext = NULL;
    if (pmap->phead == NULL) {
        pslot->pprev = NULL;
        pmap->phead  = pslot;
    } else {
        pslot->pprev       = pmap->ptail;
        pmap->ptail->pnext = pslot;
    }
    pmap->ptail = pslot;

    pmap->num_occupied++;
}
// ----------------------------------------------------------------
// Returns the value pointer stored under key, or NULL if the key is absent.
// A probe that ends on neither an OCCUPIED nor an EMPTY slot is an internal
// error and terminates the process.
void* lhmsv_get(lhmsv_t* pmap, char* key) {
    int home_slot = 0;
    int slot = lhmsv_find_index_for_key(pmap, key, &home_slot);
    lhmsve_state_t state = pmap->states[slot];

    if (state == EMPTY) {
        return NULL;
    }
    if (state == OCCUPIED) {
        return pmap->entries[slot].pvvalue;
    }

    fprintf(stderr, "%s: lhmsv_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
    exit(1);
}
// ----------------------------------------------------------------
// Returns TRUE if key is present in the map and FALSE if it is not.
// A probe that ends on neither an OCCUPIED nor an EMPTY slot is an internal
// error and terminates the process.
int lhmsv_has_key(lhmsv_t* pmap, char* key) {
    int home_slot = 0;
    int slot = lhmsv_find_index_for_key(pmap, key, &home_slot);

    switch (pmap->states[slot]) {
    case OCCUPIED:
        return TRUE;
    case EMPTY:
        return FALSE;
    default:
        fprintf(stderr, "%s: lhmsv_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
        exit(1);
    }
}
// ----------------------------------------------------------------
// Grows the table to ENLARGEMENT_FACTOR times its current length and
// re-inserts every entry in linked-list order, so that order is preserved.
static void lhmsv_enlarge(lhmsv_t* pmap) {
    // lhmsv_init installs fresh arrays in pmap; keep the old ones reachable
    // until their contents have been copied across.
    lhmsve_t*       prior_entries = pmap->entries;
    lhmsve_state_t* prior_states  = pmap->states;
    lhmsve_t*       pnode         = pmap->phead;

    lhmsv_init(pmap, pmap->array_length * ENLARGEMENT_FACTOR);

    while (pnode != NULL) {
        lhmsv_put_no_enlarge(pmap, pnode->key, pnode->pvvalue, pnode->free_flags);
        pnode = pnode->pnext;
    }

    free(prior_entries);
    free(prior_states);
}
// ----------------------------------------------------------------
// Unit-test hook. Recounts the OCCUPIED and DELETED slots and compares the
// results with the cached num_occupied / num_freed counters.
// Returns TRUE when both agree; otherwise reports the first mismatch on stderr
// and returns FALSE.
int lhmsv_check_counts(lhmsv_t* pmap) {
    int occupied_seen = 0;
    int deleted_seen = 0;

    for (int slot = 0; slot < pmap->array_length; slot++) {
        switch (pmap->states[slot]) {
        case OCCUPIED:
            occupied_seen++;
            break;
        case DELETED:
            deleted_seen++;
            break;
        default:
            break;
        }
    }

    if (occupied_seen != pmap->num_occupied) {
        fprintf(stderr,
            "occupancy-count mismatch: actual %d != cached %d.\n",
            occupied_seen, pmap->num_occupied);
        return FALSE;
    }
    if (deleted_seen != pmap->num_freed) {
        fprintf(stderr,
            "deleted-count mismatch: actual %d != cached %d.\n",
            deleted_seen, pmap->num_freed);
        return FALSE;
    }
    return TRUE;
}
// ----------------------------------------------------------------
// Maps a slot-state code to the short label used by lhmsv_print.
// Unrecognized codes yield "?????".
static char* get_state_name(int state) {
    if (state == OCCUPIED)
        return "occupied";
    if (state == DELETED)
        return "deleted";
    if (state == EMPTY)
        return "empty";
    return "?????";
}
// Debug aid. Prints to stdout, in order:
//   1. one line per slot of the hash array (state, index, ideal index, key,
//      value pointer);
//   2. the head and tail pointers of the linked list;
//   3. one line per entry in linked-list order.
//
// Entry fields are read only for OCCUPIED slots. lhmsv_init deliberately leaves
// the entries array uninitialized, so the key of an unused slot is an
// indeterminate pointer that must not be handed to %s. Slots that are not
// occupied print key "none", ideal index -1 and a null value pointer.
//
// Pointers are cast to void* because that is the argument type %p requires.
void lhmsv_print(lhmsv_t* pmap) {
    for (int index = 0; index < pmap->array_length; index++) {
        lhmsve_t* pe = &pmap->entries[index];
        int occupied = (pmap->states[index] == OCCUPIED);
        int ideal_index = occupied ? pe->ideal_index : -1;
        void* pvvalue = occupied ? pe->pvvalue : NULL;
        const char* key_string =
            !occupied       ? "none" :
            pe->key == NULL ? "null" :
            pe->key;

        printf(
            "| stt: %-8s | idx: %6d | nidx: %6d | key: %12s | pvvalue: %p |\n",
            get_state_name(pmap->states[index]), index, ideal_index, key_string, pvvalue);
    }
    printf("+\n");
    printf("| phead: %p | ptail %p\n", (void*)pmap->phead, (void*)pmap->ptail);
    printf("+\n");
    for (lhmsve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
        // pe is non-NULL by the loop condition, so only the key needs checking.
        const char* key_string = (pe->key == NULL) ? "null" : pe->key;

        printf(
            "| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | pvvalue: %p |\n",
            (void*)pe->pprev, (void*)pe, (void*)pe->pnext,
            pe->ideal_index, key_string, pe->pvvalue);
    }
}

56
c/containers/lhmsv.h Normal file
View file

@ -0,0 +1,56 @@
// ================================================================
// Array-only (open addressing) string-to-void linked hash map with linear
// probing for collisions.
//
// John Kerl 2012-08-13
//
// Notes:
// * null key is not supported.
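// * null value is not supported: lhmsv_get returns NULL for a missing key, so a
//   stored NULL cannot be told apart from absence.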
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef LHMSV_H
#define LHMSV_H
#include "containers/sllv.h"
#include "lib/free_flags.h"
// ----------------------------------------------------------------
// One slot of the hash array. Each occupied slot is also a node of the doubly
// linked list (appended at the tail on insertion) that gives the map its order.
typedef struct _lhmsve_t {
int ideal_index; // slot the key hashes to, before linear probing
char* key; // stored as passed to put; not copied
void* pvvalue; // caller-owned; never freed by the map
char free_flags; // FREE_ENTRY_KEY here makes lhmsv_free/lhmsv_clear free the key
struct _lhmsve_t *pprev; // previous node in the linked list
struct _lhmsve_t *pnext; // next node in the linked list; NULL at the tail
} lhmsve_t;
// State code of one slot; kept in the states array, indexed like entries.
typedef unsigned char lhmsve_state_t;
// The map: a slot array with a parallel state array, plus the ends of the
// linked list of entries.
typedef struct _lhmsv_t {
int num_occupied; // cached count of OCCUPIED slots; verified by lhmsv_check_counts
int num_freed; // cached count of DELETED slots; verified by lhmsv_check_counts
int array_length; // number of slots in entries and in states
lhmsve_t* entries;
lhmsve_state_t* states;
lhmsve_t* phead; // first entry of the linked list, or NULL when the map is empty
lhmsve_t* ptail; // last entry of the linked list, or NULL when the map is empty
} lhmsv_t;
// ----------------------------------------------------------------
// Allocates an empty map.
lhmsv_t* lhmsv_alloc();
// Frees keys flagged FREE_ENTRY_KEY, the internal arrays, and the map itself.
// Values are not freed. NULL is a no-op.
void lhmsv_free(lhmsv_t* pmap);
// Removes all entries (freeing keys flagged FREE_ENTRY_KEY) but keeps the map
// and its arrays for reuse. Values are not freed. NULL is a no-op.
void lhmsv_clear(lhmsv_t* pmap);
// Inserts key -> pvvalue, or replaces the value when the key already exists.
// The key pointer is stored as-is, not copied.
void lhmsv_put(lhmsv_t* pmap, char* key, void* pvvalue, char free_flags);
// Returns the stored value pointer, or NULL when the key is absent.
void* lhmsv_get(lhmsv_t* pmap, char* key);
// Returns TRUE when the key is present, else FALSE.
int lhmsv_has_key(lhmsv_t* pmap, char* key);
// Unit-test hook: returns TRUE if the cached occupied/freed counters match a
// recount of the slot states; otherwise reports on stderr and returns FALSE.
int lhmsv_check_counts(lhmsv_t* pmap);
#endif // LHMSV_H

368
c/containers/local_stack.c Normal file
View file

@ -0,0 +1,368 @@
#include <stdlib.h>
#include "lib/mlrutil.h"
#include "lib/mlr_globals.h"
#include "containers/local_stack.h"
// ================================================================
static local_stack_frame_t* _local_stack_alloc(int size, int ephemeral) {
local_stack_frame_t* pframe = mlr_malloc_or_die(sizeof(local_stack_frame_t));
pframe->in_use = FALSE;
pframe->ephemeral = ephemeral;
pframe->size = size;
pframe->subframe_base = 0;
pframe->pvars = mlr_malloc_or_die(size * sizeof(local_stack_frame_entry_t));
for (int i = 0; i < size; i++) {
local_stack_frame_entry_t* pentry = &pframe->pvars[i];
pentry->xvalue = mlhmmv_xvalue_wrap_terminal(mv_absent());
pentry->name = NULL;
// Any type can be written here, unless otherwise specified by a typed definition
pentry->type_mask = TYPE_MASK_ANY;
}
return pframe;
}
// ----------------------------------------------------------------
// Allocates a non-ephemeral frame with `size` variable slots.
local_stack_frame_t* local_stack_frame_alloc(int size) {
    local_stack_frame_t* pnew_frame = _local_stack_alloc(size, FALSE);
    return pnew_frame;
}
// ----------------------------------------------------------------
// Frees the value held in every slot, then the slot array, then the frame.
// A NULL frame is a no-op.
void local_stack_frame_free(local_stack_frame_t* pframe) {
    if (pframe != NULL) {
        local_stack_frame_entry_t* pslot = pframe->pvars;
        for (int remaining = pframe->size; remaining > 0; remaining--, pslot++) {
            mlhmmv_xvalue_free(&pslot->xvalue);
        }
        free(pframe->pvars);
        free(pframe);
    }
}
// ----------------------------------------------------------------
// Claims a frame for one invocation of a statement block.
// If pframe is free it is marked in use and returned. If it is already in use
// (recursive invocation), a new ephemeral frame of the same size is allocated,
// marked in use, and returned instead. Pass the result to
// local_stack_frame_exit when done.
local_stack_frame_t* local_stack_frame_enter(local_stack_frame_t* pframe) {
    if (pframe->in_use) {
        local_stack_frame_t* pfresh = _local_stack_alloc(pframe->size, TRUE);
        LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME EPH ENTER %p/%p %d\n", pframe, pfresh, pframe->size));
        pfresh->in_use = TRUE;
        return pfresh;
    }

    pframe->in_use = TRUE;
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME NON-EPH ENTER %p %d\n", pframe, pframe->size));
    return pframe;
}
// ----------------------------------------------------------------
// Releases a frame obtained from local_stack_frame_enter.
// First verifies the slot-0 contract (slot 0 must still be absent); a violation
// is an internal coding error. A non-ephemeral frame is then simply marked not
// in use so it can be reused; an ephemeral frame is freed.
void local_stack_frame_exit (local_stack_frame_t* pframe) {
    MLR_INTERNAL_CODING_ERROR_UNLESS(mlhmmv_xvalue_is_absent_and_nonterminal(&pframe->pvars[0].xvalue));
    if (!pframe->ephemeral) {
        pframe->in_use = FALSE;
        LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME NON-EPH EXIT %p %d\n", pframe, pframe->size));
    } else {
        // Trace before freeing: the trace statement reads pframe->size, which
        // would be a use-after-free if it ran after local_stack_frame_free.
        LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME EPH EXIT %p %d\n", pframe, pframe->size));
        local_stack_frame_free(pframe);
    }
}
// ================================================================
// Allocates an empty stack of frames. Release it with local_stack_free().
local_stack_t* local_stack_alloc() {
    local_stack_t* pnew_stack = mlr_malloc_or_die(sizeof(local_stack_t));

    pnew_stack->pframes = sllv_alloc();

    return pnew_stack;
}
// ----------------------------------------------------------------
// Frees every frame still on the stack, then the underlying list, then the
// stack itself. A NULL stack is a no-op.
void local_stack_free(local_stack_t* pstack) {
    if (pstack != NULL) {
        sllve_t* pnode = pstack->pframes->phead;
        while (pnode != NULL) {
            local_stack_frame_free(pnode->pvvalue);
            pnode = pnode->pnext;
        }
        sllv_free(pstack->pframes);
        free(pstack);
    }
}
// ----------------------------------------------------------------
// Pushes pframe onto the stack's frame list.
void local_stack_push(local_stack_t* pstack, local_stack_frame_t* pframe) {
    sllv_t* pframe_list = pstack->pframes;
    sllv_push(pframe_list, pframe);
}
// Pops a frame off the stack's frame list and returns it.
local_stack_frame_t* local_stack_pop(local_stack_t* pstack) {
    sllv_t* pframe_list = pstack->pframes;
    return sllv_pop(pframe_list);
}
// ----------------------------------------------------------------
// Reads a terminal (non-map) value out of a local variable, optionally indexed
// by map keys.
//   * No keys (NULL or empty list): the variable's own value is considered.
//   * Keys given and the variable holds a terminal: the result is absent.
//   * Keys given and the variable holds a map: the keyed element is looked up.
// Whatever was selected is returned if it exists and is terminal; in every
// other case the result is absent. A variable whose type mask excludes maps
// triggers the read type-mismatch error first.
mv_t local_stack_frame_ref_terminal_from_indexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, sllmv_t* pmvkeys)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p GET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "GET", FALSE, vardef_frame_relative_index);

    local_stack_frame_entry_t* pslot = &pframe->pvars[vardef_frame_relative_index];
    mlhmmv_xvalue_t* pvariable = &pslot->xvalue;

    if (!(TYPE_MASK_MAP & pslot->type_mask)) {
        local_stack_frame_throw_type_mismatch_for_read(pslot);
    }

#ifdef LOCAL_STACK_TRACE_ENABLE
    if (pvariable == NULL) {
        printf("VALUE IS NULL\n");
    } else if (pvariable->is_terminal) {
        char* formatted = mv_alloc_format_val(&pvariable->terminal_mlrval);
        printf("VALUE IS %s\n", formatted);
        free(formatted);
    } else if (pvariable->pnext_level == NULL) {
        printf("VALUE IS EMPTY\n");
    } else {
        printf("VALUE IS:\n");
        printf("PTR IS %p\n", pvariable->pnext_level);
        mlhmmv_level_print_stacked(pvariable->pnext_level, 0, TRUE, TRUE, "", stdout);
    }
#endif

    // Start from the variable itself; narrow down only when keys were supplied.
    mlhmmv_xvalue_t* pselected = pvariable;
    if (pmvkeys != NULL && pmvkeys->length != 0) {
        if (pvariable->is_terminal) {
            return mv_absent();
        }
        int error = 0;
        // Maybe null
        pselected = mlhmmv_level_look_up_and_ref_xvalue(pvariable->pnext_level, pmvkeys, &error);
    }

    if (pselected == NULL || !pselected->is_terminal) {
        return mv_absent();
    }
    return pselected->terminal_mlrval;
}
// ----------------------------------------------------------------
// Returns a reference to the extended (terminal-or-map) value of a local
// variable, optionally indexed by map keys.
//   * pmvkeys == NULL: the variable's own value is returned.
//   * otherwise: the keyed element is looked up in the variable's map level;
//     the result may be NULL.
// A variable whose type mask excludes maps triggers the read type-mismatch
// error first.
// NOTE(review): unlike the terminal-returning sibling, this does not check
// is_terminal before using pnext_level, and treats an empty key list as an
// indexed access -- confirm that callers never reach here in those cases.
mlhmmv_xvalue_t* local_stack_frame_ref_extended_from_indexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, sllmv_t* pmvkeys)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p GET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "GET", FALSE, vardef_frame_relative_index);

    local_stack_frame_entry_t* pslot = &pframe->pvars[vardef_frame_relative_index];
    mlhmmv_xvalue_t* pvariable = &pslot->xvalue;

    if (!(TYPE_MASK_MAP & pslot->type_mask)) {
        local_stack_frame_throw_type_mismatch_for_read(pslot);
    }

#ifdef LOCAL_STACK_TRACE_ENABLE
    if (pvariable == NULL) {
        printf("VALUE IS NULL\n");
    } else if (pvariable->is_terminal) {
        char* formatted = mv_alloc_format_val(&pvariable->terminal_mlrval);
        printf("VALUE IS %s\n", formatted);
        free(formatted);
    } else if (pvariable->pnext_level == NULL) {
        printf("VALUE IS EMPTY\n");
    } else {
        printf("VALUE IS:\n");
        printf("PTR IS %p\n", pvariable->pnext_level);
        mlhmmv_level_print_stacked(pvariable->pnext_level, 0, TRUE, TRUE, "", stdout);
    }
#endif

    // Base-level access
    if (pmvkeys == NULL) {
        return pvariable;
    }

    int error = 0;
    // Maybe null
    return mlhmmv_level_look_up_and_ref_xvalue(pvariable->pnext_level, pmvkeys, &error);
}
// ----------------------------------------------------------------
// Defines a local variable with a terminal value: records its name and type
// mask in the slot, type-checks val against that mask (a mismatch is a fatal
// error), frees the slot's previous value, and stores val unless it is absent.
// An absent val is freed here instead of being stored.
void local_stack_frame_define_terminal(local_stack_frame_t* pframe, char* variable_name,
    int vardef_frame_relative_index, int type_mask, mv_t val)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p SET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "DEFINE", TRUE, vardef_frame_relative_index);

    local_stack_frame_entry_t* pslot = &pframe->pvars[vardef_frame_relative_index];

    // The name is not strduped, for performance -- the caller must ensure extent.
    pslot->name = variable_name;
    pslot->type_mask = type_mask;

    int value_type_bits = type_mask_from_mv(&val);
    if ((value_type_bits & pslot->type_mask) == 0) {
        local_stack_frame_throw_type_mismatch_for_write(pslot, &val);
    }

    mlhmmv_xvalue_free(&pslot->xvalue);

    if (!mv_is_absent(&val)) {
        pslot->xvalue = mlhmmv_xvalue_wrap_terminal(val); // xxx deep-copy?
    } else {
        mv_free(&val); // xxx doc ownership semantics at header file
    }
}
// ----------------------------------------------------------------
// Defines a local variable with an extended (terminal-or-map) value: records
// its name and type mask in the slot, type-checks xval against that mask (a
// mismatch is a fatal error), and -- unless xval is an absent non-terminal --
// frees the slot's previous value and stores xval.
//
// A map value that fails the type check is reported through the xvalue-aware
// helper, as local_stack_frame_assign_extended_indexed does. The terminal-only
// helper would describe the type from xval.terminal_mlrval, which is not the
// meaningful member of a non-terminal value.
// NOTE(review): this relies on mlhmmv_xvalue_describe_type_simple (used by the
// xvalue-aware helper) describing maps sensibly -- confirm.
void local_stack_frame_define_extended(local_stack_frame_t* pframe, char* variable_name,
    int vardef_frame_relative_index, int type_mask, mlhmmv_xvalue_t xval)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p SET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "ASSIGN", TRUE, vardef_frame_relative_index);
    local_stack_frame_entry_t* pentry = &pframe->pvars[vardef_frame_relative_index];

    pentry->name = variable_name; // no strdup, for performance -- caller must ensure extent
    pentry->type_mask = type_mask;

    if (xval.is_terminal) {
        if (!(type_mask_from_mv(&xval.terminal_mlrval) & pentry->type_mask)) {
            local_stack_frame_throw_type_mismatch_for_write(pentry, &xval.terminal_mlrval);
        }
    } else {
        if (!(TYPE_MASK_MAP & pentry->type_mask)) {
            local_stack_frame_throw_type_xmismatch_for_write(pentry, &xval);
        }
    }

    if (!mlhmmv_xvalue_is_absent_and_nonterminal(&xval)) {
        mlhmmv_xvalue_free(&pentry->xvalue);
        pentry->xvalue = xval;
    }
}
// ----------------------------------------------------------------
// Assigns a terminal value to a keyed element of a local variable, i.e.
// var[k1][k2]... = terminal_value.
// The variable's type mask must admit maps (otherwise: fatal type error). If
// the variable currently holds a terminal, that terminal is freed and replaced
// with an empty map before the element is stored.
void local_stack_frame_assign_terminal_indexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, sllmv_t* pmvkeys,
    mv_t terminal_value)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p SET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "ASSIGN", TRUE, vardef_frame_relative_index);

    local_stack_frame_entry_t* pslot = &pframe->pvars[vardef_frame_relative_index];
    if ((TYPE_MASK_MAP & pslot->type_mask) == 0) {
        local_stack_frame_throw_type_mismatch_for_write(pslot, &terminal_value);
    }

    mlhmmv_xvalue_t* pvariable = &pslot->xvalue;
    if (pvariable->is_terminal) {
        // Promote the variable from terminal to map.
        mv_free(&pvariable->terminal_mlrval);
        *pvariable = mlhmmv_xvalue_alloc_empty_map();
    }

    mlhmmv_level_put_terminal(pvariable->pnext_level, pmvkeys->phead, &terminal_value);

    LOCAL_STACK_TRACE(printf("VALUE IS:\n"));
    LOCAL_STACK_TRACE(mlhmmv_level_print_stacked(pvariable->pnext_level, 0, TRUE, TRUE, "", stdout));
}
// ----------------------------------------------------------------
// Assigns an extended (terminal-or-map) value to a local variable as a whole:
// type-checks xval against the slot's existing type mask (a mismatch is a
// fatal error), frees the slot's previous value, and stores xval.
//
// A map value that fails the type check is reported through the xvalue-aware
// helper, as local_stack_frame_assign_extended_indexed does. The terminal-only
// helper would describe the type from xval.terminal_mlrval, which is not the
// meaningful member of a non-terminal value.
// NOTE(review): this relies on mlhmmv_xvalue_describe_type_simple (used by the
// xvalue-aware helper) describing maps sensibly -- confirm.
void local_stack_frame_assign_extended_nonindexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, mlhmmv_xvalue_t xval)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p SET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "ASSIGN", TRUE, vardef_frame_relative_index);
    local_stack_frame_entry_t* pentry = &pframe->pvars[vardef_frame_relative_index];

    if (xval.is_terminal) {
        if (!(type_mask_from_mv(&xval.terminal_mlrval) & pentry->type_mask)) {
            local_stack_frame_throw_type_mismatch_for_write(pentry, &xval.terminal_mlrval);
        }
    } else {
        if (!(TYPE_MASK_MAP & pentry->type_mask)) {
            local_stack_frame_throw_type_xmismatch_for_write(pentry, &xval);
        }
    }

    mlhmmv_xvalue_free(&pentry->xvalue);
    pentry->xvalue = xval;
}
// ----------------------------------------------------------------
// Assigns an extended (terminal-or-map) value to a keyed element of a local
// variable, i.e. var[k1][k2]... = new_value.
// The variable's type mask must admit maps (otherwise: fatal type error). If
// the variable currently holds a terminal, that terminal is freed and replaced
// with an empty map before the element is stored.
void local_stack_frame_assign_extended_indexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, sllmv_t* pmvkeys,
    mlhmmv_xvalue_t new_value)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p SET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "ASSIGN", TRUE, vardef_frame_relative_index);

    local_stack_frame_entry_t* pslot = &pframe->pvars[vardef_frame_relative_index];
    if ((TYPE_MASK_MAP & pslot->type_mask) == 0) {
        local_stack_frame_throw_type_xmismatch_for_write(pslot, &new_value);
    }

    mlhmmv_xvalue_t* pvariable = &pslot->xvalue;
    if (pvariable->is_terminal) {
        // Promote the variable from terminal to map.
        mv_free(&pvariable->terminal_mlrval);
        *pvariable = mlhmmv_xvalue_alloc_empty_map();
    }

    mlhmmv_level_put_xvalue(pvariable->pnext_level, pmvkeys->phead, &new_value);

    LOCAL_STACK_TRACE(printf("VALUE IS:\n"));
    LOCAL_STACK_TRACE(mlhmmv_level_print_stacked(pvariable->pnext_level, 0, TRUE, TRUE, "", stdout));
}
// ----------------------------------------------------------------
// TRUE until the first bounds check has announced itself on stderr.
static int local_stack_bounds_check_announce_first_call = TRUE;

// Validates a slot index before a frame access; any violation is reported on
// stderr and terminates the process. Checked in this order:
//   * index < 0                  -> stack underflow
//   * write (set) to index 0     -> write to the reserved absent slot
//   * index >= frame size        -> stack overflow
// The first call also prints a one-time notice that checking is enabled.
void local_stack_bounds_check(local_stack_frame_t* pframe, char* op, int set, int vardef_frame_relative_index) {
    int slot = vardef_frame_relative_index;

    if (local_stack_bounds_check_announce_first_call) {
        local_stack_bounds_check_announce_first_call = FALSE;
        fprintf(stderr, "%s: local-stack bounds-checking is enabled\n", MLR_GLOBALS.bargv0);
    }

    if (slot < 0) {
        fprintf(stderr, "OP=%s FRAME=%p IDX=%d/%d STACK UNDERFLOW\n",
            op, pframe, slot, pframe->size);
        exit(1);
    } else if (set && slot == 0) {
        fprintf(stderr, "OP=%s FRAME=%p IDX=%d/%d ABSENT WRITE\n",
            op, pframe, slot, pframe->size);
        exit(1);
    } else if (slot >= pframe->size) {
        fprintf(stderr, "OP=%s FRAME=%p IDX=%d/%d STACK OVERFLOW\n",
            op, pframe, slot, pframe->size);
        exit(1);
    }
}
// ----------------------------------------------------------------
// Fatal error for writing a terminal value whose type the variable's type mask
// does not admit: prints the declared type, the variable name, the offending
// value and its type to stderr, then exits with status 1.
// The slot must have a name; a NULL name is an internal coding error.
void local_stack_frame_throw_type_mismatch_for_write(local_stack_frame_entry_t* pentry, mv_t* pval) {
    MLR_INTERNAL_CODING_ERROR_IF(pentry->name == NULL);

    char* formatted_value = mv_alloc_format_val_quoting_strings(pval);

    fprintf(stderr, "%s: %s type assertion for variable %s unmet by value %s with type %s.\n",
        MLR_GLOBALS.bargv0,
        type_mask_to_desc(pentry->type_mask),
        pentry->name,
        formatted_value,
        mt_describe_type_simple(pval->type));

    free(formatted_value);
    exit(1);
}
// Fatal error for writing an extended (terminal-or-map) value that the
// variable's type mask does not admit: prints the declared type, the variable
// name, the value and its type to stderr, then exits with status 1.
// The slot must have a name; a NULL name is an internal coding error.
void local_stack_frame_throw_type_xmismatch_for_write(local_stack_frame_entry_t* pentry, mlhmmv_xvalue_t* pxval) {
    MLR_INTERNAL_CODING_ERROR_IF(pentry->name == NULL);

    // xxx temp -- the value is formatted from its terminal member even though
    // it may not be terminal.
    char* formatted_value = mv_alloc_format_val_quoting_strings(&pxval->terminal_mlrval);

    fprintf(stderr, "%s: %s type assertion for variable %s unmet by value %s with type %s.\n",
        MLR_GLOBALS.bargv0,
        type_mask_to_desc(pentry->type_mask),
        pentry->name,
        formatted_value,
        mlhmmv_xvalue_describe_type_simple(pxval));

    free(formatted_value);
    exit(1);
}
// ----------------------------------------------------------------
// Fatal error for a read that violates the variable's type assertion: prints
// the declared type and the variable name to stderr, then exits with status 1.
// The slot must have a name; a NULL name is an internal coding error.
void local_stack_frame_throw_type_mismatch_for_read(local_stack_frame_entry_t* pentry) {
    MLR_INTERNAL_CODING_ERROR_IF(pentry->name == NULL);

    fprintf(stderr, "%s: %s type assertion for variable %s unmet on read.\n",
        MLR_GLOBALS.bargv0,
        type_mask_to_desc(pentry->type_mask),
        pentry->name);

    exit(1);
}
// Extended-value counterpart of the read type-mismatch error; it reports
// exactly the same message. Prints the declared type and the variable name to
// stderr, then exits with status 1.
// The slot must have a name; a NULL name is an internal coding error.
void local_stack_frame_throw_type_xmismatch_for_read(local_stack_frame_entry_t* pentry) {
    MLR_INTERNAL_CODING_ERROR_IF(pentry->name == NULL);

    fprintf(stderr, "%s: %s type assertion for variable %s unmet on read.\n",
        MLR_GLOBALS.bargv0,
        type_mask_to_desc(pentry->type_mask),
        pentry->name);

    exit(1);
}

215
c/containers/local_stack.h Normal file
View file

@ -0,0 +1,215 @@
#ifndef LOCAL_STACK_H
#define LOCAL_STACK_H
#include "lib/mlrval.h"
#include "containers/type_decl.h"
#include "containers/sllv.h"
#include "containers/mlhmmv.h"
// ================================================================
// Bound & scoped variables for use in for-loops, function bodies, and
// subroutine bodies. Indices of local variables, and max-depth for top-level
// statement blocks, are computed by the stack-allocator which marks up the AST
// before the CST is built from it.
//
// A convention shared between the stack-allocator and this data structure is
// that slot 0 is an absent-null which is used for reads of undefined (or
// as-yet-undefined) local variables.
//
// Values assigned to a local-stack variable are owned by this container.
// They will be freed:
// * On overwrite, e.g. on 'x = oldval' then 'x = newval' the oldval
// will be freed on the newval assignment, and
// * At stack-frame exit.
// For this reason values assigned to locals may be passed in by reference
// if they are ephemeral, i.e. if it is desired for this container to free
// them. Otherwise, values should be copied before being passed in.
// ================================================================
// ================================================================
// One local-variable slot within a frame.
typedef struct _local_stack_frame_entry_t {
char* name; // For type-check error messages. Not strduped; the caller must ensure extent.
mlhmmv_xvalue_t xvalue; // current value: either a terminal or a map
int type_mask; // types that may be written to this slot; TYPE_MASK_ANY unless a typed definition narrows it
} local_stack_frame_entry_t;
// A frame: the array of variable slots for one top-level statement block.
typedef struct _local_stack_frame_t {
int in_use; // set by local_stack_frame_enter; cleared by local_stack_frame_exit for non-ephemeral frames
int ephemeral; // TRUE for frames allocated by enter for recursive invocations; such frames are freed on exit
int size; // number of slots in pvars
int subframe_base; // index of the first slot of the next subframe to be entered
local_stack_frame_entry_t* pvars;
} local_stack_frame_t;
// ----------------------------------------------------------------
// A stack is allocated for a top-level statement block: begin, end, or main, or
// user-defined function/subroutine. (The latter two may be called recursively
// in which case the in_use flag notes the need to allocate a new stack.)
// Allocates a non-ephemeral frame with `size` slots, each starting as an absent
// terminal with type mask TYPE_MASK_ANY.
local_stack_frame_t* local_stack_frame_alloc(int size);
// Frees the value in every slot, then the frame itself. NULL is a no-op.
void local_stack_frame_free(local_stack_frame_t* pframe);
// ================================================================
// Development switches: uncomment to compile in tracing and/or bounds checks.
//#define LOCAL_STACK_TRACE_ENABLE
//#define LOCAL_STACK_BOUNDS_CHECK_ENABLE
// Reports on stderr and exits if the index is negative, is 0 on a write (set),
// or is >= the frame size.
void local_stack_bounds_check(local_stack_frame_t* pframe, char* op, int set, int vardef_frame_relative_index);
// Expands to a call to local_stack_bounds_check when enabled, else to nothing.
#ifdef LOCAL_STACK_BOUNDS_CHECK_ENABLE
#define LOCAL_STACK_BOUNDS_CHECK(pframe, op, set, vardef_frame_relative_index) \
local_stack_bounds_check((pframe), (op), (set), (vardef_frame_relative_index))
#else
#define LOCAL_STACK_BOUNDS_CHECK(pframe, op, set, vardef_frame_relative_index)
#endif
// Expands to its argument (typically a printf call) when enabled, else to
// nothing -- so the argument is not evaluated at all when tracing is off.
#ifdef LOCAL_STACK_TRACE_ENABLE
#define LOCAL_STACK_TRACE(p) p
#else
#define LOCAL_STACK_TRACE(p)
#endif
// These are unconditional. With the single added character 'X' they can be
// used to focus verbosity at specific callsites for dev/debug.
#define LOCAL_STACK_BOUNDS_CHECKX(pframe, op, set, vardef_frame_relative_index) \
local_stack_bounds_check((pframe), (op), (set), (vardef_frame_relative_index))
#define LOCAL_STACK_TRACEX(p) p
// ----------------------------------------------------------------
// Sets/clears the in-use flag for top-level statement blocks, and verifies the
// contract for absent-null at slot 0.
// For non-recursive functions/subroutines the enter method sets the in-use flag
// and returns its argument; the exit method clears that flag. For recursively
// invoked functions/subroutines the enter method returns another stack of the
// same size, and the exit method frees that.
//
// The reason we don't simply always allocate is that begin/main/end statements
// are never recursive, and most functions and subroutines are not recursive, so
// most of the time there will be a single frame for each. We allocate that once
// at startup, reuse it on every record, and free it at exit -- rather than
// allocating and freeing frames on every record.
local_stack_frame_t* local_stack_frame_enter(local_stack_frame_t* pframe);
void local_stack_frame_exit(local_stack_frame_t* pframe);
// Fatal type-assertion errors. Each prints a message naming the variable and
// its declared type to stderr and exits with status 1; none of them returns.
// The entry must have a non-NULL name.
// Write of a terminal value:
void local_stack_frame_throw_type_mismatch_for_write(local_stack_frame_entry_t* pentry, mv_t* pval);
// Write of an extended (terminal-or-map) value:
void local_stack_frame_throw_type_xmismatch_for_write(local_stack_frame_entry_t* pentry, mlhmmv_xvalue_t* pxval);
// Reads:
void local_stack_frame_throw_type_mismatch_for_read(local_stack_frame_entry_t* pentry);
void local_stack_frame_throw_type_xmismatch_for_read(local_stack_frame_entry_t* pentry);
// ----------------------------------------------------------------
// Reads a local variable as a terminal value. Returns the stored terminal, or
// absent when the slot holds a non-terminal (map) value.
// The value is returned by copy of the mv_t struct (todo: move to reference
// semantics).
static inline mv_t local_stack_frame_get_terminal_from_nonindexed(local_stack_frame_t* pframe, // move to reference semantics
    int vardef_frame_relative_index)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p GET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "GET", FALSE, vardef_frame_relative_index);

    mlhmmv_xvalue_t* pstored = &pframe->pvars[vardef_frame_relative_index].xvalue;

    if (pstored == NULL || !pstored->is_terminal) {
        return mv_absent();
    }
    return pstored->terminal_mlrval;
}
// ----------------------------------------------------------------
// Assigns a terminal value to a local variable as a whole: type-checks val
// against the slot's type mask (a mismatch is a fatal error), frees the slot's
// previous value, and stores val wrapped as a terminal.
static inline void local_stack_frame_assign_terminal_nonindexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, mv_t val)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p SET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "ASSIGN", TRUE, vardef_frame_relative_index);

    local_stack_frame_entry_t* pslot = &pframe->pvars[vardef_frame_relative_index];

    int value_type_bits = type_mask_from_mv(&val);
    if ((value_type_bits & pslot->type_mask) == 0) {
        local_stack_frame_throw_type_mismatch_for_write(pslot, &val);
    }

    mlhmmv_xvalue_free(&pslot->xvalue);
    pslot->xvalue = mlhmmv_xvalue_wrap_terminal(val); // xxx deep-copy?
}
// ----------------------------------------------------------------
// Returns a pointer to the extended (terminal-or-map) value stored in a local
// variable's slot. The pointer refers to storage inside the frame; nothing is
// copied.
static inline mlhmmv_xvalue_t* local_stack_frame_ref_extended_from_nonindexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index)
{
    LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p GET %d\n", pframe, vardef_frame_relative_index));
    LOCAL_STACK_BOUNDS_CHECK(pframe, "GET", FALSE, vardef_frame_relative_index);

    return &pframe->pvars[vardef_frame_relative_index].xvalue;
}
// ----------------------------------------------------------------
// Keyed read returning a terminal: the variable itself when pmvkeys is NULL or
// empty, otherwise the keyed map element. Returns absent when the selected
// value does not exist or is not terminal.
mv_t local_stack_frame_ref_terminal_from_indexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, sllmv_t* pmvkeys);

// Keyed read returning a reference to a terminal-or-map value: the variable
// itself when pmvkeys is NULL, otherwise the keyed map element, which may be
// NULL.
mlhmmv_xvalue_t* local_stack_frame_ref_extended_from_indexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, sllmv_t* pmvkeys);

// Definitions: set the slot's name (not copied -- the caller must ensure
// extent) and type mask, type-check the value, and store it. A type mismatch
// is a fatal error.
void local_stack_frame_define_terminal(local_stack_frame_t* pframe, char* variable_name,
    int vardef_frame_relative_index, int type_mask, mv_t val);
void local_stack_frame_define_extended(local_stack_frame_t* pframe, char* variable_name,
    int vardef_frame_relative_index, int type_mask, mlhmmv_xvalue_t xval);

// Whole-variable assignment of a terminal-or-map value; the previous value is
// freed. A type mismatch is a fatal error.
void local_stack_frame_assign_extended_nonindexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, mlhmmv_xvalue_t xval);

// Keyed assignments, var[k1][k2]... = value. A variable currently holding a
// terminal is first replaced with an empty map. The variable's type mask must
// admit maps; otherwise a fatal type error is raised.
void local_stack_frame_assign_terminal_indexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, sllmv_t* pmvkeys,
    mv_t terminal_value);
// The value here may be a terminal or a map, hence the parameter name
// new_value, matching the definition.
void local_stack_frame_assign_extended_indexed(local_stack_frame_t* pframe,
    int vardef_frame_relative_index, sllmv_t* pmvkeys,
    mlhmmv_xvalue_t new_value);
// ----------------------------------------------------------------
// Frames are entered/exited for each curly-braced statement block, including
// the top-level block itself as well as ifs/fors/whiles.
// Enters a subframe of 'count' slots starting at the current subframe base:
// each slot's extended value is reset and its type mask is set to
// TYPE_MASK_ANY, then the subframe base is advanced by 'count'.
static inline void local_stack_subframe_enter(local_stack_frame_t* pframe, int count) {
	LOCAL_STACK_TRACE(printf("LOCAL STACK SUBFRAME %p ENTER %d->%d\n",
		pframe, pframe->subframe_base, pframe->subframe_base+count));

	int base = pframe->subframe_base;
	int offset = 0;
	while (offset < count) {
		LOCAL_STACK_TRACE(printf("LOCAL STACK FRAME %p CLEAR %d\n", pframe, base+offset));
		LOCAL_STACK_BOUNDS_CHECK(pframe, "CLEAR", FALSE, base+offset);
		local_stack_frame_entry_t* pslot = &pframe->pvars[base + offset];
		mlhmmv_xvalue_reset(&pslot->xvalue);
		pslot->type_mask = TYPE_MASK_ANY;
		offset++;
	}

	pframe->subframe_base = base + count;
}
// ----------------------------------------------------------------
// Exits a subframe of 'count' slots: the subframe base is moved back by
// 'count' and the extended value in each of the vacated slots is freed.
static inline void local_stack_subframe_exit(local_stack_frame_t* pframe, int count) {
	LOCAL_STACK_TRACE(printf("LOCAL STACK SUBFRAME %p EXIT %d->%d\n",
		pframe, pframe->subframe_base, pframe->subframe_base-count));

	pframe->subframe_base -= count;

	local_stack_frame_entry_t* pslot = pframe->pvars + pframe->subframe_base;
	local_stack_frame_entry_t* pend = pslot + count;
	for ( ; pslot < pend; pslot++) {
		mlhmmv_xvalue_free(&pslot->xvalue);
	}
}
// ================================================================
// A stack of local-stack frames, kept in a list.
typedef struct _local_stack_t {
// List of frames; local_stack_get_top_frame() reads the top frame from the list head.
sllv_t* pframes;
} local_stack_t;
// Stack lifecycle and push/pop. Only the declarations are visible here.
// NOTE(review): ownership of pushed and popped frames is decided by the
// definitions -- confirm there before relying on it.
local_stack_t* local_stack_alloc();
void local_stack_free(local_stack_t* pstack);
void local_stack_push(local_stack_t* pstack, local_stack_frame_t* pframe);
local_stack_frame_t* local_stack_pop(local_stack_t* pstack);
// Returns the frame stored at the head of the stack's frame list.
// Does not check for an empty list.
static inline local_stack_frame_t* local_stack_get_top_frame(local_stack_t* pstack) {
	sllv_t* pframe_list = pstack->pframes;
	return pframe_list->phead->pvvalue;
}
#endif // LOCAL_STACK_H

77
c/containers/loop_stack.c Normal file
View file

@ -0,0 +1,77 @@
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "loop_stack.h"
#define INITIAL_SIZE 32
// Example states:
// num_allocated = 4 num_allocated = 4 num_allocated = 4 num_allocated = 4
// num_used = 1 num_used = 2 num_used = 3 num_used = 4
// num_used_minus_one = 0 num_used_minus_one = 1 num_used_minus_one = 2 num_used_minus_one = 3
//
// +---+ +---+ +---+ +---+
// | 2 | 0 <--- top | 2 | 0 | 2 | 0 | 2 | 0
// +---+ +---+ +---+ +---+
// |///| 1 | 0 | 1 <--- top | 0 | 1 | 0 |
// +---+ +---+ +---+ +---+
// |///| 2 |///| 2 | 4 | 2 <--- top | 4 |
// +---+ +---+ +---+ +---+
// |///| 3 |///| 3 |///| 3 | 6 | 3 <--- top
// +---+ +---+ +---+ +---+
// ----------------------------------------------------------------
// Allocates a loop stack with INITIAL_SIZE zeroed slots.
//
// Slot 0 serves as a guard zone of one: set/get are intentionally not
// bounds-checked (see the header file), so if set is called without a push, or
// after the final pop, it lands in slot 0 rather than corrupting other memory.
loop_stack_t* loop_stack_alloc() {
	loop_stack_t* pnew = mlr_malloc_or_die(sizeof(loop_stack_t));

	pnew->num_allocated = INITIAL_SIZE;
	pnew->num_used_minus_one = 0;
	pnew->pframes = mlr_malloc_or_die(pnew->num_allocated * sizeof(int));
	for (int i = 0; i < pnew->num_allocated; i++) {
		pnew->pframes[i] = 0;
	}

	return pnew;
}
// ----------------------------------------------------------------
// Frees the slot array and the stack itself. A NULL stack is a no-op.
void loop_stack_free(loop_stack_t* pstack) {
	if (pstack != NULL) {
		free(pstack->pframes);
		free(pstack);
	}
}
// ----------------------------------------------------------------
// Pushes a new slot with all bits clear, growing the slot array by
// INITIAL_SIZE entries when the new top would not fit.
void loop_stack_push(loop_stack_t* pstack) {
	int new_top = pstack->num_used_minus_one + 1;

	if (new_top >= pstack->num_allocated) {
		pstack->num_allocated += INITIAL_SIZE;
		pstack->pframes = mlr_realloc_or_die(pstack->pframes, pstack->num_allocated * sizeof(int));
	}

	pstack->num_used_minus_one = new_top;
	pstack->pframes[new_top] = 0;
}
// ----------------------------------------------------------------
// Pops the top slot and returns the bits it held. Popping when only the guard
// slot remains is flagged as an internal coding error.
int loop_stack_pop(loop_stack_t* pstack) {
	MLR_INTERNAL_CODING_ERROR_IF(pstack->num_used_minus_one < 1);
	int top = pstack->num_used_minus_one;
	int popped_bits = pstack->pframes[top];
	pstack->num_used_minus_one = top - 1;
	return popped_bits;
}
// ----------------------------------------------------------------
// Not bounds-checked, as noted in the header file.
// ORs the given bits into the top slot. Not bounds-checked.
void loop_stack_set(loop_stack_t* pstack, int bits) {
	int* ptop = &pstack->pframes[pstack->num_used_minus_one];
	*ptop = *ptop | bits;
}
// Clears the given bits in the top slot. Not bounds-checked.
void loop_stack_clear(loop_stack_t* pstack, int bits) {
	int* ptop = &pstack->pframes[pstack->num_used_minus_one];
	*ptop = *ptop & ~bits;
}
// ----------------------------------------------------------------
// Not bounds-checked, as noted in the header file.
// Returns the bits held in the top slot. Not bounds-checked.
int loop_stack_get(loop_stack_t* pstack) {
	int top = pstack->num_used_minus_one;
	return pstack->pframes[top];
}

35
c/containers/loop_stack.h Normal file
View file

@ -0,0 +1,35 @@
// Holds broken/continued flags for loops (for-srec, for-oosvar, while, do-while).
#ifndef LOOP_STACK_H
#define LOOP_STACK_H
#define LOOP_BROKEN 0x8000
#define LOOP_CONTINUED 0x0100
// A growable stack of int bit-flag slots, one slot per active loop.
typedef struct _loop_stack_t {
// Index of the top slot in pframes. It is 0 after loop_stack_alloc() (guard slot only)
// and is incremented by each push.
int num_used_minus_one;
// Number of int slots currently allocated in pframes.
int num_allocated;
// The slots themselves; each holds an OR of the LOOP_* bits set on it.
int* pframes;
} loop_stack_t;
// Allocates a stack with zeroed slots; slot 0 acts as a guard slot.
loop_stack_t* loop_stack_alloc();
// Frees the stack; a NULL argument is a no-op.
void loop_stack_free(loop_stack_t* pstack);
// To be used on entry to loop handler. Pushes a new slot with all bits clear.
void loop_stack_push(loop_stack_t* pstack);
// To be used on exit from loop handler. Returns the bits of the popped slot.
int loop_stack_pop(loop_stack_t* pstack);
// To be used by break/continue handler: OR bits into, or clear bits from, the top slot.
// NOTE: For efficiency the stack is **NOT** bounds-checked here. E.g. if set is done before a push,
// or after an emptying pop, behavior is unspecified.
void loop_stack_set(loop_stack_t* pstack, int bits);
void loop_stack_clear(loop_stack_t* pstack, int bits);
// To be used by loop handler: returns the bits of the top slot.
// NOTE: For efficiency the stack is **NOT** bounds-checked here. E.g. if get is done before a push,
// or after an emptying pop, behavior is unspecified.
int loop_stack_get(loop_stack_t* pstack);
#endif // LOOP_STACK_H

747
c/containers/lrec.c Normal file
View file

@ -0,0 +1,747 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "lib/string_builder.h"
#include "containers/hss.h"
#include "containers/slls.h"
#include "containers/lrec.h"
#define SB_ALLOC_LENGTH 256
// Forward declarations of file-private helpers defined further down.
// Key lookup by sequential scan:
static lrece_t* lrec_find_entry(lrec_t* prec, char* key);
// Linking of an already-allocated entry at either end of the list:
static void lrec_link_at_head(lrec_t* prec, lrece_t* pe);
static void lrec_link_at_tail(lrec_t* prec, lrece_t* pe);
// Per-format backing-store release functions, installed as pfree_backing_func:
static void lrec_unbacked_free(lrec_t* prec);
static void lrec_free_single_line_backing(lrec_t* prec);
static void lrec_free_csv_backing(lrec_t* prec);
static void lrec_free_multiline_backing(lrec_t* prec);
// ----------------------------------------------------------------
// Allocates an empty record with no backing storage to release.
lrec_t* lrec_unbacked_alloc() {
	lrec_t* pnew = mlr_malloc_or_die(sizeof(lrec_t));
	memset(pnew, 0, sizeof(*pnew));
	pnew->pfree_backing_func = lrec_unbacked_free;
	return pnew;
}
// Allocates an empty record backed by a single line, which is freed along
// with the record.
lrec_t* lrec_dkvp_alloc(char* line) {
	lrec_t* pnew = mlr_malloc_or_die(sizeof(lrec_t));
	memset(pnew, 0, sizeof(*pnew));
	pnew->pfree_backing_func = lrec_free_single_line_backing;
	pnew->psingle_line = line;
	return pnew;
}
// Allocates an empty record backed by a single line, which is freed along
// with the record.
lrec_t* lrec_nidx_alloc(char* line) {
	lrec_t* pnew = mlr_malloc_or_die(sizeof(lrec_t));
	memset(pnew, 0, sizeof(*pnew));
	pnew->pfree_backing_func = lrec_free_single_line_backing;
	pnew->psingle_line = line;
	return pnew;
}
// Allocates an empty record backed by a single data line, which is freed
// along with the record.
lrec_t* lrec_csvlite_alloc(char* data_line) {
	lrec_t* pnew = mlr_malloc_or_die(sizeof(lrec_t));
	memset(pnew, 0, sizeof(*pnew));
	pnew->pfree_backing_func = lrec_free_csv_backing;
	pnew->psingle_line = data_line;
	return pnew;
}
// Allocates an empty record backed by a single data line, which is freed
// along with the record.
lrec_t* lrec_csv_alloc(char* data_line) {
	lrec_t* pnew = mlr_malloc_or_die(sizeof(lrec_t));
	memset(pnew, 0, sizeof(*pnew));
	pnew->pfree_backing_func = lrec_free_csv_backing;
	pnew->psingle_line = data_line;
	return pnew;
}
// Allocates an empty record backed by a list of lines, which is freed along
// with the record.
lrec_t* lrec_xtab_alloc(slls_t* pxtab_lines) {
	lrec_t* pnew = mlr_malloc_or_die(sizeof(lrec_t));
	memset(pnew, 0, sizeof(*pnew));
	pnew->pfree_backing_func = lrec_free_multiline_backing;
	pnew->pxtab_lines = pxtab_lines;
	return pnew;
}
// ----------------------------------------------------------------
// Frees every entry of the record (and each key/value whose free flag is set),
// then releases the backing storage through the record's backing-free function.
// The lrec_t itself is not freed.
static void lrec_free_contents(lrec_t* prec) {
	lrece_t* pcurr = prec->phead;
	while (pcurr != NULL) {
		// Grab the successor before the node goes away.
		lrece_t* pfollowing = pcurr->pnext;
		if (pcurr->free_flags & FREE_ENTRY_KEY)
			free(pcurr->key);
		if (pcurr->free_flags & FREE_ENTRY_VALUE)
			free(pcurr->value);
		free(pcurr);
		pcurr = pfollowing;
	}
	prec->pfree_backing_func(prec);
}
// ----------------------------------------------------------------
// Empties the record in place: frees its entries and backing storage, then
// leaves it as an empty unbacked record. A NULL record is a no-op.
void lrec_clear(lrec_t* prec) {
	if (prec != NULL) {
		lrec_free_contents(prec);
		memset(prec, 0, sizeof(*prec));
		prec->pfree_backing_func = lrec_unbacked_free;
	}
}
// ----------------------------------------------------------------
// Frees the record's entries, its backing storage, and the record itself.
// A NULL record is a no-op.
void lrec_free(lrec_t* prec) {
	if (prec != NULL) {
		lrec_free_contents(prec);
		free(prec);
	}
}
// ----------------------------------------------------------------
// Returns a new unbacked record holding copies of all key-value pairs of
// pinrec, in the same order. Keys and non-null values are duplicated, and the
// copy owns them. The caller owns the returned record and should release it
// with lrec_free().
//
// A null value (which lrec supports) is carried over as null rather than being
// handed to the string-duplication helper, which is not known to accept NULL.
lrec_t* lrec_copy(lrec_t* pinrec) {
	lrec_t* poutrec = lrec_unbacked_alloc();
	for (lrece_t* pe = pinrec->phead; pe != NULL; pe = pe->pnext) {
		if (pe->value == NULL) {
			// Only the key is heap-allocated here, so only the key is flagged for freeing.
			lrec_put(poutrec, mlr_strdup_or_die(pe->key), NULL, FREE_ENTRY_KEY);
		} else {
			lrec_put(poutrec, mlr_strdup_or_die(pe->key), mlr_strdup_or_die(pe->value),
				FREE_ENTRY_KEY|FREE_ENTRY_VALUE);
		}
	}
	return poutrec;
}
// ----------------------------------------------------------------
// Sets key to value. If the key is absent, a new entry is appended at the end
// of the record. If it is present, the entry keeps its position: its old value
// is freed if flagged, the passed-in key is freed if flagged (the entry keeps
// its existing key), and the entry's value flag is set from free_flags.
void lrec_put(lrec_t* prec, char* key, char* value, char free_flags) {
	lrece_t* pfound = lrec_find_entry(prec, key);

	if (pfound == NULL) {
		// New field: build the node and link it at the tail.
		lrece_t* pnode = mlr_malloc_or_die(sizeof(lrece_t));
		pnode->key = key;
		pnode->value = value;
		pnode->free_flags = free_flags;
		pnode->quote_flags = 0;
		pnode->pnext = NULL;

		if (prec->phead != NULL) {
			pnode->pprev = prec->ptail;
			prec->ptail->pnext = pnode;
		} else {
			pnode->pprev = NULL;
			prec->phead = pnode;
		}
		prec->ptail = pnode;
		prec->field_count++;
		return;
	}

	// Existing field: swap in the new value.
	if (pfound->free_flags & FREE_ENTRY_VALUE) {
		free(pfound->value);
	}
	if (free_flags & FREE_ENTRY_KEY)
		free(key);
	pfound->value = value;
	pfound->free_flags &= ~FREE_ENTRY_VALUE;
	if (free_flags & FREE_ENTRY_VALUE)
		pfound->free_flags |= FREE_ENTRY_VALUE;
}
// Same as lrec_put, except that a newly created entry records the given
// quote_flags. When the key is already present, quote_flags is not applied and
// the entry's existing quote flags are left as they are.
void lrec_put_ext(lrec_t* prec, char* key, char* value, char free_flags, char quote_flags) {
	lrece_t* pfound = lrec_find_entry(prec, key);

	if (pfound == NULL) {
		// New field: build the node and link it at the tail.
		lrece_t* pnode = mlr_malloc_or_die(sizeof(lrece_t));
		pnode->key = key;
		pnode->value = value;
		pnode->free_flags = free_flags;
		pnode->quote_flags = quote_flags;
		pnode->pnext = NULL;

		if (prec->phead != NULL) {
			pnode->pprev = prec->ptail;
			prec->ptail->pnext = pnode;
		} else {
			pnode->pprev = NULL;
			prec->phead = pnode;
		}
		prec->ptail = pnode;
		prec->field_count++;
		return;
	}

	// Existing field: swap in the new value.
	if (pfound->free_flags & FREE_ENTRY_VALUE) {
		free(pfound->value);
	}
	if (free_flags & FREE_ENTRY_KEY)
		free(key);
	pfound->value = value;
	pfound->free_flags &= ~FREE_ENTRY_VALUE;
	if (free_flags & FREE_ENTRY_VALUE)
		pfound->free_flags |= FREE_ENTRY_VALUE;
}
// Like lrec_put, but a key not yet present is added at the start of the record
// rather than at the end. If the key is already present, the entry keeps its
// position and only its value is replaced.
//
// Ownership: as with lrec_put, setting FREE_ENTRY_KEY / FREE_ENTRY_VALUE in
// free_flags hands the key / value over to the record. When the key is already
// present the record keeps its existing key, so a handed-over key is freed
// here instead of being leaked, matching lrec_put.
void lrec_prepend(lrec_t* prec, char* key, char* value, char free_flags) {
	lrece_t* pe = lrec_find_entry(prec, key);

	if (pe != NULL) {
		if (pe->free_flags & FREE_ENTRY_VALUE) {
			free(pe->value);
		}
		// The entry keeps its own key. Release the caller's copy, unless the caller
		// passed the entry's own key storage back in.
		if ((free_flags & FREE_ENTRY_KEY) && key != pe->key) {
			free(key);
		}
		pe->value = value;
		pe->free_flags &= ~FREE_ENTRY_VALUE;
		if (free_flags & FREE_ENTRY_VALUE)
			pe->free_flags |= FREE_ENTRY_VALUE;
		return;
	}

	pe = mlr_malloc_or_die(sizeof(lrece_t));
	pe->key = key;
	pe->value = value;
	pe->free_flags = free_flags;
	pe->quote_flags = 0;

	if (prec->phead == NULL) {
		pe->pprev = NULL;
		pe->pnext = NULL;
		prec->phead = pe;
		prec->ptail = pe;
	} else {
		pe->pnext = prec->phead;
		pe->pprev = NULL;
		prec->phead->pprev = pe;
		prec->phead = pe;
	}
	prec->field_count++;
}
// Like lrec_put, but a key not yet present is inserted immediately after the
// entry pd rather than at the end. If the key is already present, the entry
// keeps its position and only its value is replaced. Returns the entry that
// was added or modified.
//
// pd must be a non-null entry of prec whenever the key is not already present.
//
// Ownership: as with lrec_put, setting FREE_ENTRY_KEY / FREE_ENTRY_VALUE in
// free_flags hands the key / value over to the record. When the key is already
// present the record keeps its existing key, so a handed-over key is freed
// here instead of being leaked, matching lrec_put.
lrece_t* lrec_put_after(lrec_t* prec, lrece_t* pd, char* key, char* value, char free_flags) {
	lrece_t* pe = lrec_find_entry(prec, key);

	if (pe != NULL) { // Overwrite
		if (pe->free_flags & FREE_ENTRY_VALUE) {
			free(pe->value);
		}
		// The entry keeps its own key. Release the caller's copy, unless the caller
		// passed the entry's own key storage back in.
		if ((free_flags & FREE_ENTRY_KEY) && key != pe->key) {
			free(key);
		}
		pe->value = value;
		pe->free_flags &= ~FREE_ENTRY_VALUE;
		if (free_flags & FREE_ENTRY_VALUE)
			pe->free_flags |= FREE_ENTRY_VALUE;
		return pe;
	}

	// Insert after specified entry
	pe = mlr_malloc_or_die(sizeof(lrece_t));
	pe->key = key;
	pe->value = value;
	pe->free_flags = free_flags;
	pe->quote_flags = 0;

	if (pd->pnext == NULL) { // Append at end of list
		pd->pnext = pe;
		pe->pprev = pd;
		pe->pnext = NULL;
		prec->ptail = pe;
	} else {
		lrece_t* pf = pd->pnext;
		pd->pnext = pe;
		pf->pprev = pe;
		pe->pprev = pd;
		pe->pnext = pf;
	}
	prec->field_count++;

	return pe;
}
// ----------------------------------------------------------------
// Returns the value stored under key, or NULL if the key is absent.
// The returned pointer is the record's own storage, not a copy.
char* lrec_get(lrec_t* prec, char* key) {
	lrece_t* pfound = lrec_find_entry(prec, key);
	return (pfound == NULL) ? NULL : pfound->value;
}
// Returns the value stored under key and points *ppfree_flags at that entry's
// free flags. If the key is absent, returns NULL and sets *ppfree_flags to NULL.
char* lrec_get_pff(lrec_t* prec, char* key, char** ppfree_flags) {
	lrece_t* pfound = lrec_find_entry(prec, key);
	if (pfound == NULL) {
		*ppfree_flags = NULL;
		return NULL;
	}
	*ppfree_flags = &pfound->free_flags;
	return pfound->value;
}
// Returns the value stored under key and sets *ppentry to the entry holding
// it. If the key is absent, returns NULL and sets *ppentry to NULL.
char* lrec_get_ext(lrec_t* prec, char* key, lrece_t** ppentry) {
	lrece_t* pfound = lrec_find_entry(prec, key);
	*ppentry = pfound;
	if (pfound == NULL)
		return NULL;
	return pfound->value;
}
// ----------------------------------------------------------------
// Returns the entry at the given 1-up position, or NULL if the position is
// outside 1..field_count. If the list turns out shorter than field_count
// claims, that is reported as an internal coding error and the process exits.
lrece_t* lrec_get_pair_by_position(lrec_t* prec, int position) { // 1-up not 0-up
	if (position < 1 || prec->field_count < position)
		return NULL;

	// Walk forward, counting down the hops still needed.
	int hops_left = position - 1;
	for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
		if (hops_left == 0)
			return pe;
		hops_left--;
	}

	fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n",
		MLR_GLOBALS.bargv0, __FILE__, __LINE__);
	exit(1);
}
// Returns the key at the given 1-up position, or NULL if out of range.
char* lrec_get_key_by_position(lrec_t* prec, int position) { // 1-up not 0-up
	lrece_t* pfound = lrec_get_pair_by_position(prec, position);
	return (pfound != NULL) ? pfound->key : NULL;
}
// Returns the value at the given 1-up position, or NULL if out of range.
char* lrec_get_value_by_position(lrec_t* prec, int position) { // 1-up not 0-up
	lrece_t* pfound = lrec_get_pair_by_position(prec, position);
	return (pfound != NULL) ? pfound->value : NULL;
}
// ----------------------------------------------------------------
// Removes the field with the given key, freeing its key/value where flagged.
// Does nothing if the key is absent.
void lrec_remove(lrec_t* prec, char* key) {
	lrece_t* pvictim = lrec_find_entry(prec, key);
	if (pvictim != NULL) {
		lrec_unlink(prec, pvictim);
		if (pvictim->free_flags & FREE_ENTRY_KEY)
			free(pvictim->key);
		if (pvictim->free_flags & FREE_ENTRY_VALUE)
			free(pvictim->value);
		free(pvictim);
	}
}
// ----------------------------------------------------------------
// Removes the field at the given 1-up position, freeing its key/value where
// flagged. Does nothing if the position is out of range.
void lrec_remove_by_position(lrec_t* prec, int position) { // 1-up not 0-up
	lrece_t* pvictim = lrec_get_pair_by_position(prec, position);
	if (pvictim != NULL) {
		lrec_unlink(prec, pvictim);
		if (pvictim->free_flags & FREE_ENTRY_KEY)
			free(pvictim->key);
		if (pvictim->free_flags & FREE_ENTRY_VALUE)
			free(pvictim->value);
		free(pvictim);
	}
}
// Renames the field keyed by old_key to new_key. The renamed field keeps its
// position. If a different field is already keyed by new_key, that field is
// removed from the record.
//
// Before:
//   "x" => "3"
//   "y" => "4"  <-- pold
//   "z" => "5"  <-- pnew
//
// Rename y to z
//
// After:
//   "x" => "3"
//   "z" => "4"
//
// new_needs_freeing says whether the record takes ownership of new_key.
//
// Handled explicitly:
// * old_key is absent: nothing happens.
//   NOTE(review): new_key is not freed in that case even when
//   new_needs_freeing is set (lrec_rename_at_position does free it); left
//   as-is because callers may still hold and use the pointer.
// * Renaming a field to the name it already has keeps the field (it just
//   adopts the new key storage) rather than removing it.
// * The displaced field's key and value are both freed where flagged, so
//   neither leaks.
void lrec_rename(lrec_t* prec, char* old_key, char* new_key, int new_needs_freeing) {
	lrece_t* pold = lrec_find_entry(prec, old_key);
	if (pold == NULL)
		return;

	// The field already uses this very storage as its key: nothing to change,
	// and freeing it below would leave the key dangling.
	if (pold->key == new_key)
		return;

	char new_key_flag = new_needs_freeing ? FREE_ENTRY_KEY : 0;

	lrece_t* pnew = lrec_find_entry(prec, new_key);
	if (pnew != NULL && pnew != pold) {
		// Another field already has the new name; it is displaced.
		lrec_unlink(prec, pnew);
		if (pnew->key == new_key) {
			// The caller passed the displaced entry's own key storage as the new
			// name: hand that storage (and its ownership) over instead of freeing it.
			new_key_flag |= pnew->free_flags & FREE_ENTRY_KEY;
		} else if (pnew->free_flags & FREE_ENTRY_KEY) {
			free(pnew->key);
		}
		if (pnew->free_flags & FREE_ENTRY_VALUE) {
			free(pnew->value);
		}
		free(pnew);
	}

	if (pold->free_flags & FREE_ENTRY_KEY) {
		free(pold->key);
	}
	pold->key = new_key;
	pold->free_flags &= ~FREE_ENTRY_KEY;
	pold->free_flags |= new_key_flag;
}
// Renames the field at the given position to new_key. The renamed field keeps
// its position. If a different field is already keyed by new_key, that field
// is removed from the record.
//
// Cases:
// 1. Rename field at position 3 from "x" to "y" when "y" does not exist elsewhere in the srec
// 2. Rename field at position 3 from "x" to "y" when "y" does exist elsewhere in the srec
// Note: position is 1-up not 0-up
//
// new_needs_freeing says whether the record takes ownership of new_key. If the
// position is out of range, nothing is renamed and new_key is freed when
// new_needs_freeing is set.
//
// Handled explicitly:
// * Renaming a field to the name it already has keeps the field (it just
//   adopts the new key storage) rather than removing it.
// * The displaced field's key and value are both freed where flagged, so
//   neither leaks.
void lrec_rename_at_position(lrec_t* prec, int position, char* new_key, int new_needs_freeing){
	lrece_t* pe = lrec_get_pair_by_position(prec, position);
	if (pe == NULL) {
		if (new_needs_freeing) {
			free(new_key);
		}
		return;
	}

	// The field already uses this very storage as its key: nothing to change,
	// and freeing it below would leave the key dangling.
	if (pe->key == new_key)
		return;

	char new_key_flag = new_needs_freeing ? FREE_ENTRY_KEY : 0;

	lrece_t* pother = lrec_find_entry(prec, new_key);
	if (pother != NULL && pother != pe) {
		// Another field already has the new name; it is displaced.
		lrec_unlink(prec, pother);
		if (pother->key == new_key) {
			// The caller passed the displaced entry's own key storage as the new
			// name: hand that storage (and its ownership) over instead of freeing it.
			new_key_flag |= pother->free_flags & FREE_ENTRY_KEY;
		} else if (pother->free_flags & FREE_ENTRY_KEY) {
			free(pother->key);
		}
		if (pother->free_flags & FREE_ENTRY_VALUE) {
			free(pother->value);
		}
		free(pother);
	}

	if (pe->free_flags & FREE_ENTRY_KEY) {
		free(pe->key);
	}
	pe->key = new_key;
	pe->free_flags &= ~FREE_ENTRY_KEY;
	pe->free_flags |= new_key_flag;
}
// ----------------------------------------------------------------
// Moves the field with the given key to the start of the record.
// Does nothing if the key is absent.
void lrec_move_to_head(lrec_t* prec, char* key) {
	lrece_t* pmoved = lrec_find_entry(prec, key);
	if (pmoved != NULL) {
		lrec_unlink(prec, pmoved);
		lrec_link_at_head(prec, pmoved);
	}
}
// Moves the field with the given key to the end of the record.
// Does nothing if the key is absent.
void lrec_move_to_tail(lrec_t* prec, char* key) {
	lrece_t* pmoved = lrec_find_entry(prec, key);
	if (pmoved != NULL) {
		lrec_unlink(prec, pmoved);
		lrec_link_at_tail(prec, pmoved);
	}
}
// ----------------------------------------------------------------
// Simply rename the first (at most) n positions where n is the length of pnames.
//
// Possible complications:
//
// * pnames itself contains duplicates -- the caller is required to guarantee there are none, since (for performance)
// we don't want to check this on every record processed.
//
// * pnames has length less than the current record and one of the new names becomes a clash with an existing name.
// Example:
// - Input record has names "a,b,c,d,e".
// - pnames is "d,x,f"
// - We then construct the invalid "d,x,f,d,e" -- we need to detect and unset the second 'd' field.
// Relabels the leading fields of the record with the names in pnames_as_list,
// in order, stopping at whichever of the two runs out first. Each new key is a
// fresh copy owned by the record. Any field after the relabeled ones whose key
// appears in pnames_as_set is then removed, so that no new label clashes with
// an old name.
void lrec_label(lrec_t* prec, slls_t* pnames_as_list, hss_t* pnames_as_set) {
	lrece_t* pfield = prec->phead;
	sllse_t* plabel = pnames_as_list->phead;

	// Process the labels list
	while (pfield != NULL && plabel != NULL) {
		if (pfield->free_flags & FREE_ENTRY_KEY) {
			free(pfield->key);
		}
		pfield->key = mlr_strdup_or_die(plabel->value);
		pfield->free_flags |= FREE_ENTRY_KEY;

		pfield = pfield->pnext;
		plabel = plabel->pnext;
	}

	// Process the remaining fields in the record beyond those affected by the new-labels list
	while (pfield != NULL) {
		// Remember the successor up front, since this field may be freed.
		lrece_t* pfollowing = pfield->pnext;
		if (hss_has(pnames_as_set, pfield->key)) {
			if (pfield->free_flags & FREE_ENTRY_KEY) {
				free(pfield->key);
			}
			if (pfield->free_flags & FREE_ENTRY_VALUE) {
				free(pfield->value);
			}
			lrec_unlink(prec, pfield);
			free(pfield);
		}
		pfield = pfollowing;
	}
}
// ----------------------------------------------------------------
// Replaces the entry's value in place. The old value is freed if flagged, and
// the entry's value flag is then set or cleared per new_needs_freeing.
// A NULL entry is a no-op.
void lrece_update_value(lrece_t* pe, char* new_value, int new_needs_freeing) {
	if (pe != NULL) {
		if (pe->free_flags & FREE_ENTRY_VALUE) {
			free(pe->value);
		}
		pe->value = new_value;
		if (!new_needs_freeing)
			pe->free_flags &= ~FREE_ENTRY_VALUE;
		else
			pe->free_flags |= FREE_ENTRY_VALUE;
	}
}
// ----------------------------------------------------------------
// Detaches the entry from the record's list and decrements the field count.
// The entry itself, its key, and its value are not freed. pe's own
// pprev/pnext pointers are left untouched.
void lrec_unlink(lrec_t* prec, lrece_t* pe) {
	int at_head = (pe == prec->phead);
	int at_tail = (pe == prec->ptail);

	if (at_head && at_tail) {
		// Sole entry: the list becomes empty.
		prec->phead = NULL;
		prec->ptail = NULL;
	} else if (at_head) {
		prec->phead = pe->pnext;
		pe->pnext->pprev = NULL;
	} else if (at_tail) {
		pe->pprev->pnext = pe->pnext;
		prec->ptail = pe->pprev;
	} else {
		// Interior entry: splice its neighbors together.
		pe->pprev->pnext = pe->pnext;
		pe->pnext->pprev = pe->pprev;
	}

	prec->field_count--;
}
// Detaches the entry from the record and frees it, along with its key and
// value where flagged.
void lrec_unlink_and_free(lrec_t* prec, lrece_t* pe) {
	char flags = pe->free_flags;
	if (flags & FREE_ENTRY_KEY)
		free(pe->key);
	if (flags & FREE_ENTRY_VALUE)
		free(pe->value);
	lrec_unlink(prec, pe);
	free(pe);
}
// ----------------------------------------------------------------
// Links an already-allocated entry in as the first field of the record and
// increments the field count.
static void lrec_link_at_head(lrec_t* prec, lrece_t* pe) {
	pe->pprev = NULL;
	if (prec->phead != NULL) {
		// [b,c,d] + a
		pe->pnext = prec->phead;
		prec->phead->pprev = pe;
	} else {
		// Empty record: the entry is both head and tail.
		pe->pnext = NULL;
		prec->ptail = pe;
	}
	prec->phead = pe;
	prec->field_count++;
}
// Links an already-allocated entry in as the last field of the record and
// increments the field count.
static void lrec_link_at_tail(lrec_t* prec, lrece_t* pe) {
	pe->pnext = NULL;
	if (prec->phead != NULL) {
		pe->pprev = prec->ptail;
		prec->ptail->pnext = pe;
	} else {
		// Empty record: the entry is both head and tail.
		pe->pprev = NULL;
		prec->phead = pe;
	}
	prec->ptail = pe;
	prec->field_count++;
}
// ----------------------------------------------------------------
// Writes a debugging dump of the record to standard output.
void lrec_dump(lrec_t* prec) {
	FILE* out = stdout;
	lrec_dump_fp(prec, out);
}
// Writes a debugging dump of the record to fp: the field count, the head and
// tail pointers, and then one line per entry giving its link pointers, key,
// and value. A NULL record prints "NULL". A NULL key or value prints as "null".
void lrec_dump_fp(lrec_t* prec, FILE* fp) {
	if (prec == NULL) {
		fprintf(fp, "NULL\n");
		return;
	}
	fprintf(fp, "field_count = %d\n", prec->field_count);
	// %p requires void* arguments, hence the casts.
	fprintf(fp, "| phead: %16p | ptail %16p\n", (void*)prec->phead, (void*)prec->ptail);
	for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
		// pe is non-null inside the loop, so only the key/value pointers need checking.
		const char* key_string = (pe->key == NULL) ? "null" : pe->key;
		const char* value_string = (pe->value == NULL) ? "null" : pe->value;
		fprintf(fp,
			"| prev: %16p curr: %16p next: %16p | key: %12s | value: %12s |\n",
			(void*)pe->pprev, (void*)pe, (void*)pe->pnext,
			key_string, value_string);
	}
}
// Writes the title followed by a colon, then the record dump, then a blank
// line, all to standard output.
void lrec_dump_titled(char* msg, lrec_t* prec) {
	printf("%s:\n", msg);
	lrec_dump(prec);
	putchar('\n');
}
// Writes the record's address to standard output, followed by one line per
// entry giving the addresses of the entry, its key, and its value.
// A NULL record prints only the first line.
void lrec_pointer_dump(lrec_t* prec) {
	// %p requires void* arguments, hence the casts.
	printf("prec %p\n", (void*)prec);
	if (prec == NULL)
		return;
	for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
		printf(" pe %p k %p v %p\n", (void*)pe, (void*)pe->key, (void*)pe->value);
	}
}
// ----------------------------------------------------------------
// Backing-free function for records with no backing storage: nothing to release.
static void lrec_unbacked_free(lrec_t* prec) {
	(void)prec; // intentionally unused
}
// Backing-free function for single-line-backed records: frees the line.
static void lrec_free_single_line_backing(lrec_t* prec) {
	char* pline = prec->psingle_line;
	free(pline);
}
// Backing-free function for CSV-backed records: frees the data line.
static void lrec_free_csv_backing(lrec_t* prec) {
	char* pdata_line = prec->psingle_line;
	free(pdata_line);
}
// Backing-free function for records backed by a list of lines: frees the list.
static void lrec_free_multiline_backing(lrec_t* prec) {
	slls_t* plines = prec->pxtab_lines;
	slls_free(plines);
}
// ================================================================
// ----------------------------------------------------------------
// Note on efficiency:
//
// I was imagining/hoping that strcmp has additional optimizations (e.g.
// hand-coded in assembly), so I don't *want* to re-implement it (i.e. I
// probably can't outperform it).
//
// But actual experiments show I get about a 1-2% performance gain doing it
// myself (on my particular system).
// Returns the first entry whose key is string-equal to the given key, or NULL
// if there is none. Entries are scanned sequentially from the head of the list.
static lrece_t* lrec_find_entry(lrec_t* prec, char* key) {
// Active variant: hand-rolled string comparison (see the efficiency note above).
#if 1
for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
char* pa = pe->key;
char* pb = key;
// Advance both cursors while the characters agree and neither string has ended.
while (*pa && *pb && (*pa == *pb)) {
pa++;
pb++;
}
// The keys are equal only if both strings ended at the same point.
if (*pa == 0 && *pb == 0)
return pe;
}
return NULL;
#else
// Inactive variant: the same scan using streq.
for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext)
if (streq(pe->key, key))
return pe;
return NULL;
#endif
}
// ----------------------------------------------------------------
// Builds an unbacked one-field record from the given key and value. The
// strings are referenced, not copied, and are not freed with the record.
lrec_t* lrec_literal_1(char* k1, char* v1) {
	lrec_t* pnew = lrec_unbacked_alloc();
	lrec_put(pnew, k1, v1, NO_FREE);
	return pnew;
}
// Builds an unbacked two-field record from the given keys and values, in
// argument order. The strings are referenced, not copied, and are not freed
// with the record.
lrec_t* lrec_literal_2(char* k1, char* v1, char* k2, char* v2) {
	char* pairs[2][2] = { { k1, v1 }, { k2, v2 } };
	lrec_t* pnew = lrec_unbacked_alloc();
	for (int i = 0; i < 2; i++) {
		lrec_put(pnew, pairs[i][0], pairs[i][1], NO_FREE);
	}
	return pnew;
}
// Builds an unbacked three-field record from the given keys and values, in
// argument order. The strings are referenced, not copied, and are not freed
// with the record.
lrec_t* lrec_literal_3(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3) {
	char* pairs[3][2] = { { k1, v1 }, { k2, v2 }, { k3, v3 } };
	lrec_t* pnew = lrec_unbacked_alloc();
	for (int i = 0; i < 3; i++) {
		lrec_put(pnew, pairs[i][0], pairs[i][1], NO_FREE);
	}
	return pnew;
}
// Builds an unbacked four-field record from the given keys and values, in
// argument order. The strings are referenced, not copied, and are not freed
// with the record.
lrec_t* lrec_literal_4(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3, char* k4, char* v4) {
	char* pairs[4][2] = { { k1, v1 }, { k2, v2 }, { k3, v3 }, { k4, v4 } };
	lrec_t* pnew = lrec_unbacked_alloc();
	for (int i = 0; i < 4; i++) {
		lrec_put(pnew, pairs[i][0], pairs[i][1], NO_FREE);
	}
	return pnew;
}
// Prints the record to standard output as key=value pairs separated by commas
// and terminated by a newline. A NULL record prints "NULL" and a newline.
void lrec_print(lrec_t* prec) {
	FILE* out = stdout;
	const char record_separator = '\n';
	const char field_separator = ',';
	const char pair_separator = '=';

	if (prec != NULL) {
		for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
			// Every field but the first is preceded by the field separator.
			if (pe != prec->phead)
				fputc(field_separator, out);
			fputs(pe->key, out);
			fputc(pair_separator, out);
			fputs(pe->value, out);
		}
	} else {
		fputs("NULL", out);
	}
	fputc(record_separator, out);
}
// Formats the record as a string: key, ops, value for each field, with ofs
// between fields and ors at the end. A NULL record yields "NULL" with no
// trailing ors. The result is produced by the string builder's finish call.
char* lrec_sprint(lrec_t* prec, char* ors, char* ofs, char* ops) {
	string_builder_t* pbuilder = sb_alloc(SB_ALLOC_LENGTH);

	if (prec != NULL) {
		for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
			// Every field but the first is preceded by the field separator.
			if (pe != prec->phead)
				sb_append_string(pbuilder, ofs);
			sb_append_string(pbuilder, pe->key);
			sb_append_string(pbuilder, ops);
			sb_append_string(pbuilder, pe->value);
		}
		sb_append_string(pbuilder, ors);
	} else {
		sb_append_string(pbuilder, "NULL");
	}

	char* formatted = sb_finish(pbuilder);
	sb_free(pbuilder);
	return formatted;
}

190
c/containers/lrec.h Normal file
View file

@ -0,0 +1,190 @@
// ================================================================
// This is a hashless implementation of insertion-ordered key-value pairs for
// Miller's fundamental record data structure. It implements the same
// interface as the hashed version (see lhmss.h).
//
// Design:
//
// * It keeps a doubly-linked list of key-value pairs.
// * No hash functions are computed when the map is written to or read from.
// * Gets are implemented by sequential scan through the list: given a key,
// the key-value pairs are scanned through until a match is (or is not) found.
// * Performance improvement of 10-15% over lhmss is found (for test data).
//
// Motivation:
//
// * The use case for records in Miller is that *all* fields are read from
// strings & written to strings (split/join), while only *some* fields are
// operated on.
//
// * Meanwhile there are few repeated accesses to a given record: the
// access-to-construct ratio is quite low for Miller data records. Miller
// instantiates thousands, millions, billions of records (depending on the
// input data) but accesses each record only once per mapping operation.
// (This is in contrast to accumulator hashmaps which are repeatedly accessed
// during a stats run.)
//
// * The hashed impl computes hashsums for *all* fields whether operated on or not,
// for the benefit of the *few* fields looked up during the mapping operation.
//
// * The hashless impl only keeps string pointers. Lookups are done at runtime
// doing prefix search on the key names. Assuming field names are distinct,
// this is just a few char-ptr accesses which (in experiments) turn out to
// offer about a 10-15% performance improvement.
//
// * Added benefit: the field-rename operation (preserving field order) becomes
// trivial.
//
// Notes:
// * null key is not supported.
// * null value is supported.
// ================================================================
#ifndef LREC_H
#define LREC_H
#include "lib/free_flags.h"
#include "containers/sllv.h"
#include "containers/slls.h"
#include "containers/hss.h"
#include "containers/header_keeper.h"
#define FIELD_QUOTED_ON_INPUT 0x02
// The record type is forward-declared so that the backing-free function type
// below can take a pointer to it before the struct body is given.
struct _lrec_t; // forward reference
typedef struct _lrec_t lrec_t;
// Signature of the per-format function that releases a record's backing storage.
typedef void lrec_free_func_t(lrec_t* prec);
// ----------------------------------------------------------------
// One key-value pair: a node in the record's doubly-linked list.
typedef struct _lrece_t {
char* key;
char* value;
// These indicate whether the key/value should be freed on lrec_free().
// Affirmative example: key/value is strdup of something.
// Negative example: key/value are pointers into a line the memory
// management of which is separately managed.
// Another negative example: key/value is a string literal, e.g. "".
char free_flags;
// Supplied by the caller through lrec_put_ext; zero for entries created by
// lrec_put, lrec_prepend, and lrec_put_after.
// NOTE(review): presumably holds FIELD_QUOTED_ON_INPUT -- confirm against the readers/writers.
char quote_flags;
// Neighbors in the list: pprev of the first entry and pnext of the last are NULL.
struct _lrece_t *pprev;
struct _lrece_t *pnext;
} lrece_t;
// A record: an insertion-ordered doubly-linked list of key-value entries, plus
// an optional backing store that the entries' strings may point into.
struct _lrec_t {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Number of entries currently linked into the list.
int field_count;
// First and last entries; both NULL when the record is empty.
lrece_t* phead;
lrece_t* ptail;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// See comments above free_flags. Used to track a mallocked pointer to be
// freed at lrec_free().
// E.g. for NIDX, DKVP, and CSV formats (header handled separately in the
// latter case).
char* psingle_line;
// For XTAB format.
slls_t* pxtab_lines;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Format-dependent virtual-function pointer:
// called when the record's contents are freed, to release the backing store.
lrec_free_func_t* pfree_backing_func;
};
// ----------------------------------------------------------------
// Allocators. Each returns an empty record. They differ only in which backing
// store (none, a single line, or a list of lines) is remembered and released
// along with the record.
lrec_t* lrec_unbacked_alloc();
lrec_t* lrec_dkvp_alloc(char* line);
lrec_t* lrec_nidx_alloc(char* line);
lrec_t* lrec_csvlite_alloc(char* data_line);
lrec_t* lrec_csv_alloc(char* data_line);
lrec_t* lrec_xtab_alloc(slls_t* pxtab_lines);
// Frees the record's entries and backing store and leaves it as an empty unbacked record.
// A null record is a no-op.
void lrec_clear(lrec_t* prec);
// Frees the record's entries, its backing store, and the record itself.
// A null record is a no-op.
void lrec_free(lrec_t* prec);
// Returns a new unbacked record holding duplicated keys and values, in the same order.
lrec_t* lrec_copy(lrec_t* pinrec);
// The only difference between lrec_put and lrec_prepend is that the former
// adds to the end of the record, while the latter adds to the beginning.
//
// For both, the key/value respectively will be freed by lrec_free if the
// corresponding bits are set in the free_flags.
//
// * If a string literal or other non-allocated pointer (e.g. mmapped memory
// from a file reader) is passed in, the free flag should not be set.
//
// * If dynamically allocated pointers are passed in, then either:
//
// o The respective free_flag(s) should be set and the caller should be sure
// not to also free (else, there will be heap corruption due to
// double-free), or
//
// o The respective free_flag(s) should not be set and the caller should
// free the memory (else, there will be a memory leak).
// If key is present, its value is replaced in place; otherwise a new field is added at the end.
// lrec_put_ext additionally records quote_flags on a newly added field.
void lrec_put(lrec_t* prec, char* key, char* value, char free_flags);
void lrec_put_ext(lrec_t* prec, char* key, char* value, char free_flags, char quote_flags);
// Like lrec_put: if key is present, modify value. But if not, add new field at start of record, not at end.
void lrec_prepend(lrec_t* prec, char* key, char* value, char free_flags);
// Like lrec_put: if key is present, modify value. But if not, add new field after specified entry, not at end.
// Returns a pointer to the added/modified node.
lrece_t* lrec_put_after(lrec_t* prec, lrece_t* pd, char* key, char* value, char free_flags);
// Returns the value for the key, or null if the key is absent. The pointer is the record's own storage.
char* lrec_get(lrec_t* prec, char* key);
// Positional getters: each returns null if the position is outside 1..field_count.
lrece_t* lrec_get_pair_by_position(lrec_t* prec, int position); // 1-up not 0-up
char* lrec_get_key_by_position(lrec_t* prec, int position); // 1-up not 0-up
char* lrec_get_value_by_position(lrec_t* prec, int position); // 1-up not 0-up
// This returns a pointer to the lrec's free-flags so that the caller can do ownership-transfer
// of about-to-be-removed key-value pairs.
char* lrec_get_pff(lrec_t* prec, char* key, char** ppfree_flags);
// This returns a pointer to the entry so the caller can update it directly without needing
// to do another field-scan on subsequent lrec_put etc. This is a performance optimization;
// it also allows mlr nest --explode to do explode-in-place rather than explode-at-end.
char* lrec_get_ext(lrec_t* prec, char* key, lrece_t** ppentry);
// Removal: no-op if the key is absent or the position is out of range. Key and value are freed where flagged.
void lrec_remove(lrec_t* prec, char* key);
void lrec_remove_by_position(lrec_t* prec, int position); // 1-up not 0-up
// Renaming keeps the renamed field's position. A field already keyed by new_key is unlinked from the record.
// new_needs_freeing says whether the record should free new_key.
void lrec_rename(lrec_t* prec, char* old_key, char* new_key, int new_needs_freeing);
void lrec_rename_at_position(lrec_t* prec, int position, char* new_key, int new_needs_freeing); // 1-up not 0-up
// Moves the field with the given key to the start/end of the record; no-op if the key is absent.
void lrec_move_to_head(lrec_t* prec, char* key);
void lrec_move_to_tail(lrec_t* prec, char* key);
// Renames the first n fields where n is the length of pnames.
// The hash-set argument is for efficient dedupe.
// Assumes as a precondition that pnames_as_list has no duplicates.
// If the new labels include any field names existing later on in the record, those are unset.
// For example, input record "a=1,b=2,c=3,d=4,e=5" with labels "d,x,f" results in output record "d=1,x=2,f=3,e=5".
void lrec_label(lrec_t* prec, slls_t* pnames_as_list, hss_t* pnames_as_set);
// Replaces the entry's value in place, freeing the old value if flagged. A null entry is a no-op.
void lrece_update_value(lrece_t* pe, char* new_value, int new_needs_freeing);
// For lrec-internal use:
void lrec_unlink(lrec_t* prec, lrece_t* pe);
// May be used for removing fields from a record while iterating over it:
void lrec_unlink_and_free(lrec_t* prec, lrece_t* pe);
// Debugging output. lrec_print writes "key=value" pairs joined by commas plus a newline to stdout;
// the dump functions also show the list pointers.
void lrec_print(lrec_t* prec);
void lrec_dump(lrec_t* prec);
void lrec_dump_fp(lrec_t* prec, FILE* fp);
void lrec_dump_titled(char* msg, lrec_t* prec);
void lrec_pointer_dump(lrec_t* prec);
// The caller should free the return value
char* lrec_sprint(lrec_t* prec, char* ors, char* ofs, char* ops);
// NIDX data are keyed by one-up field index which is not explicitly contained
// in the file, e.g. line "a b c" splits to an lrec with "{"1" => "a", "2" =>
// "b", "3" => "c"}. This function creates the keys, avoiding redundant memory
// allocation for most-used keys such as "1", "2", ... up to 100 or so. In case
// of large idx, free_flags & FREE_ENTRY_KEY will indicate that the key
// was dynamically allocated.
char* low_int_to_string(int idx, char* pfree_flags);
// For unit-test. The keys and values are referenced, not copied, and are not freed with the record.
lrec_t* lrec_literal_1(char* k1, char* v1);
lrec_t* lrec_literal_2(char* k1, char* v1, char* k2, char* v2);
lrec_t* lrec_literal_3(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3);
lrec_t* lrec_literal_4(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3, char* k4, char* v4);
#endif // LREC_H

208
c/containers/mixutil.c Normal file
View file

@ -0,0 +1,208 @@
#include "lib/mlr_globals.h"
#include "lib/mlrutil.h"
#include "lib/mlr_globals.h"
#include "containers/mixutil.h"
// ----------------------------------------------------------------
// Returns a newly allocated list whose entries point directly at the
// record's key strings; nothing is copied. Entries are appended with
// slls_append_no_free, so slls_free() will respect that and not corrupt the
// lrec. The list entries must not be used after the lrec is freed.
slls_t* mlr_reference_keys_from_record(lrec_t* prec) {
	slls_t* pkeys = slls_alloc();
	lrece_t* pfield = prec->phead;
	while (pfield != NULL) {
		slls_append_no_free(pkeys, pfield->key);
		pfield = pfield->pnext;
	}
	return pkeys;
}
// Returns a newly allocated list holding duplicates (mlr_strdup_or_die) of
// the record's keys, in record order. The duplicates are appended with
// slls_append_with_free, so the list does not reference the record's storage.
slls_t* mlr_copy_keys_from_record(lrec_t* prec) {
	slls_t* pkeys = slls_alloc();
	lrece_t* pfield = prec->phead;
	while (pfield != NULL) {
		char* key_copy = mlr_strdup_or_die(pfield->key);
		slls_append_with_free(pkeys, key_copy);
		pfield = pfield->pnext;
	}
	return pkeys;
}
// Returns a newly allocated list whose entries point directly at the
// record's value strings, in record order; nothing is copied (entries are
// appended with slls_append_no_free). The list entries must not be used
// after the lrec is freed.
slls_t* mlr_reference_values_from_record(lrec_t* prec) {
	slls_t* pvalues = slls_alloc();
	lrece_t* pfield = prec->phead;
	while (pfield != NULL) {
		slls_append_no_free(pvalues, pfield->value);
		pfield = pfield->pnext;
	}
	return pvalues;
}
// Same as mlr_reference_keys_from_record, but the key of the entry px
// (compared by pointer identity) is left out of the returned list.
slls_t* mlr_reference_keys_from_record_except(lrec_t* prec, lrece_t* px) {
	slls_t* pkeys = slls_alloc();
	for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
		if (pfield == px) {
			continue; // the excluded entry
		}
		slls_append_no_free(pkeys, pfield->key);
	}
	return pkeys;
}
// Same as mlr_reference_values_from_record, but the value of the entry px
// (compared by pointer identity) is left out of the returned list.
slls_t* mlr_reference_values_from_record_except(lrec_t* prec, lrece_t* px) {
	slls_t* pvalues = slls_alloc();
	for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
		if (pfield == px) {
			continue; // the excluded entry
		}
		slls_append_no_free(pvalues, pfield->value);
	}
	return pvalues;
}
// ----------------------------------------------------------------
// Looks up each name in pselected_field_names in the record and returns a
// newly allocated list of the corresponding values, in the order of the
// names. The values are referenced, not copied (slls_append_no_free), so
// slls_free() will not corrupt the lrec, but the list entries are invalid
// after the lrec is freed.
//
// Returns NULL (after freeing the partially built list) as soon as any
// selected name is absent from the record.
slls_t* mlr_reference_selected_values_from_record(lrec_t* prec, slls_t* pselected_field_names) {
	slls_t* pselected_values = slls_alloc();
	sllse_t* pname = pselected_field_names->phead;
	while (pname != NULL) {
		char* field_value = lrec_get(prec, pname->value);
		if (field_value == NULL) {
			// All-or-nothing: one missing field invalidates the result.
			slls_free(pselected_values);
			return NULL;
		}
		slls_append_no_free(pselected_values, field_value);
		pname = pname->pnext;
	}
	return pselected_values;
}
// Fills pvalues, slot by slot, with pointers to the record's values for the
// names in pselected_field_names. The two arrays must have the same length;
// otherwise an internal coding error is raised. A NULL name yields a NULL
// slot; otherwise the slot gets whatever lrec_get returns for that name.
//
// pvalues->strings_need_freeing is cleared, since the slots point into the
// lrec rather than owning their strings. The slots are invalid after the
// lrec is freed.
void mlr_reference_values_from_record_into_string_array(lrec_t* prec, string_array_t* pselected_field_names,
	string_array_t* pvalues)
{
	MLR_INTERNAL_CODING_ERROR_IF(pselected_field_names->length != pvalues->length);
	pvalues->strings_need_freeing = FALSE;
	int num_names = pselected_field_names->length;
	for (int idx = 0; idx < num_names; idx++) {
		char* field_name = pselected_field_names->strings[idx];
		pvalues->strings[idx] = (field_name == NULL)
			? NULL
			: lrec_get(prec, field_name);
	}
}
// Returns TRUE if lrec_get finds a non-NULL value in the record for every
// name in pselected_field_names, and FALSE at the first name for which it
// does not. An empty name list yields TRUE.
int record_has_all_keys(lrec_t* prec, slls_t* pselected_field_names) {
	sllse_t* pname = pselected_field_names->phead;
	while (pname != NULL) {
		if (lrec_get(prec, pname->value) == NULL) {
			return FALSE;
		}
		pname = pname->pnext;
	}
	return TRUE;
}
// ----------------------------------------------------------------
// Builds a map from field name to field value for the fields of the record
// selected by a set of regexes applied to the field names.
//
// * prec:           record to scan, in field order.
// * pregexes:       array of compiled regexes.
// * num_regexes:    number of elements in pregexes.
// * invert_matches: if zero, a field is selected when its name matches at
//                   least one regex; if nonzero (any nonzero value), a field
//                   is selected when its name matches none of them.
//
// The keys and values are put into the map with NO_FREE, i.e. they are
// referenced rather than copied; presumably, as with the other referencing
// functions in this file, they are invalid after the lrec is freed.
lhmss_t* mlr_reference_key_value_pairs_from_regex_names(lrec_t* prec, regex_t* pregexes, int num_regexes,
	int invert_matches)
{
	lhmss_t* pmap = lhmss_alloc();
	for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
		int matches_any = FALSE;
		for (int i = 0; i < num_regexes; i++) {
			if (regmatch_or_die(&pregexes[i], pe->key, 0, NULL)) {
				matches_any = TRUE;
				break; // one match suffices
			}
		}
		// Logical rather than bitwise selection: a bitwise XOR against the raw
		// flag would select every field whenever the caller passes a truthy
		// value other than 1.
		int is_selected = invert_matches ? !matches_any : matches_any;
		if (is_selected) {
			lhmss_put(pmap, pe->key, pe->value, NO_FREE);
		}
	}
	return pmap;
}
// ----------------------------------------------------------------
// Returns a newly allocated string set to which every value of the list has
// been added via hss_add.
hss_t* hss_from_slls(slls_t* plist) {
	hss_t* pset = hss_alloc();
	sllse_t* pnode = plist->phead;
	while (pnode != NULL) {
		hss_add(pset, pnode->value);
		pnode = pnode->pnext;
	}
	return pset;
}
// ----------------------------------------------------------------
// Calls lrec_print on each element of the list, in list order. The list's
// void-pointer payloads are passed to lrec_print as-is.
void lrec_print_list(sllv_t* plist) {
	sllve_t* pnode = plist->phead;
	while (pnode != NULL) {
		lrec_print(pnode->pvvalue);
		pnode = pnode->pnext;
	}
}
// Like lrec_print_list, but writes the prefix to stdout before each record.
// A NULL list is tolerated: the prefix followed by " NULL" is printed
// instead (without a trailing newline).
void lrec_print_list_with_prefix(sllv_t* plist, char* prefix) {
	if (plist == NULL) {
		printf("%s NULL", prefix);
		return;
	}
	sllve_t* pnode = plist->phead;
	while (pnode != NULL) {
		printf("%s", prefix);
		lrec_print(pnode->pvvalue);
		pnode = pnode->pnext;
	}
}
// ----------------------------------------------------------------
// Compares the strings in plist, position by position, against the record's
// values for the field names in pkeys, using strcmp.
//
// Returns:
// * the first nonzero strcmp result of (list value, record value);
// * -1 if the record has no value for one of the keys reached;
// * 0 if both lists run out together with all pairs equal;
// * 1 if plist runs out first, -1 if pkeys runs out first.
int slls_lrec_compare_lexically(
	slls_t* plist,
	lrec_t* prec,
	slls_t* pkeys)
{
	sllse_t* plistnode = plist->phead;
	sllse_t* pkeynode  = pkeys->phead;

	for ( ; plistnode != NULL && pkeynode != NULL;
		plistnode = plistnode->pnext, pkeynode = pkeynode->pnext)
	{
		char* record_value = lrec_get(prec, pkeynode->value);
		if (record_value == NULL)
			return -1;
		int cmp = strcmp(plistnode->value, record_value);
		if (cmp != 0)
			return cmp;
	}

	// At least one of the two lists is exhausted here.
	if (plistnode == NULL && pkeynode == NULL)
		return 0;
	return (plistnode == NULL) ? 1 : -1;
}
// ----------------------------------------------------------------
// Mirror image of slls_lrec_compare_lexically: compares the record's values
// for pkeys against plist, by negating the result of the list-versus-record
// comparison.
int lrec_slls_compare_lexically(
	lrec_t* prec,
	slls_t* pkeys,
	slls_t* plist)
{
	int list_vs_record = slls_lrec_compare_lexically(plist, prec, pkeys);
	return -list_vs_record;
}
// ----------------------------------------------------------------
// Returns TRUE if the record's keys, in record order, are exactly the
// strings in plist, in list order (same count, each pair equal per streq);
// FALSE otherwise.
int lrec_keys_equal_list(
	lrec_t* prec,
	slls_t* plist)
{
	lrece_t* pfield = prec->phead;
	sllse_t* pname  = plist->phead;

	while (pfield != NULL && pname != NULL) {
		if (!streq(pfield->key, pname->value))
			return FALSE;
		pfield = pfield->pnext;
		pname  = pname->pnext;
	}

	// Equal only if both sequences ended at the same time.
	return (pfield == NULL && pname == NULL) ? TRUE : FALSE;
}

62
c/containers/mixutil.h Normal file
View file

@ -0,0 +1,62 @@
// ================================================================
// Functions involving more than one container type
// ================================================================
#ifndef MIXUTIL_H
#define MIXUTIL_H
#include "containers/lrec.h"
#include "containers/slls.h"
#include "containers/hss.h"
#include "containers/lhmss.h"
#include "lib/string_array.h"
#include "lib/mlrregex.h"
// Makes a list with values pointing to the lrec's keys. slls_free() will respect that and not corrupt the lrec.
// However, the slls values will be invalid after the lrec is freed.
slls_t* mlr_reference_keys_from_record(lrec_t* prec);
// Makes a list with values pointing to the lrec's values. slls_free() will respect that and not corrupt the lrec.
// However, the slls values will be invalid after the lrec is freed.
slls_t* mlr_reference_values_from_record(lrec_t* prec);
slls_t* mlr_reference_keys_from_record_except(lrec_t* prec, lrece_t* px);
slls_t* mlr_reference_values_from_record_except(lrec_t* prec, lrece_t* px);
// Copies data; no referencing concerns.
slls_t* mlr_copy_keys_from_record(lrec_t* prec);
// Makes a list with values pointing into the lrec's values. slls_free() will
// respect that and not corrupt the lrec. However, the slls values will be
// invalid after the lrec is freed.
slls_t* mlr_reference_selected_values_from_record(lrec_t* prec, slls_t* pselected_field_names);
void mlr_reference_values_from_record_into_string_array(lrec_t* prec, string_array_t* pselected_field_names,
string_array_t* pvalues);
int record_has_all_keys(lrec_t* prec, slls_t* pselected_field_names);
lhmss_t* mlr_reference_key_value_pairs_from_regex_names(lrec_t* prec, regex_t* pregexes, int num_regexes,
int invert_matches);
// Copies data; no referencing concerns.
hss_t* hss_from_slls(slls_t* plist);
// Prints a list of lrecs using lrec_print.
void lrec_print_list(sllv_t* plist);
void lrec_print_list_with_prefix(sllv_t* plist, char* prefix);
// Same as
// slls_t* prec_values = mlr_reference_selected_values_from_record(prec, pkeys);
// return slls_compare_lexically(plist, prec_values);
// but without the unnecessary copy.
int slls_lrec_compare_lexically(
slls_t* plist,
lrec_t* prec,
slls_t* pkeys);
int lrec_slls_compare_lexically(
lrec_t* prec,
slls_t* pkeys,
slls_t* plist);
int lrec_keys_equal_list(
lrec_t* prec,
slls_t* plist);
#endif // MIXUTIL_H

1594
c/containers/mlhmmv.c Normal file

File diff suppressed because it is too large Load diff

318
c/containers/mlhmmv.h Normal file
View file

@ -0,0 +1,318 @@
// ================================================================
// Array-only (open addressing) multi-level hash map, with linear probing for collisions.
// All keys, and terminal-level values, are mlrvals. All data passed into the put method
// are copied; no pointers in this data structure reference anything external.
//
// Notes:
// * null key is not supported.
// * null value is not supported.
//
// See also:
// * http://en.wikipedia.org/wiki/Hash_table
// * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
// ================================================================
#ifndef MLHMMV_H
#define MLHMMV_H
#include "lib/mlrval.h"
#include "containers/sllmv.h"
#include "containers/sllv.h"
#include "containers/lrec.h"
#define MLHMMV_ERROR_NONE 0x0000
#define MLHMMV_ERROR_KEYLIST_TOO_DEEP 0xdeef
#define MLHMMV_ERROR_KEYLIST_TOO_SHALLOW 0x58a1
// This is made visible here in the API so the unit-tester can be sure to exercise the resize logic.
#define MLHMMV_INITIAL_ARRAY_LENGTH 16
// ----------------------------------------------------------------
void mlhmmv_print_terminal(mv_t* pmv, int quote_keys_always, int quote_values_always,
FILE* ostream);
// ----------------------------------------------------------------
struct _mlhmmv_level_t; // forward reference
// The 'x' is for extended: this can hold a scalar or a map.
// Which of the two it holds is indicated by is_terminal.
typedef struct _mlhmmv_xvalue_t {
// Next-level map; set to NULL by mlhmmv_xvalue_wrap_terminal when a scalar is wrapped.
struct _mlhmmv_level_t* pnext_level;
// The scalar payload when is_terminal is set.
mv_t terminal_mlrval;
// TRUE when this holds a scalar (see mlhmmv_xvalue_wrap_terminal).
// NOTE(review): presumably FALSE when this holds a map -- confirm against mlhmmv.c.
char is_terminal;
} mlhmmv_xvalue_t;
void mlhmmv_xvalue_reset(mlhmmv_xvalue_t* pxvalue);
mlhmmv_xvalue_t mlhmmv_xvalue_alloc_empty_map();
mlhmmv_xvalue_t mlhmmv_xvalue_copy(mlhmmv_xvalue_t* pxvalue);
void mlhmmv_xvalue_free(mlhmmv_xvalue_t* pxvalue);
char* mlhmmv_xvalue_describe_type_simple(mlhmmv_xvalue_t* pxvalue);
// Returns 1 if the xvalue is flagged terminal and its terminal mlrval is
// absent (per mv_is_absent); 0 otherwise.
// NOTE(review): the name says "nonterminal" but the test requires
// is_terminal to be set -- confirm which is intended before relying on the name.
static inline int mlhmmv_xvalue_is_absent_and_nonterminal(mlhmmv_xvalue_t* pxvalue) {
	if (!pxvalue->is_terminal)
		return 0;
	return mv_is_absent(&pxvalue->terminal_mlrval) != 0;
}
// Returns 1 if the xvalue is flagged terminal and its terminal mlrval is
// present (per mv_is_present); 0 otherwise.
// NOTE(review): the name says "nonterminal" but the test requires
// is_terminal to be set -- confirm which is intended before relying on the name.
static inline int mlhmmv_xvalue_is_present_and_nonterminal(mlhmmv_xvalue_t* pxvalue) {
	if (!pxvalue->is_terminal)
		return 0;
	return mv_is_present(&pxvalue->terminal_mlrval) != 0;
}
// Used by for-loops over map-valued local variables
sllv_t* mlhmmv_xvalue_copy_keys_indexed (mlhmmv_xvalue_t* pxvalue, sllmv_t* pmvkeys);
sllv_t* mlhmmv_xvalue_copy_keys_nonindexed(mlhmmv_xvalue_t* pxvalue);
void mlhmmv_xvalues_to_lrecs_lashed(
mlhmmv_xvalue_t** ptop_values,
int num_submaps,
mv_t* pbasenames,
sllmv_t* pnames,
sllv_t* poutrecs,
int do_full_prefixing,
char* flatten_separator);
// ----------------------------------------------------------------
// One key/value entry within a single level of the multi-level map.
typedef struct _mlhmmv_level_entry_t {
// NOTE(review): presumably the slot the key hashes to before linear probing -- confirm in mlhmmv.c.
int ideal_index;
// Key of this entry at this level.
mv_t level_key;
mlhmmv_xvalue_t level_xvalue; // terminal mlrval, or another hashmap
// Links to neighboring entries.
// NOTE(review): presumably these thread the entries in insertion order -- confirm in mlhmmv.c.
struct _mlhmmv_level_entry_t *pprev;
struct _mlhmmv_level_entry_t *pnext;
} mlhmmv_level_entry_t;
typedef unsigned char mlhmmv_level_entry_state_t;
// Wraps a mlrval in a terminal mlhmmv_xvalue without copying it. The mlrval
// is stored as-is, so ownership of its free_flags passes implicitly to the
// xvalue: the mlrval will be freed when the mlhmmv_xvalue is freed. Callers
// that still need the mlrval afterward should wrap a copy instead.
//
// This is a hot path for non-map local-variable assignments.
static inline mlhmmv_xvalue_t mlhmmv_xvalue_wrap_terminal(mv_t val) {
	mlhmmv_xvalue_t wrapped;
	wrapped.pnext_level = NULL; // a terminal has no submap
	wrapped.terminal_mlrval = val;
	wrapped.is_terminal = TRUE;
	return wrapped;
}
// ----------------------------------------------------------------
// One level of the multi-level hash map: a table of mlhmmv_level_entry_t.
// NOTE(review): the field descriptions below are inferred from the names and
// from the file-header description (array-only, open addressing); confirm
// against mlhmmv.c.
typedef struct _mlhmmv_level_t {
// Presumably the number of slots currently holding an entry.
int num_occupied;
// Presumably the number of slots that held an entry which was later removed.
int num_freed;
// Presumably the allocated length of the entries and states arrays.
int array_length;
// Entry slots.
mlhmmv_level_entry_t* entries;
// Per-slot state; presumably parallel to entries.
mlhmmv_level_entry_state_t* states;
// Presumably the ends of the list threaded through the entries' pprev/pnext links.
mlhmmv_level_entry_t* phead;
mlhmmv_level_entry_t* ptail;
} mlhmmv_level_t;
mlhmmv_level_t* mlhmmv_level_alloc();
void mlhmmv_level_free(mlhmmv_level_t* plevel);
void mlhmmv_level_clear(mlhmmv_level_t* plevel);
void mlhmmv_level_remove(mlhmmv_level_t* plevel, sllmve_t* prestkeys);
int mlhmmv_level_has_key(mlhmmv_level_t* plevel, mv_t* plevel_key);
mv_t* mlhmmv_level_look_up_and_ref_terminal(
mlhmmv_level_t* plevel,
sllmv_t* pmvkeys,
int* perror);
mlhmmv_xvalue_t* mlhmmv_level_look_up_and_ref_xvalue(
mlhmmv_level_t* plevel,
sllmv_t* pmvkeys,
int* perror);
mlhmmv_level_t* mlhmmv_level_put_empty_map(
mlhmmv_level_t* plevel,
mv_t* pkey);
void mlhmmv_level_put_xvalue(
mlhmmv_level_t* plevel,
sllmve_t* prest_keys,
mlhmmv_xvalue_t* pvalue);
void mlhmmv_level_put_xvalue_singly_keyed(
mlhmmv_level_t* plevel,
mv_t* pkey,
mlhmmv_xvalue_t* pvalue);
void mlhmmv_level_put_terminal(
mlhmmv_level_t* plevel,
sllmve_t* prest_keys,
mv_t* pterminal_value);
void mlhmmv_level_put_terminal_singly_keyed(
mlhmmv_level_t* plevel,
mv_t* pkey,
mv_t* pterminal_value);
void mlhmmv_level_to_lrecs(
mlhmmv_level_t* plevel,
sllmv_t* pkeys,
sllmv_t* pnames,
sllv_t* poutrecs,
int do_full_prefixing,
char* flatten_separator);
void mlhmmv_level_print_stacked(
mlhmmv_level_t* plevel,
int depth,
int do_final_comma,
int quote_keys_always,
int quote_values_always,
char* line_indent,
char* line_term,
FILE* ostream);
// ----------------------------------------------------------------
// Top-level handle for a multi-level map: a thin wrapper around a single
// xvalue. The mlhmmv_root_* functions below take a pointer to this.
typedef struct _mlhmmv_root_t {
// NOTE(review): presumably always map-valued (non-terminal) for a root -- confirm in mlhmmv.c.
mlhmmv_xvalue_t root_xvalue;
} mlhmmv_root_t;
mlhmmv_root_t* mlhmmv_root_alloc();
void mlhmmv_root_free(mlhmmv_root_t* pmap);
void mlhmmv_root_clear(mlhmmv_root_t* pmap);
// If the return value is non-null, error will be MLHMMV_ERROR_NONE. If the
// return value is null, the error will be MLHMMV_ERROR_KEYLIST_TOO_DEEP or
// MLHMMV_ERROR_KEYLIST_TOO_SHALLOW, or MLHMMV_ERROR_NONE if the keylist matches
// map depth but the entry is not found.
//
// Note: this returns a pointer to the map's data, not to a copy.
// The caller shouldn't free it, or modify it.
mv_t* mlhmmv_root_look_up_and_ref_terminal(mlhmmv_root_t* pmap, sllmv_t* pmvkeys, int* perror);
// These are an optimization for assignment from full srec, e.g. '@records[$key1][$key2] = $*'.
// Using mlhmmv_root_look_up_or_create_then_ref_level, the CST logic can get or create the @records[$key1][$key2]
// level of the mlhmmv, then copy values there.
mlhmmv_level_t* mlhmmv_root_look_up_or_create_then_ref_level(mlhmmv_root_t* pmap, sllmv_t* pmvkeys);
void mlhmmv_root_put_terminal(mlhmmv_root_t* pmap, sllmv_t* pmvkeys, mv_t* pterminal_value);
// For for-loop-over-oosvar, wherein we need to copy the submap before iterating over it
// (since the iteration may modify it). If the keys don't index a submap, then the return
// value has is_terminal = TRUE and pnext_level = NULL.
mlhmmv_xvalue_t mlhmmv_root_copy_xvalue(mlhmmv_root_t* pmap, sllmv_t* pmvkeys);
// Used by for-loops over oosvars. Return value is a list of ephemeral mlrvals.
sllv_t* mlhmmv_root_copy_keys_from_submap(mlhmmv_root_t* pmap, sllmv_t* pmvkeys);
// Unset value/submap from a specified level onward, also unsetting any maps which become empty as a result.
// Examples:
// {
// "a" : { "x" : 1, "y" : 2 },
// "b" : { "x" : 3, "y" : 4 },
// }
// with pmvkeys = ["a"] leaves
// {
// "b" : { "x" : 3, "y" : 4 },
// }
// but with pmvkeys = ["a", "y"] leaves
// {
// "a" : { "x" : 1 },
// "b" : { "x" : 3, "y" : 4 },
// }
// and with pmvkeys = [] leaves
// {
// }
// Now if ["a","x"] is removed from
// {
// "a" : { "x" : 1 },
// "b" : { "x" : 3, "y" : 4 },
// }
// then
// {
// "b" : { "x" : 3, "y" : 4 },
// }
// is left: unsetting "a":"x" leaves the map at "a" empty so this is unset as well.
void mlhmmv_root_remove(mlhmmv_root_t* pmap, sllmv_t* pmvkeys);
// For 'emit' and 'emitp' in the DSL. These allocate lrecs, appended to the poutrecs list.
// * pmap is the base-level oosvar multi-level hashmap.
// * pkeys specify the level in the mlhmmv at which to produce data.
// * pnames is used to pull subsequent-level keys out into separate fields.
// * In case pnames isn't long enough to reach a terminal mlrval level in the mlhmmv,
// do_full_prefixing specifies whether to concatenate nested mlhmmv keys into single lrec keys.
//
// Examples:
// * pkeys reaches a terminal level:
//
// $ mlr --opprint put -q '@sum += $x; end { emit @sum }' ../data/small
// sum
// 4.536294
// * pkeys reaches terminal levels:
//
// $ mlr --opprint put -q '@sum[$a][$b] += $x; end { emit @sum, "a", "b" }' ../data/small
// a b sum
// pan pan 0.346790
// pan wye 0.502626
// eks pan 0.758680
// eks wye 0.381399
// eks zee 0.611784
// wye wye 0.204603
// wye pan 0.573289
// zee pan 0.527126
// zee wye 0.598554
// hat wye 0.031442
// * pkeys reaches non-terminal levels: non-prefixed:
//
// $ mlr --opprint put -q '@sum[$a][$b] += $x; end { emit @sum, "a" }' ../data/small
// a pan wye
// pan 0.346790 0.502626
//
// a pan wye zee
// eks 0.758680 0.381399 0.611784
//
// a wye pan
// wye 0.204603 0.573289
//
// a pan wye
// zee 0.527126 0.598554
//
// a wye
// hat 0.031442
// * pkeys reaches non-terminal levels: prefixed:
//
// $ mlr --opprint put -q '@sum[$a][$b] += $x; end { emitp @sum, "a" }' ../data/small
// a sum:pan sum:wye
// pan 0.346790 0.502626
//
// a sum:pan sum:wye sum:zee
// eks 0.758680 0.381399 0.611784
//
// a sum:wye sum:pan
// wye 0.204603 0.573289
//
// a sum:pan sum:wye
// zee 0.527126 0.598554
//
// a sum:wye
// hat 0.031442
// For 'emit all' and 'emitp all' in the DSL
void mlhmmv_root_all_to_lrecs(mlhmmv_root_t* pmap, sllmv_t* pnames, sllv_t* poutrecs,
int do_full_prefixing, char* flatten_separator);
// For 'emit' and 'emitp' in the DSL
void mlhmmv_root_partial_to_lrecs(mlhmmv_root_t* pmap, sllmv_t* pkeys, sllmv_t* pnames, sllv_t* poutrecs,
int do_full_prefixing, char* flatten_separator);
// For 'dump' in the DSL; also used by the lrec-to-JSON writer.
void mlhmmv_root_print_json_stacked(mlhmmv_root_t* pmap,
int quote_keys_always, int quote_values_always,
char* line_indent, char* line_term, FILE* ostream);
void mlhmmv_root_print_json_single_lines(mlhmmv_root_t* pmap, int quote_keys_always,
int quote_values_always, char* line_term, FILE* ostream);
// Used for emit of localvars. Puts the xvalue in a single-key-value-pair map
// keyed by the specified name. The xvalue is referenced, not copied.
mlhmmv_root_t* mlhmmv_wrap_name_and_xvalue(mv_t* pname, mlhmmv_xvalue_t* pxval);
// Used for takedown of the temporary map returned by mlhmmv_wrap_name_and_xvalue. Since the xvalue there
// is referenced, not copied, mlhmmv_xvalue_free would prematurely free the xvalue. This method releases
// the xvalue so that the remaining, map-internal structures can be freed correctly.
void mlhmmv_unwrap_name_and_xvalue(mlhmmv_root_t* pmap);
#endif // MLHMMV_H

Some files were not shown because too many files have changed in this diff Show more