Escribí un script de Python para este propósito que básicamente funciona así:
with fileinput.input(args.file) as data:
headers = data.readline().split()
selectors = [any(string in header for string in args.fixed_strings) or
any(re.search(pat, header) for pat in args.python_regexp)
for header in headers]
print(*itertools.compress(headers, selectors))
for line in data:
print(*itertools.compress(line.split(), selectors))
Lo llamé hgrep
para encabezado grep , se puede usar así:
$ hgrep data.txt -F foo bar -P ^baz$
$ hgrep -F foo bar -P ^baz$ -- data.txt
$ grep -v spam data.txt | hgrep -F foo bar -P ^baz$
El script completo es un poco más largo, porque se usa argparse
para analizar argumentos de línea de comando y el código es el siguiente:
#!/usr/bin/python3
import argparse
import fileinput
import itertools
import re
import sys
import textwrap
def underline(s):
return '\033[4m{}\033[0m'.format(s)
parser = argparse.ArgumentParser(
usage='%(prog)s [OPTIONS] {} [FILE]'.format(
underline('column-specification')),
description=
'Print selected columns by specifying patterns to match the headers.',
epilog=textwrap.dedent('''\
examples:
$ %(prog)s data.txt -F foo bar -P ^baz$
$ %(prog)s -F foo bar -P ^baz$ -- data.txt
$ grep -v spam data.txt | %(prog)s -F foo bar -P ^baz$
'''),
formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
'-d', '--debug', action='store_true', help='include debugging information')
parser.add_argument(
'file', metavar='FILE', nargs='?', default='-',
help="use %(metavar)s as input, default is '-' for standard input")
spec = parser.add_argument_group(
'column specification', 'one of these or both must be provided:')
spec.add_argument(
'-F', '--fixed-strings', metavar='STRING', nargs='*', default=[],
help='show columns containing %(metavar)s in header\n\n')
spec.add_argument(
'-P', '--python-regexp', metavar='PATTERN', nargs='*', default=[],
help='show a column if its header matches any %(metavar)s')
args = parser.parse_args()
if args.debug:
for k, v in sorted(vars(args).items()):
print('{}: debug: {:>15}: {}'.format(parser.prog, k, v),
file=sys.stderr)
if not args.fixed_strings and not args.python_regexp:
parser.error('no column specifications given')
try:
with fileinput.input(args.file) as data:
headers = data.readline().split()
selectors = [any(string in header for string in args.fixed_strings) or
any(re.search(pat, header) for pat in args.python_regexp)
for header in headers]
print(*itertools.compress(headers, selectors))
for line in data:
print(*itertools.compress(line.split(), selectors))
except BrokenPipeError:
sys.exit(1)
except KeyboardInterrupt:
print()
sys.exit(1)
cat
no es necesario, por cierto. Podrías usarawk '{ print $1, $3 }' file.tsv