From 34d4ef9f1264caaee4564345d343df82d99a526a Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Thu, 23 Apr 2020 16:16:44 +0000 Subject: [PATCH 1/2] Introduce Basic Single Pass Parser = Motivation The current parsing logic of sdb based on split() and the shlex library has resulted in multiple workarounds in the implementation of commands and the overall user-experience of the tool. In addition, its lack of proper error-handling and reporting frequently results in the user not knowing what is wrong with their input. Trying to fix the aforementioned shortcomings in the existing logic has proven difficult as fixing one problem brings up a new one. = Patch This patch replaces this code with a simple hand-written parser that provides the bare-minimum that we need and improved error-reporting. Unit tests are also provided for the parser to test its behavior and also highlight its behavior in extreme cases of input. This patch also does a first pass in undoing most of the workarounds that we have in existing commands due to the old parsing logic. = Things To Note: Quoted Strings Proper support for single and double quote strings is added with this patch. Double-quote strings are allowed to escape a double-quote by inserting a backslash before it. Single-quote strings can escape a single quote the same way. E.g. the following examples are valid: ``` ... | filter "obj.spa_name == 'rpool'" | ... ... | filter "obj.spa_name == \"rpool\"" | ... ... | filter 'obj.spa_name == "rpool"' | ... ... | filter 'obj.spa_name == \'rpool\'' | ... ``` The purpose of strings is solely to allow the ability to pass multiple words separated by space as a single argument to commands. The `filter` examples shown above get the whole predicate passed in string form as a single argument. The actual quotes of the string are not part of the arguments passed to the command. This behavior was modelled after bash. 
= Examples of new errors ``` // Before sdb> echo 1 2 3 | sdb: cannot recognize command: // After sdb: syntax error: freestanding pipe with no command echo 1 2 3 | ^ ``` ``` // Before sdb> echo 1 2 3 | filter obj != 1 sdb: filter: invalid input: comparison operator is missing // After sdb> echo 1 2 3 | filter obj != 1 sdb: syntax error: predicates that use != as an operator should be quoted echo 1 2 3 | filter obj != 1 ^ ``` ``` // Before sdb> echo 1 2 3 | filter "obj != 1 sdb encountered an internal error due to a bug. Here's the information you need to file the bug: ---------------------------------------------------------- Target Info: ProgramFlags.IS_LIVE|IS_LINUX_KERNEL Platform(, ) Traceback (most recent call last): File "/usr/lib/python3/dist-packages/sdb/internal/repl.py", line 107, in eval_cmd for obj in invoke(self.target, [], input_): File "/usr/lib/python3/dist-packages/sdb/pipeline.py", line 107, in invoke all_tokens = list(lexer) File "/usr/lib/python3.6/shlex.py", line 295, in __next__ token = self.get_token() File "/usr/lib/python3.6/shlex.py", line 105, in get_token raw = self.read_token() File "/usr/lib/python3.6/shlex.py", line 187, in read_token raise ValueError("No closing quotation") ValueError: No closing quotation ---------------------------------------------------------- Link: https://github.com/delphix/sdb/issues/new // After sdb> echo 1 2 3 | filter "obj != 1 sdb: syntax error: unfinished string expression echo 1 2 3 | filter "obj != 1 ^ ``` ``` // Before sdb> ! pwd sdb: cannot recognize command: sdb> ! echo hello! Multiple ! not supported // After sdb> ! pwd /export/home/delphix/sdb sdb> ! echo hello! hello! 
``` --- sdb/__init__.py | 4 +- sdb/command.py | 42 ++- sdb/commands/container_of.py | 2 +- sdb/commands/filter.py | 35 ++- sdb/commands/internal/util.py | 12 +- sdb/commands/linux/per_cpu.py | 6 +- sdb/commands/linux/slabs.py | 8 - sdb/commands/pyfilter.py | 6 +- sdb/commands/spl/spl_kmem_caches.py | 8 - sdb/commands/stacks.py | 6 +- sdb/commands/threads.py | 2 +- sdb/commands/zfs/btree.py | 6 +- sdb/commands/zfs/dbuf.py | 2 +- sdb/commands/zfs/range_tree.py | 5 +- sdb/commands/zfs/spa.py | 18 +- sdb/commands/zfs/vdev.py | 14 +- sdb/commands/zfs/zfs_dbgmsg.py | 2 +- sdb/error.py | 19 +- sdb/parser.py | 250 ++++++++++++++++++ sdb/pipeline.py | 86 +++--- ... 1 => echo 0x0 0x1 0x2 | filter 'obj < 1'} | 0 ...1 => echo 0x0 0x1 0x2 | filter 'obj <= 1'} | 0 ...1 => echo 0x0 0x1 0x2 | filter 'obj == 1'} | 0 ... 1 => echo 0x0 0x1 0x2 | filter 'obj > 1'} | 0 ...1 => echo 0x0 0x1 0x2 | filter 'obj >= 1'} | 0 ... obj == 0 => echo 0x0 | filter 'obj == 0'} | 0 ... obj == 1 => echo 0x0 | filter 'obj == 1'} | 0 ... == obj => echo 0x1 | filter 'obj == obj'} | 0 .../{filter obj == 1 => filter 'obj == 1'} | 0 .../regression_output/core/ptype 'struct spa' | 195 ++++++++++++++ .../regression_output/core/ptype struct spa | 1 + .../regression_output/core/sizeof struct spa | 2 +- ...spa_syncing_txg < 1624' | member spa_name} | 0 ...pa_syncing_txg <= 1624' | member spa_name} | 0 ...pa_syncing_txg == 1624' | member spa_name} | 0 ...spa_syncing_txg > 1624' | member spa_name} | 0 ...pa_syncing_txg >= 1624' | member spa_name} | 0 ...ilter 'obj.spa_syncing_txg bogus_op 1624'} | 0 ...er \"obj.comm == \\\"bogus\\\"\" | thread" | 0 ... 
== obj => zfs_dbgmsg | filter '== obj'} | 0 ...er obj == => zfs_dbgmsg | filter 'obj =='} | 0 ...bj'spa rpool | filter 'obj.bogus == 1624'} | 0 ...> dmesg | filter 'obj.level == 3' | dmesg} | 0 ...bs -s active_objs -o active_objs,util,name | 0 ...bj.name == \"UNIX\"' | slub_cache | count" | 0 ...\"kmalloc-8\"' | member cpu_slab | percpu" | 0 ...kmalloc-8\"' | member cpu_slab | percpu 0" | 0 ...alloc-8\"' | member cpu_slab | percpu 0 1" | 0 ...loc-8\"' | member cpu_slab | percpu 0 2 1" | 0 ...kmalloc-8\"' | member cpu_slab | percpu 1" | 0 ...alloc-8\"' | member cpu_slab | percpu 100" | 0 ...kmalloc-8\"' | member cpu_slab | percpu 2" | 0 ...kmalloc-8\"' | member cpu_slab | percpu 3" | 0 ... 'obj.name == \"zio_cache\"' | slub_cache" | 0 ...e | cast zio_t * | member io_spa.spa_name" | 0 ...me == \"zio_cache\"' | slub_cache | count" | 0 ...filter 'obj.name == \"zio_cache\"' | walk" | 0 ...s | filter 'obj.comm == \"java\"' | stack" | 0 ...| filter 'obj.comm == \"java\"' | threads" | 0 ...em_caches -o name,entry_size -s entry_size | 140 ++++++++++ .../spl/spl_kmem_caches -o name,source | 140 ++++++++++ ...er 'obj.skc_linux_cache == 0' | spl_cache} | 0 ...j.skc_linux_cache == 0' | spl_cache | cnt} | 0 ...j.skc_obj_alloc > 0' | head 1 | spl_cache} | 0 ...er 'obj.skc_name == \"ddt_cache\"' | walk" | 0 ...ember ms_allocatable.rt_histogram | zhist} | 0 ...ember ms_sm.sm_phys.smp_histogram | zhist} | 0 ...ber ms_sm.sm_phys.smp_histogram | zhist 9} | 0 tests/integration/test_core_generic.py | 43 +-- tests/integration/test_linux_generic.py | 35 +-- tests/integration/test_spl_generic.py | 10 +- tests/integration/test_zfs_generic.py | 6 +- tests/unit/commands/test_filter.py | 24 +- tests/unit/test_parser.py | 148 +++++++++++ 74 files changed, 1097 insertions(+), 180 deletions(-) create mode 100644 sdb/parser.py rename tests/integration/data/regression_output/core/{echo 0x0 0x1 0x2 | filter obj < 1 => echo 0x0 0x1 0x2 | filter 'obj < 1'} (100%) rename 
tests/integration/data/regression_output/core/{echo 0x0 0x1 0x2 | filter obj <= 1 => echo 0x0 0x1 0x2 | filter 'obj <= 1'} (100%) rename tests/integration/data/regression_output/core/{echo 0x0 0x1 0x2 | filter obj == 1 => echo 0x0 0x1 0x2 | filter 'obj == 1'} (100%) rename tests/integration/data/regression_output/core/{echo 0x0 0x1 0x2 | filter obj > 1 => echo 0x0 0x1 0x2 | filter 'obj > 1'} (100%) rename tests/integration/data/regression_output/core/{echo 0x0 0x1 0x2 | filter obj >= 1 => echo 0x0 0x1 0x2 | filter 'obj >= 1'} (100%) rename tests/integration/data/regression_output/core/{echo 0x0 | filter obj == 0 => echo 0x0 | filter 'obj == 0'} (100%) rename tests/integration/data/regression_output/core/{echo 0x0 | filter obj == 1 => echo 0x0 | filter 'obj == 1'} (100%) rename tests/integration/data/regression_output/core/{echo 0x1 | filter obj == obj => echo 0x1 | filter 'obj == obj'} (100%) rename tests/integration/data/regression_output/core/{filter obj == 1 => filter 'obj == 1'} (100%) create mode 100644 tests/integration/data/regression_output/core/ptype 'struct spa' create mode 100644 tests/integration/data/regression_output/core/ptype struct spa rename tests/integration/data/regression_output/core/{spa rpool | filter obj.spa_syncing_txg < 1624 | member spa_name => spa rpool | filter 'obj.spa_syncing_txg < 1624' | member spa_name} (100%) rename tests/integration/data/regression_output/core/{spa rpool | filter obj.spa_syncing_txg <= 1624 | member spa_name => spa rpool | filter 'obj.spa_syncing_txg <= 1624' | member spa_name} (100%) rename tests/integration/data/regression_output/core/{spa rpool | filter obj.spa_syncing_txg == 1624 | member spa_name => spa rpool | filter 'obj.spa_syncing_txg == 1624' | member spa_name} (100%) rename tests/integration/data/regression_output/core/{spa rpool | filter obj.spa_syncing_txg > 1624 | member spa_name => spa rpool | filter 'obj.spa_syncing_txg > 1624' | member spa_name} (100%) rename 
tests/integration/data/regression_output/core/{spa rpool | filter obj.spa_syncing_txg >= 1624 | member spa_name => spa rpool | filter 'obj.spa_syncing_txg >= 1624' | member spa_name} (100%) rename tests/integration/data/regression_output/core/{spa rpool | filter obj.spa_syncing_txg bogus_op 1624 => spa rpool | filter 'obj.spa_syncing_txg bogus_op 1624'} (100%) rename "tests/integration/data/regression_output/core/thread | filter obj.comm == \"bogus\" | thread" => "tests/integration/data/regression_output/core/thread | filter \"obj.comm == \\\"bogus\\\"\" | thread" (100%) rename tests/integration/data/regression_output/core/{zfs_dbgmsg | filter == obj => zfs_dbgmsg | filter '== obj'} (100%) rename tests/integration/data/regression_output/core/{zfs_dbgmsg | filter obj == => zfs_dbgmsg | filter 'obj =='} (100%) rename tests/integration/data/regression_output/core/{zfs_dbgmsg | filter objspa rpool | filter obj.bogus == 1624 => zfs_dbgmsg | filter 'obj'spa rpool | filter 'obj.bogus == 1624'} (100%) rename tests/integration/data/regression_output/linux/{dmesg | filter obj.level == 3 | dmesg => dmesg | filter 'obj.level == 3' | dmesg} (100%) rename "tests/integration/data/regression_output/linux/slabs -s active_objs -o \"active_objs,util,name\"" => tests/integration/data/regression_output/linux/slabs -s active_objs -o active_objs,util,name (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"UNIX\" | slub_cache | count" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"UNIX\"' | slub_cache | count" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 0" => 
"tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 0 1" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0 1" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 0 2 1" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0 2 1" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 1" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 1" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 100" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 100" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 2" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 2" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 3" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 3" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | slub_cache" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == 
\"zio_cache\" | slub_cache | cast zio_t * | member io_spa.spa_name" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache | cast zio_t * | member io_spa.spa_name" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | slub_cache | count" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache | count" (100%) rename "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | walk" => "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | walk" (100%) rename "tests/integration/data/regression_output/linux/threads | filter obj.comm == \"java\" | stack" => "tests/integration/data/regression_output/linux/threads | filter 'obj.comm == \"java\"' | stack" (100%) rename "tests/integration/data/regression_output/linux/threads | filter obj.comm == \"java\" | threads" => "tests/integration/data/regression_output/linux/threads | filter 'obj.comm == \"java\"' | threads" (100%) create mode 100644 tests/integration/data/regression_output/spl/spl_kmem_caches -o name,entry_size -s entry_size create mode 100644 tests/integration/data/regression_output/spl/spl_kmem_caches -o name,source rename tests/integration/data/regression_output/spl/{spl_kmem_caches | filter obj.skc_linux_cache == 0 | spl_cache => spl_kmem_caches | filter 'obj.skc_linux_cache == 0' | spl_cache} (100%) rename tests/integration/data/regression_output/spl/{spl_kmem_caches | filter obj.skc_linux_cache == 0 | spl_cache | cnt => spl_kmem_caches | filter 'obj.skc_linux_cache == 0' | spl_cache | cnt} (100%) rename tests/integration/data/regression_output/spl/{spl_kmem_caches | filter obj.skc_linux_cache > 0 | filter obj.skc_obj_alloc > 0 | head 1 | spl_cache => spl_kmem_caches | filter 'obj.skc_linux_cache > 0' | filter 'obj.skc_obj_alloc > 0' | head 1 | spl_cache} (100%) rename 
"tests/integration/data/regression_output/spl/spl_kmem_caches | filter obj.skc_name == \"ddt_cache\" | walk" => "tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_name == \"ddt_cache\"' | walk" (100%) rename tests/integration/data/regression_output/zfs/{spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_allocatable.rt_histogram | zhist => spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_allocatable.rt_histogram | zhist} (100%) rename tests/integration/data/regression_output/zfs/{spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_sm.sm_phys.smp_histogram | zhist => spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_sm.sm_phys.smp_histogram | zhist} (100%) rename tests/integration/data/regression_output/zfs/{spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_sm.sm_phys.smp_histogram | zhist 9 => spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_sm.sm_phys.smp_histogram | zhist 9} (100%) create mode 100644 tests/unit/test_parser.py diff --git a/sdb/__init__.py b/sdb/__init__.py index ab83479c..16a30d81 100644 --- a/sdb/__init__.py +++ b/sdb/__init__.py @@ -30,7 +30,8 @@ # from sdb.error import (Error, CommandNotFoundError, CommandError, CommandInvalidInputError, SymbolNotFoundError, - CommandArgumentsError, CommandEvalSyntaxError) + CommandArgumentsError, CommandEvalSyntaxError, + ParserError) from sdb.target import (create_object, get_object, get_prog, get_typed_null, get_type, get_pointer_type, get_target_flags, get_symbol, type_canonical_name, type_canonicalize, @@ -53,6 +54,7 @@ 'Error', 'InputHandler', 'Locator', + 'ParserError', 'PrettyPrinter', 'SingleInputCommand', 'SymbolNotFoundError', diff --git a/sdb/command.py b/sdb/command.py index 83603cca..9aa49726 100644 --- a/sdb/command.py +++ b/sdb/command.py @@ -215,13 +215,47 @@ def help(cls, name: str) -> None: input_type: 
Optional[str] = None - def __init__(self, args: str = "", name: str = "_") -> None: + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: self.name = name self.isfirst = False self.islast = False self.parser = type(self)._init_parser(name) - self.args = self.parser.parse_args(args.split()) + + # + # The if-else clauses below may seem like it can be avoided by: + # + # [1] Passing the `args` function argument to parse_args() even if + # it is None - the call won't blow up. + # + # or [2] Setting the default value of `args` to be [] instead of None. + # + # Solution [1] doesn't work because parse_args() actually distinguishes + # between None and [] as parameters. If [] is passed it returns an + # argparse.Namespace() with default values for all the fields that the + # command specified in _init_parser(), which is what we want. If None + # is passed then argparse's default logic is to attempt to parse + # `_sys.argv[1:]` (reference code: cpython/Lib/argparse.py) which is + # the arguments passed to the sdb from the shell. This is far from what + # we want. + # + # Solution 2 is dangerous as default arguments in Python are mutable(!) + # and thus invoking a Command with arguments that doesn't specify the + # __init__() method can pass its arguments to a similar Command later + # in the pipeline even if the latter Command didn't specify any args. + # [docs.python-guide.org/writing/gotchas/#mutable-default-arguments] + # + # We still want to set self.args to an argparse.Namespace() with the + # fields specific to our self.parser, thus we are forced to call + # parse_args([]) for it, even if `args` is None. This way commands + # using arguments can always do self.args. without + # having to check whether this field exist every time. 
+ # + if args is None: + args = [] + self.args = self.parser.parse_args(args) def __init_subclass__(cls, **kwargs: Any) -> None: """ @@ -365,7 +399,9 @@ def _init_parser(cls, name: str) -> argparse.ArgumentParser: parser.add_argument("type", nargs=argparse.REMAINDER) return parser - def __init__(self, args: str = "", name: str = "_") -> None: + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: super().__init__(args, name) if not self.args.type: self.parser.error("the following arguments are required: ") diff --git a/sdb/commands/container_of.py b/sdb/commands/container_of.py index 599de6c8..1bf80e43 100644 --- a/sdb/commands/container_of.py +++ b/sdb/commands/container_of.py @@ -40,7 +40,7 @@ class ContainerOf(sdb.Command): sdb> addr init_task | cast void * (void *)0xffffffffa8217740 - sdb> addr init_task | member comm | addr | container_of struct task_struct comm | cast void * + sdb> addr init_task | member comm | addr | container_of task_struct comm | cast void * (void *)0xffffffffa8217740 """ diff --git a/sdb/commands/filter.py b/sdb/commands/filter.py index d9fb0aec..4367db59 100644 --- a/sdb/commands/filter.py +++ b/sdb/commands/filter.py @@ -17,7 +17,7 @@ # pylint: disable=missing-docstring import argparse -from typing import Iterable +from typing import Iterable, List, Optional import drgn import sdb @@ -55,16 +55,31 @@ def _init_parser(cls, name: str) -> argparse.ArgumentParser: parser.add_argument("expr", nargs=argparse.REMAINDER) return parser - def __init__(self, args: str = "", name: str = "_") -> None: + @staticmethod + def _parse_expression(input_expr: str) -> List[str]: + pass + + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: super().__init__(args, name) if not self.args.expr: - self.parser.error("the following arguments are required: expr") + self.parser.error("no expression specified") + + # + # This is a stop-gap solution until we figure out + # exactly how we want the filter 
command to behave. + # + if len(self.args.expr) == 1: + self.expr = self.args.expr[0].split() + else: + self.expr = self.args.expr index = None operators = ["==", "!=", ">", "<", ">=", "<="] for operator in operators: try: - index = self.args.expr.index(operator) + index = self.expr.index(operator) # Use the first comparison operator we find. break except ValueError: @@ -83,7 +98,7 @@ def __init__(self, args: str = "", name: str = "_") -> None: raise sdb.CommandInvalidInputError( self.name, "left hand side of expression is missing") - if index == len(self.args.expr) - 1: + if index == len(self.expr) - 1: # If the index is found to be at the very end of the list, # this means there's no right hand side of the comparison to # compare the left hand side to. This is an error. @@ -91,14 +106,14 @@ def __init__(self, args: str = "", name: str = "_") -> None: self.name, "right hand side of expression is missing") try: - self.lhs_code = compile(" ".join(self.args.expr[:index]), - "", "eval") - self.rhs_code = compile(" ".join(self.args.expr[index + 1:]), - "", "eval") + self.lhs_code = compile(" ".join(self.expr[:index]), "", + "eval") + self.rhs_code = compile(" ".join(self.expr[index + 1:]), "", + "eval") except SyntaxError as err: raise sdb.CommandEvalSyntaxError(self.name, err) - self.compare = self.args.expr[index] + self.compare = self.expr[index] def _call_one(self, obj: drgn.Object) -> Iterable[drgn.Object]: try: diff --git a/sdb/commands/internal/util.py b/sdb/commands/internal/util.py index f1700602..765832f5 100644 --- a/sdb/commands/internal/util.py +++ b/sdb/commands/internal/util.py @@ -27,11 +27,8 @@ def get_valid_type_by_name(cmd: sdb.Command, tname: str) -> drgn.Type: corresponding drgn.Type object. 
This function is used primarily by commands that accept a type - name as an argument and exists mainly for 2 reasons: - [1] There is a limitation in the way the SDB lexer interacts with - argparse making it hard for us to parse type names more than - 1 token wide (e.g. 'struct task_struct'). [bad reason] - [2] We save some typing for the user. [good reason] + name as an argument and exist only to save keystrokes for the + user. """ if tname in ['struct', 'enum', 'union', 'class']: # @@ -43,8 +40,9 @@ def get_valid_type_by_name(cmd: sdb.Command, tname: str) -> drgn.Type: # user-friendly and thus we just avoid that situation # by instructing the user to skip such keywords. # - raise sdb.CommandError(cmd.name, - f"skip keyword '{tname}' and try again") + raise sdb.CommandError( + cmd.name, + f"skip keyword '{tname}' or quote your type \"{tname} \"") try: type_ = sdb.get_type(tname) diff --git a/sdb/commands/linux/per_cpu.py b/sdb/commands/linux/per_cpu.py index 25081489..de1f720a 100644 --- a/sdb/commands/linux/per_cpu.py +++ b/sdb/commands/linux/per_cpu.py @@ -17,7 +17,7 @@ # pylint: disable=missing-docstring import argparse -from typing import Iterable +from typing import Iterable, List, Optional import drgn import drgn.helpers.linux.cpumask as drgn_cpumask @@ -51,7 +51,9 @@ def _init_parser(cls, name: str) -> argparse.ArgumentParser: parser.add_argument("cpus", nargs="*", type=int) return parser - def __init__(self, args: str = "", name: str = "_") -> None: + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: super().__init__(args, name) self.ncpus = len( list(drgn_cpumask.for_each_possible_cpu(sdb.get_prog()))) diff --git a/sdb/commands/linux/slabs.py b/sdb/commands/linux/slabs.py index 22c08a53..f22ee2c8 100644 --- a/sdb/commands/linux/slabs.py +++ b/sdb/commands/linux/slabs.py @@ -148,14 +148,6 @@ def no_input(self) -> Iterable[drgn.Object]: def __pp_parse_args(self) -> Tuple[str, List[str], Dict[str, Any]]: fields = 
self.DEFAULT_FIELDS if self.args.o: - # - # HACK: Until we have a proper lexer for SDB we can - # only pass the comma-separated list as a - # string (e.g. quoted). Until this is fixed - # we make sure to unquote such strings. - # - if self.args.o[0] == '"' and self.args.o[-1] == '"': - self.args.o = self.args.o[1:-1] fields = self.args.o.split(",") elif self.args.v: fields = list(Slabs.FIELDS.keys()) diff --git a/sdb/commands/pyfilter.py b/sdb/commands/pyfilter.py index fa63bdc3..c71f0be6 100644 --- a/sdb/commands/pyfilter.py +++ b/sdb/commands/pyfilter.py @@ -17,7 +17,7 @@ # pylint: disable=missing-docstring import argparse -from typing import Iterable +from typing import Iterable, List, Optional import drgn import sdb @@ -33,7 +33,9 @@ def _init_parser(cls, name: str) -> argparse.ArgumentParser: parser.add_argument("expr", nargs=argparse.REMAINDER) return parser - def __init__(self, args: str = "", name: str = "_") -> None: + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: super().__init__(args, name) if not self.args.expr: self.parser.error("the following arguments are required: expr") diff --git a/sdb/commands/spl/spl_kmem_caches.py b/sdb/commands/spl/spl_kmem_caches.py index 5242b99e..17ad8253 100644 --- a/sdb/commands/spl/spl_kmem_caches.py +++ b/sdb/commands/spl/spl_kmem_caches.py @@ -148,14 +148,6 @@ def no_input(self) -> Iterable[drgn.Object]: def __pp_parse_args(self) -> Tuple[str, List[str], Dict[str, Any]]: fields = SplKmemCaches.DEFAULT_FIELDS if self.args.o: - # - # HACK: Until we have a proper lexer for SDB we can - # only pass the comma-separated list as a - # string (e.g. quoted). Until this is fixed - # we make sure to unquote such strings. 
- # - if self.args.o[0] == '"' and self.args.o[-1] == '"': - self.args.o = self.args.o[1:-1] fields = self.args.o.split(",") elif self.args.v: fields = list(SplKmemCaches.FIELDS.keys()) diff --git a/sdb/commands/stacks.py b/sdb/commands/stacks.py index e6be4e21..769040b8 100644 --- a/sdb/commands/stacks.py +++ b/sdb/commands/stacks.py @@ -17,7 +17,7 @@ # pylint: disable=missing-docstring import argparse -from typing import Dict, Iterable, List, Tuple +from typing import Dict, Iterable, List, Optional, Tuple from collections import defaultdict import drgn @@ -143,7 +143,9 @@ class Stacks(sdb.Locator, sdb.PrettyPrinter): input_type = "struct task_struct *" output_type = "struct task_struct *" - def __init__(self, args: str = "", name: str = "_") -> None: + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: super().__init__(args, name) self.mod_start, self.mod_end = 0, 0 self.func_start, self.func_end = 0, 0 diff --git a/sdb/commands/threads.py b/sdb/commands/threads.py index 2493890e..5845914a 100644 --- a/sdb/commands/threads.py +++ b/sdb/commands/threads.py @@ -38,7 +38,7 @@ class Threads(sdb.Locator, sdb.PrettyPrinter): comm - the thread's command EXAMPLE - sdb> threads | filter obj.comm == "java" | threads + sdb> threads | filter 'obj.comm == "java"' | threads task state pid prio comm ------------------ ------------- ---- ---- ---- 0xffff95d48b0e8000 INTERRUPTIBLE 4386 120 java diff --git a/sdb/commands/zfs/btree.py b/sdb/commands/zfs/btree.py index d25aed8d..fd170a78 100644 --- a/sdb/commands/zfs/btree.py +++ b/sdb/commands/zfs/btree.py @@ -16,7 +16,7 @@ # pylint: disable=missing-docstring -from typing import Iterable +from typing import Iterable, List, Optional import drgn import sdb @@ -52,7 +52,9 @@ class Btree(sdb.Walker): names = ["zfs_btree"] input_type = "zfs_btree_t *" - def __init__(self, args: str = "", name: str = "_") -> None: + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: 
super().__init__(args, name) self.elem_size: drgn.Object = None diff --git a/sdb/commands/zfs/dbuf.py b/sdb/commands/zfs/dbuf.py index aa085ad1..cfb3755a 100644 --- a/sdb/commands/zfs/dbuf.py +++ b/sdb/commands/zfs/dbuf.py @@ -110,7 +110,7 @@ def argfilter(self, db: drgn.Object) -> bool: def all_dnode_dbufs(self, dn: drgn.Object) -> Iterable[drgn.Object]: yield from sdb.execute_pipeline( [dn.dn_dbufs.address_of_()], - [sdb.Walk(), sdb.Cast(self.output_type)]) + [sdb.Walk(), sdb.Cast([self.output_type])]) @sdb.InputHandler('dnode_t*') def from_dnode(self, dn: drgn.Object) -> Iterable[drgn.Object]: diff --git a/sdb/commands/zfs/range_tree.py b/sdb/commands/zfs/range_tree.py index a8aa69b8..4cda8962 100644 --- a/sdb/commands/zfs/range_tree.py +++ b/sdb/commands/zfs/range_tree.py @@ -81,5 +81,6 @@ def from_range_tree(self, rt: drgn.Object) -> Iterable[drgn.Object]: enum_dict['RANGE_SEG_GAP']: 'range_seg_gap_t*', } seg_type_name = range_seg_type_to_type[int(rt.rt_type)] - yield from sdb.execute_pipeline([rt.rt_root.address_of_()], - [Btree(), Cast(seg_type_name)]) + yield from sdb.execute_pipeline( + [rt.rt_root.address_of_()], + [Btree(), Cast([seg_type_name])]) diff --git a/sdb/commands/zfs/spa.py b/sdb/commands/zfs/spa.py index b6e35a99..8dbda470 100644 --- a/sdb/commands/zfs/spa.py +++ b/sdb/commands/zfs/spa.py @@ -17,7 +17,7 @@ # pylint: disable=missing-docstring import argparse -from typing import Iterable +from typing import Iterable, List, Optional import drgn import sdb @@ -53,15 +53,17 @@ def _init_parser(cls, name: str) -> argparse.ArgumentParser: parser.add_argument("poolnames", nargs="*") return parser - def __init__(self, args: str = "", name: str = "_") -> None: + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: super().__init__(args, name) - self.arg_string = "" + self.arg_list: List[str] = [] if self.args.metaslab: - self.arg_string += "-m " + self.arg_list.append("-m") if self.args.histogram: - self.arg_string += "-H " 
+ self.arg_list.append("-H") if self.args.weight: - self.arg_string += "-w " + self.arg_list.append("-w") def pretty_print(self, spas: Iterable[drgn.Object]) -> None: print("{:18} {}".format("ADDR", "NAME")) @@ -77,12 +79,12 @@ def pretty_print(self, spas: Iterable[drgn.Object]) -> None: if self.args.vdevs: vdevs = sdb.execute_pipeline([spa], [Vdev()]) - Vdev(self.arg_string).pretty_print(vdevs, 5) + Vdev(self.arg_list).pretty_print(vdevs, 5) def no_input(self) -> drgn.Object: spas = sdb.execute_pipeline( [sdb.get_object("spa_namespace_avl").address_of_()], - [Avl(), sdb.Cast("spa_t *")], + [Avl(), sdb.Cast(["spa_t *"])], ) for spa in spas: if (self.args.poolnames and spa.spa_name.string_().decode("utf-8") diff --git a/sdb/commands/zfs/vdev.py b/sdb/commands/zfs/vdev.py index 6787e369..6781fe5c 100644 --- a/sdb/commands/zfs/vdev.py +++ b/sdb/commands/zfs/vdev.py @@ -17,7 +17,7 @@ # pylint: disable=missing-docstring import argparse -from typing import Iterable +from typing import Iterable, List, Optional import drgn import sdb @@ -58,13 +58,15 @@ def _init_parser(cls, name: str) -> argparse.ArgumentParser: parser.add_argument("vdev_ids", nargs="*", type=int) return parser - def __init__(self, args: str = "", name: str = "_") -> None: + def __init__(self, + args: Optional[List[str]] = None, + name: str = "_") -> None: super().__init__(args, name) - self.arg_string = "" + self.arg_list: List[str] = [] if self.args.histogram: - self.arg_string += "-H " + self.arg_list.append("-H") if self.args.weight: - self.arg_string += "-w " + self.arg_list.append("-w") def pretty_print(self, vdevs: Iterable[drgn.Object], @@ -106,7 +108,7 @@ def pretty_print(self, ) if self.args.metaslab: metaslabs = sdb.execute_pipeline([vdev], [Metaslab()]) - Metaslab(self.arg_string).pretty_print(metaslabs, indent + 5) + Metaslab(self.arg_list).pretty_print(metaslabs, indent + 5) @sdb.InputHandler("spa_t*") def from_spa(self, spa: drgn.Object) -> Iterable[drgn.Object]: diff --git 
a/sdb/commands/zfs/zfs_dbgmsg.py b/sdb/commands/zfs/zfs_dbgmsg.py index 9b6e73b9..3fb534e0 100644 --- a/sdb/commands/zfs/zfs_dbgmsg.py +++ b/sdb/commands/zfs/zfs_dbgmsg.py @@ -60,4 +60,4 @@ def no_input(self) -> Iterable[drgn.Object]: list_addr = proc_list.address_of_() yield from sdb.execute_pipeline( - [list_addr], [SPLList(), sdb.Cast("zfs_dbgmsg_t *")]) + [list_addr], [SPLList(), sdb.Cast(["zfs_dbgmsg_t *"])]) diff --git a/sdb/error.py b/sdb/error.py index 1469dbff..77fc37ad 100644 --- a/sdb/error.py +++ b/sdb/error.py @@ -24,7 +24,7 @@ class Error(Exception): text: str = "" def __init__(self, text: str) -> None: - self.text = 'sdb: {}'.format(text) + self.text = f"sdb: {text}" super().__init__(self.text) @@ -90,3 +90,20 @@ def __init__(self, command: str, err: SyntaxError) -> None: indicator = ''.join(spaces_str) msg += f"\n\t{indicator}" super().__init__(command, msg) + + +class ParserError(Error): + """ + Thrown when SDB fails to parse input from the user. + """ + + line: str = "" + message: str = "" + offset: int = 0 + + def __init__(self, line: str, message: str, offset: int = 0) -> None: + self.line, self.message, self.offset = line, message, offset + msg = (f"syntax error: {self.message}\n" + f" {self.line}\n" + f" {' ' * (self.offset)}^") + super().__init__(msg) diff --git a/sdb/parser.py b/sdb/parser.py new file mode 100644 index 00000000..ad8865f0 --- /dev/null +++ b/sdb/parser.py @@ -0,0 +1,250 @@ +# +# Copyright 2020 Delphix +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +""" +This module contains the logic for the tokenization and parsing +of the input given by the SDB REPL. +""" + +# +# Why Roll Our Own Parser? +# +# Our grammar in its current state could be implemented with shlex() that is +# part of the standard library if we applied some workarounds to it. That said +# the code wouldn't be clean, it would be hard to add new rules (workarounds +# on top of workarounds) and providing helpful error messages would be hard. +# +# In terms of external parsing libraries, the following ones were considered: +# * PLY (Python Lex-Yacc) +# * SLY (Sly Lex-Yacc) +# * Lark +# +# PLY attempts to model traditional Lex & Yacc and it does come with a lot of +# their baggage. There is a lot of global state, that we'd either need to +# recreate (e.g. regenerate the grammar) every time an SDB command is issued, +# or alternatively we'd need to keep track of a few global objects and reset +# their metadata in both success and error code paths. The latter is not that +# bad but it can be very invasive in parts of the code base where we really +# shouldn't care about parsing. In addition, error-handling isn't great and +# there is a lot of boilerplate and magic to it. +# +# SLY is an improved version of PLY that deals with most issues of global +# state and boilerplate code. The error-handling is still not optimal but a +# lot better, optimizing for common cases. SLY would provide a reasonable +# alternative implementation to our hand-written parser but it wasn't chosen +# mainly for one reason. It tries to optimize for traditional full-fledged +# languages which results in a few workarounds given SDB's simplistic but +# quirky command language. +# +# Lark is probably the best option compared to the above in terms of features, +# ergonomics like error-handling, and clean parser code.
The only drawback of +# this library in the context of SDB is that it is hard to debug incorrect +# grammars - the grammar is generally one whole string and if it is wrong the +# resulting stack traces end up showing methods in the library, not in the +# code that the consumer of the library wrote (which is what would generally +# happen with SLY). This is not a big deal in general but for SDB we still +# haven't finalized all the command language features (i.e. subshells or +# defining alias commands in the runtime) and our grammar isn't stable yet. +# +# Our hand-written parser below has a small implementation (less than 100 +# lines of code without the comments), provides friendly error messages, +# and it fits cleanly into our existing code. As SDB's command language +# grows and gets more stable it should be easy to replace the existing +# parser with a library like Lark. +# + +from enum import Enum +from typing import Iterable, List, Optional, Tuple + +from sdb.error import ParserError + + +class ExpressionType(Enum): + """ + The Expression types supported by the SDB parser that + have semantic meaning. Their corresponding string values + are only used for debugging purposes. + + Example: + + sdb> cmd0 arg0 | cmd1 arg1 "arg 2" ! shell_cmd shell_args ... + --------- ----------------- ======================== + + Everything underlined with '-' is a CMD (e.g. SDB command) and + '=' is a SHELL_CMD token. + """ + CMD = "__cmd__" + SHELL_CMD = "__shell_cmd__" + + +WHITESPACE = " \t" +QUOTES = '"\'' +OPERATORS = "|!" +DELIMETERS = OPERATORS + QUOTES + WHITESPACE + + +def _next_non_whitespace(line: str, index: int) -> Optional[int]: + """ + Return the index of the next non-whitespace character in `line` + starting from `index` or None if there is no such character until + the end of `line`.
+ """ + for i, c in enumerate(line[index:]): + if c not in WHITESPACE: + return i + index + return None + + +def _next_delimiter(line: str, index: int) -> Optional[int]: + """ + Return the index of the next delimeter in `line` starting from + `index` or None if there is no such character until the end of + `line`. Generally used when we are in the middle of processing + an identifier/token and want to see where it ends. + """ + for i, c in enumerate(line[index:]): + if c in DELIMETERS: + return i + index + return None + + +def tokenize(line: str) -> Iterable[Tuple[List[str], ExpressionType]]: + """ + Iterates over the line passed as an input (usually from the REPL) and + generates expressions to be evaluated by the SDB pipeline logic. The + actual expression information vary by expression type: + + [1] CMD (e.g. SDB commands) expression contain a list of strings + which contains the command (first string of list) and its + arguments (the rest of the strings in the list). + [2] A SHELL_CMD expression (e.g. basically anything after a bang !) + is a single string that contains the whole shell command, + including its arguments and the spaces between them. + + Example: + + sdb> cmd0 arg0 | cmd1 arg1 "arg 2" ! shell_cmd shell_args ... + --------- ----------------- ======================== + + Returns: + Iterable [ + (['cmd0', 'arg0'], CMD), + (['cmd1', 'arg1', 'arg 2'], CMD), + (['shell_cmd shell_args ...'], SHELL_CMD), + ] + + Note: The reason that we split the arguments for CMDs here is so we + don't have to redo that work later in the Command class where we need + to parse the arguments in argparse. Furthermore, the tokenizer here + does a better job than doing a simple split() as it parses each string + containing spaces as a single argument (e.g. space in "arg 2" in our + example). 
+ """ + # pylint: disable=too-many-statements + # pylint: disable=too-many-branches + + token_list: List[str] = [] + idx: Optional[int] = 0 + while True: + idx = _next_non_whitespace(line, idx) # type: ignore[arg-type] + if idx is None: + break + + c = line[idx] + if c == '|': + # + # We encountered a pipe which marks the end of a CMD expression. + # Yield the preceding token and move on to the next character. + # Raise error if there no CMD preceeding the pipe. + # + if not token_list or idx == (len(line) - 1): + raise ParserError(line, "freestanding pipe with no command", + idx) + yield token_list, ExpressionType.CMD + token_list = [] + idx += 1 + elif c == '!': + # + # We encountered an exclamation point which is the start of a + # SHELL_CMD. Look ahead just in case the user is trying to use + # the inequality operator (!=) and warn them if they try to do + # so. If all is good, consume everything after the bang as a + # single token for our SHELL_CMD. + # + lookahead = _next_non_whitespace(line, idx + 1) + if not lookahead: + raise ParserError(line, "no shell command specified", idx) + if line[lookahead] == "=": + raise ParserError( + line, + "predicates that use != as an operator should be quoted", + idx) + + if token_list: + yield token_list, ExpressionType.CMD + token_list = [] + yield [line[lookahead:].strip()], ExpressionType.SHELL_CMD + break + elif c in QUOTES: + # + # We encountered a double or single quote that marks the beginning + # of a string. Consume the whole string as a single token and add + # it to the token list of the current CMD that we are constructing. + # + # Note that the actual quotes enclosing the string are not part of + # the actual token. 
+ # + str_contents: List[str] = [] + str_end_idx = 0 + for str_idx, str_c in enumerate(line[idx + 1:]): + # + # If we encounter the same kind of quote then we have one of + # the following scenarios: + # + # [A] Our string contains a quote that is being escaped, at + # which point, we replace the slash preceding it with + # the actual quote character and continue consuming the + # string. + # [B] This is the end of the string, so we break out of this + # loop. + # + if str_c == c: + if str_contents and str_contents[-1] == '\\': + str_contents[-1] = c + continue + str_end_idx = str_idx + break + str_contents.append(str_c) + if str_end_idx == 0: + raise ParserError(line, "unfinished string expression", idx) + token_list.append(''.join(str_contents)) + idx += str_end_idx + 2 # + 2 added for quotes on both sides + else: + # + # We found a token that is part of a CMD expression. Add + # the token to the CMD's token list and then move on to + # the next one character or break if we've hit the end + # of the input line. + # + lookahead = _next_delimiter(line, idx) + if not lookahead: + token_list.append(line[idx:]) + break + token_list.append(line[idx:lookahead]) + idx = lookahead + + if token_list: + yield token_list, ExpressionType.CMD + token_list = [] diff --git a/sdb/pipeline.py b/sdb/pipeline.py index bb833f67..3a73883f 100644 --- a/sdb/pipeline.py +++ b/sdb/pipeline.py @@ -15,7 +15,6 @@ # """This module enables integration with the SDB REPL.""" -import shlex import subprocess import sys import itertools @@ -24,6 +23,7 @@ import drgn +import sdb.parser as parser import sdb.target as target from sdb.error import CommandArgumentsError, CommandNotFoundError from sdb.command import Address, Cast, Command, get_registered_commands @@ -55,7 +55,7 @@ def massage_input_and_call( # If we are passed a void*, cast it to the expected type. 
if (first_obj_type.kind is drgn.TypeKind.POINTER and first_obj_type.type.primitive is drgn.PrimitiveType.C_VOID): - yield from execute_pipeline(objs, [Cast(cmd.input_type), cmd]) + yield from execute_pipeline(objs, [Cast([cmd.input_type]), cmd]) return # If we are passed a foo_t when we expect a foo_t*, use its address. @@ -91,65 +91,42 @@ def invoke(myprog: drgn.Program, first_input: Iterable[drgn.Object], function is responsible for converting that string into the appropriate pipeline of Command objects, and executing it. """ - - # pylint: disable=too-many-locals - # pylint: disable=too-many-branches - # pylint: disable=too-many-statements - target.set_prog(myprog) - shell_cmd = None - # Parse the argument string. Each pipeline stage is delimited by - # a pipe character "|". If there is a "!" character detected, then - # pipe all the remaining outout into a subshell. - lexer = shlex.shlex(line, posix=False, punctuation_chars="|!") - lexer.wordchars += "();<>&[]" - all_tokens = list(lexer) - pipe_stages = [] - tokens: List[str] = [] - for num, token in enumerate(all_tokens): - if token == "|": - pipe_stages.append(" ".join(tokens)) - tokens = [] - elif token == "!": - pipe_stages.append(" ".join(tokens)) - if any(t == "!" for t in all_tokens[num + 1:]): - print("Multiple ! not supported") - return - shell_cmd = " ".join(all_tokens[num + 1:]) - break - else: - tokens.append(token) - else: - # We didn't find a !, so all remaining tokens are part of - # the last pipe - pipe_stages.append(" ".join(tokens)) - + # # Build the pipeline by constructing each of the commands we want to - # use and building a list of them. + # use and building a list of them. If a shell pipeline is constructed + # at the end save it shell_cmd. 
+ # + shell_cmd = None pipeline = [] - for stage in pipe_stages: - (name, _, args) = stage.strip().partition(" ") - if name not in get_registered_commands(): - raise CommandNotFoundError(name) - try: - pipeline.append(get_registered_commands()[name](args, name)) - except SystemExit: - # The passed in arguments to each command will be parsed in - # the command object's constructor. We use "argparse" to do - # the argument parsing, and when that detects an error, it - # will throw this exception. Rather than exiting the entire - # SDB session, we only abort this specific pipeline by raising - # a CommandArgumentsError. - raise CommandArgumentsError(name) - - pipeline[0].isfirst = True - pipeline[-1].islast = True + for cmd, cmd_type in parser.tokenize(line): + if cmd_type == parser.ExpressionType.CMD: + name, *args = cmd + if name not in get_registered_commands(): + raise CommandNotFoundError(name) + try: + pipeline.append(get_registered_commands()[name](args, name)) + except SystemExit: + # + # The passed in arguments to each command will be parsed in + # the command object's constructor. We use "argparse" to do + # the argument parsing, and when that detects an error, it + # will throw this exception. Rather than exiting the entire + # SDB session, we only abort this specific pipeline by raising + # a CommandArgumentsError. + # + raise CommandArgumentsError(name) + else: + assert cmd_type == parser.ExpressionType.SHELL_CMD + shell_cmd = cmd + # # If we have a !, redirect stdout to a shell process. This avoids # having to have a custom printing function that we pass around and # use everywhere. We'll fix stdout to point back to the normal stdout # at the end. 
+ # if shell_cmd is not None: shell_proc = subprocess.Popen(shell_cmd, shell=True, @@ -163,7 +140,10 @@ def invoke(myprog: drgn.Program, first_input: Iterable[drgn.Object], sys.stdout = shell_proc.stdin # type: ignore[assignment] try: - yield from execute_pipeline(first_input, pipeline) + if pipeline: + pipeline[0].isfirst = True + pipeline[-1].islast = True + yield from execute_pipeline(first_input, pipeline) if shell_cmd is not None: shell_proc.stdin.flush() diff --git a/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj < 1 b/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj < 1' similarity index 100% rename from tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj < 1 rename to tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj < 1' diff --git a/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj <= 1 b/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj <= 1' similarity index 100% rename from tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj <= 1 rename to tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj <= 1' diff --git a/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj == 1 b/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj == 1' similarity index 100% rename from tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj == 1 rename to tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj == 1' diff --git a/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj > 1 b/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj > 1' similarity index 100% rename from tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj > 1 rename to tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj 
> 1' diff --git a/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj >= 1 b/tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj >= 1' similarity index 100% rename from tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter obj >= 1 rename to tests/integration/data/regression_output/core/echo 0x0 0x1 0x2 | filter 'obj >= 1' diff --git a/tests/integration/data/regression_output/core/echo 0x0 | filter obj == 0 b/tests/integration/data/regression_output/core/echo 0x0 | filter 'obj == 0' similarity index 100% rename from tests/integration/data/regression_output/core/echo 0x0 | filter obj == 0 rename to tests/integration/data/regression_output/core/echo 0x0 | filter 'obj == 0' diff --git a/tests/integration/data/regression_output/core/echo 0x0 | filter obj == 1 b/tests/integration/data/regression_output/core/echo 0x0 | filter 'obj == 1' similarity index 100% rename from tests/integration/data/regression_output/core/echo 0x0 | filter obj == 1 rename to tests/integration/data/regression_output/core/echo 0x0 | filter 'obj == 1' diff --git a/tests/integration/data/regression_output/core/echo 0x1 | filter obj == obj b/tests/integration/data/regression_output/core/echo 0x1 | filter 'obj == obj' similarity index 100% rename from tests/integration/data/regression_output/core/echo 0x1 | filter obj == obj rename to tests/integration/data/regression_output/core/echo 0x1 | filter 'obj == obj' diff --git a/tests/integration/data/regression_output/core/filter obj == 1 b/tests/integration/data/regression_output/core/filter 'obj == 1' similarity index 100% rename from tests/integration/data/regression_output/core/filter obj == 1 rename to tests/integration/data/regression_output/core/filter 'obj == 1' diff --git a/tests/integration/data/regression_output/core/ptype 'struct spa' b/tests/integration/data/regression_output/core/ptype 'struct spa' new file mode 100644 index 00000000..4a4a06ee --- /dev/null +++ 
b/tests/integration/data/regression_output/core/ptype 'struct spa' @@ -0,0 +1,195 @@ +struct spa { + char spa_name[256]; + char *spa_comment; + avl_node_t spa_avl; + nvlist_t *spa_config; + nvlist_t *spa_config_syncing; + nvlist_t *spa_config_splitting; + nvlist_t *spa_load_info; + uint64_t spa_config_txg; + int spa_sync_pass; + pool_state_t spa_state; + int spa_inject_ref; + uint8_t spa_sync_on; + spa_load_state_t spa_load_state; + boolean_t spa_indirect_vdevs_loaded; + boolean_t spa_trust_config; + spa_config_source_t spa_config_source; + uint64_t spa_import_flags; + spa_taskqs_t spa_zio_taskq[7][4]; + dsl_pool_t *spa_dsl_pool; + boolean_t spa_is_initializing; + boolean_t spa_is_exporting; + metaslab_class_t *spa_normal_class; + metaslab_class_t *spa_log_class; + int spa_log_devices; + metaslab_class_t *spa_special_class; + metaslab_class_t *spa_dedup_class; + uint64_t spa_first_txg; + uint64_t spa_final_txg; + uint64_t spa_freeze_txg; + uint64_t spa_load_max_txg; + uint64_t spa_claim_max_txg; + inode_timespec_t spa_loaded_ts; + objset_t *spa_meta_objset; + kmutex_t spa_evicting_os_lock; + list_t spa_evicting_os_list; + kcondvar_t spa_evicting_os_cv; + txg_list_t spa_vdev_txg_list; + vdev_t *spa_root_vdev; + int spa_min_ashift; + int spa_max_ashift; + uint64_t spa_config_guid; + uint64_t spa_load_guid; + uint64_t spa_last_synced_guid; + list_t spa_config_dirty_list; + list_t spa_state_dirty_list; + kmutex_t *spa_alloc_locks; + avl_tree_t *spa_alloc_trees; + int spa_alloc_count; + spa_aux_vdev_t spa_spares; + spa_aux_vdev_t spa_l2cache; + nvlist_t *spa_label_features; + uint64_t spa_config_object; + uint64_t spa_config_generation; + uint64_t spa_syncing_txg; + bpobj_t spa_deferred_bpobj; + bplist_t spa_free_bplist[4]; + zio_cksum_salt_t spa_cksum_salt; + kmutex_t spa_cksum_tmpls_lock; + void *spa_cksum_tmpls[14]; + uberblock_t spa_ubsync; + uberblock_t spa_uberblock; + boolean_t spa_extreme_rewind; + kmutex_t spa_scrub_lock; + uint64_t spa_scrub_inflight; + 
uint64_t spa_load_verify_bytes; + kcondvar_t spa_scrub_io_cv; + uint8_t spa_scrub_active; + uint8_t spa_scrub_type; + uint8_t spa_scrub_finished; + uint8_t spa_scrub_started; + uint8_t spa_scrub_reopen; + uint64_t spa_scan_pass_start; + uint64_t spa_scan_pass_scrub_pause; + uint64_t spa_scan_pass_scrub_spent_paused; + uint64_t spa_scan_pass_exam; + uint64_t spa_scan_pass_issued; + boolean_t spa_resilver_deferred; + kmutex_t spa_async_lock; + kthread_t *spa_async_thread; + int spa_async_suspended; + kcondvar_t spa_async_cv; + uint16_t spa_async_tasks; + uint64_t spa_missing_tvds; + uint64_t spa_missing_tvds_allowed; + spa_removing_phys_t spa_removing_phys; + spa_vdev_removal_t *spa_vdev_removal; + spa_condensing_indirect_phys_t spa_condensing_indirect_phys; + spa_condensing_indirect_t *spa_condensing_indirect; + zthr_t *spa_condense_zthr; + uint64_t spa_checkpoint_txg; + spa_checkpoint_info_t spa_checkpoint_info; + zthr_t *spa_checkpoint_discard_zthr; + space_map_t *spa_syncing_log_sm; + avl_tree_t spa_sm_logs_by_txg; + kmutex_t spa_flushed_ms_lock; + avl_tree_t spa_metaslabs_by_flushed; + spa_unflushed_stats_t spa_unflushed_stats; + list_t spa_log_summary; + uint64_t spa_log_flushall_txg; + zthr_t *spa_livelist_delete_zthr; + zthr_t *spa_livelist_condense_zthr; + uint64_t spa_livelists_to_delete; + livelist_condense_entry_t spa_to_condense; + char *spa_root; + uint64_t spa_ena; + int spa_last_open_failed; + uint64_t spa_last_ubsync_txg; + uint64_t spa_last_ubsync_txg_ts; + uint64_t spa_load_txg; + uint64_t spa_load_txg_ts; + uint64_t spa_load_meta_errors; + uint64_t spa_load_data_errors; + uint64_t spa_verify_min_txg; + kmutex_t spa_errlog_lock; + uint64_t spa_errlog_last; + uint64_t spa_errlog_scrub; + kmutex_t spa_errlist_lock; + avl_tree_t spa_errlist_last; + avl_tree_t spa_errlist_scrub; + uint64_t spa_deflate; + uint64_t spa_history; + kmutex_t spa_history_lock; + vdev_t *spa_pending_vdev; + kmutex_t spa_props_lock; + uint64_t spa_pool_props_object; + uint64_t 
spa_bootfs; + uint64_t spa_failmode; + uint64_t spa_deadman_failmode; + uint64_t spa_delegation; + list_t spa_config_list; + zio_t **spa_async_zio_root; + zio_t *spa_suspend_zio_root; + zio_t *spa_txg_zio[4]; + kmutex_t spa_suspend_lock; + kcondvar_t spa_suspend_cv; + zio_suspend_reason_t spa_suspended; + uint8_t spa_claiming; + boolean_t spa_is_root; + int spa_minref; + spa_mode_t spa_mode; + spa_log_state_t spa_log_state; + uint64_t spa_autoexpand; + ddt_t *spa_ddt[14]; + uint64_t spa_ddt_stat_object; + uint64_t spa_dedup_dspace; + uint64_t spa_dedup_checksum; + uint64_t spa_dspace; + kmutex_t spa_vdev_top_lock; + kmutex_t spa_proc_lock; + kcondvar_t spa_proc_cv; + spa_proc_state_t spa_proc_state; + proc_t *spa_proc; + uint64_t spa_did; + boolean_t spa_autoreplace; + int spa_vdev_locks; + uint64_t spa_creation_version; + uint64_t spa_prev_software_version; + uint64_t spa_feat_for_write_obj; + uint64_t spa_feat_for_read_obj; + uint64_t spa_feat_desc_obj; + uint64_t spa_feat_enabled_txg_obj; + kmutex_t spa_feat_stats_lock; + nvlist_t *spa_feat_stats; + uint64_t spa_feat_refcount_cache[31]; + taskqid_t spa_deadman_tqid; + uint64_t spa_deadman_calls; + hrtime_t spa_sync_starttime; + uint64_t spa_deadman_synctime; + uint64_t spa_deadman_ziotime; + uint64_t spa_all_vdev_zaps; + spa_avz_action_t spa_avz_action; + uint64_t spa_autotrim; + uint64_t spa_errata; + spa_stats_t spa_stats; + spa_keystore_t spa_keystore; + uint64_t spa_lowmem_page_load; + uint64_t spa_lowmem_last_txg; + hrtime_t spa_ccw_fail_time; + taskq_t *spa_zvol_taskq; + taskq_t *spa_prefetch_taskq; + uint64_t spa_multihost; + mmp_thread_t spa_mmp; + list_t spa_leaf_list; + uint64_t spa_leaf_list_gen; + uint32_t spa_hostid; + kmutex_t spa_activities_lock; + kcondvar_t spa_activities_cv; + kcondvar_t spa_waiters_cv; + int spa_waiters; + boolean_t spa_waiters_cancel; + spa_config_lock_t spa_config_lock[7]; + zfs_refcount_t spa_refcount; + taskq_t *spa_upgrade_taskq; +} diff --git 
a/tests/integration/data/regression_output/core/ptype struct spa b/tests/integration/data/regression_output/core/ptype struct spa new file mode 100644 index 00000000..3a998af7 --- /dev/null +++ b/tests/integration/data/regression_output/core/ptype struct spa @@ -0,0 +1 @@ +sdb: ptype: skip keyword 'struct' or quote your type "struct " diff --git a/tests/integration/data/regression_output/core/sizeof struct spa b/tests/integration/data/regression_output/core/sizeof struct spa index 019f62f1..096a29d3 100644 --- a/tests/integration/data/regression_output/core/sizeof struct spa +++ b/tests/integration/data/regression_output/core/sizeof struct spa @@ -1 +1 @@ -sdb: sizeof: skip keyword 'struct' and try again +sdb: sizeof: skip keyword 'struct' or quote your type "struct " diff --git a/tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg < 1624 | member spa_name b/tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg < 1624' | member spa_name similarity index 100% rename from tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg < 1624 | member spa_name rename to tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg < 1624' | member spa_name diff --git a/tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg <= 1624 | member spa_name b/tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg <= 1624' | member spa_name similarity index 100% rename from tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg <= 1624 | member spa_name rename to tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg <= 1624' | member spa_name diff --git a/tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg == 1624 | member spa_name b/tests/integration/data/regression_output/core/spa rpool | filter 
'obj.spa_syncing_txg == 1624' | member spa_name similarity index 100% rename from tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg == 1624 | member spa_name rename to tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg == 1624' | member spa_name diff --git a/tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg > 1624 | member spa_name b/tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg > 1624' | member spa_name similarity index 100% rename from tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg > 1624 | member spa_name rename to tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg > 1624' | member spa_name diff --git a/tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg >= 1624 | member spa_name b/tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg >= 1624' | member spa_name similarity index 100% rename from tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg >= 1624 | member spa_name rename to tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg >= 1624' | member spa_name diff --git a/tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg bogus_op 1624 b/tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg bogus_op 1624' similarity index 100% rename from tests/integration/data/regression_output/core/spa rpool | filter obj.spa_syncing_txg bogus_op 1624 rename to tests/integration/data/regression_output/core/spa rpool | filter 'obj.spa_syncing_txg bogus_op 1624' diff --git "a/tests/integration/data/regression_output/core/thread | filter obj.comm == \"bogus\" | thread" "b/tests/integration/data/regression_output/core/thread | filter \"obj.comm == \\\"bogus\\\"\" | thread" 
similarity index 100% rename from "tests/integration/data/regression_output/core/thread | filter obj.comm == \"bogus\" | thread" rename to "tests/integration/data/regression_output/core/thread | filter \"obj.comm == \\\"bogus\\\"\" | thread" diff --git a/tests/integration/data/regression_output/core/zfs_dbgmsg | filter == obj b/tests/integration/data/regression_output/core/zfs_dbgmsg | filter '== obj' similarity index 100% rename from tests/integration/data/regression_output/core/zfs_dbgmsg | filter == obj rename to tests/integration/data/regression_output/core/zfs_dbgmsg | filter '== obj' diff --git a/tests/integration/data/regression_output/core/zfs_dbgmsg | filter obj == b/tests/integration/data/regression_output/core/zfs_dbgmsg | filter 'obj ==' similarity index 100% rename from tests/integration/data/regression_output/core/zfs_dbgmsg | filter obj == rename to tests/integration/data/regression_output/core/zfs_dbgmsg | filter 'obj ==' diff --git a/tests/integration/data/regression_output/core/zfs_dbgmsg | filter objspa rpool | filter obj.bogus == 1624 b/tests/integration/data/regression_output/core/zfs_dbgmsg | filter 'obj'spa rpool | filter 'obj.bogus == 1624' similarity index 100% rename from tests/integration/data/regression_output/core/zfs_dbgmsg | filter objspa rpool | filter obj.bogus == 1624 rename to tests/integration/data/regression_output/core/zfs_dbgmsg | filter 'obj'spa rpool | filter 'obj.bogus == 1624' diff --git a/tests/integration/data/regression_output/linux/dmesg | filter obj.level == 3 | dmesg b/tests/integration/data/regression_output/linux/dmesg | filter 'obj.level == 3' | dmesg similarity index 100% rename from tests/integration/data/regression_output/linux/dmesg | filter obj.level == 3 | dmesg rename to tests/integration/data/regression_output/linux/dmesg | filter 'obj.level == 3' | dmesg diff --git "a/tests/integration/data/regression_output/linux/slabs -s active_objs -o \"active_objs,util,name\"" 
b/tests/integration/data/regression_output/linux/slabs -s active_objs -o active_objs,util,name similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs -s active_objs -o \"active_objs,util,name\"" rename to tests/integration/data/regression_output/linux/slabs -s active_objs -o active_objs,util,name diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"UNIX\" | slub_cache | count" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"UNIX\"' | slub_cache | count" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"UNIX\" | slub_cache | count" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"UNIX\"' | slub_cache | count" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 0" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 0" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member 
cpu_slab | percpu 0 1" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0 1" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 0 1" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0 1" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 0 2 1" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0 2 1" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 0 2 1" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 0 2 1" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 1" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 1" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 1" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 1" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 100" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 100" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 100" rename to 
"tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 100" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 2" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 2" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 2" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 2" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 3" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 3" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"kmalloc-8\" | member cpu_slab | percpu 3" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"kmalloc-8\"' | member cpu_slab | percpu 3" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | slub_cache" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | slub_cache" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | slub_cache | cast zio_t * | member io_spa.spa_name" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache | cast zio_t * | member io_spa.spa_name" similarity index 
100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | slub_cache | cast zio_t * | member io_spa.spa_name" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache | cast zio_t * | member io_spa.spa_name" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | slub_cache | count" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache | count" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | slub_cache | count" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | slub_cache | count" diff --git "a/tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | walk" "b/tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | walk" similarity index 100% rename from "tests/integration/data/regression_output/linux/slabs | filter obj.name == \"zio_cache\" | walk" rename to "tests/integration/data/regression_output/linux/slabs | filter 'obj.name == \"zio_cache\"' | walk" diff --git "a/tests/integration/data/regression_output/linux/threads | filter obj.comm == \"java\" | stack" "b/tests/integration/data/regression_output/linux/threads | filter 'obj.comm == \"java\"' | stack" similarity index 100% rename from "tests/integration/data/regression_output/linux/threads | filter obj.comm == \"java\" | stack" rename to "tests/integration/data/regression_output/linux/threads | filter 'obj.comm == \"java\"' | stack" diff --git "a/tests/integration/data/regression_output/linux/threads | filter obj.comm == \"java\" | threads" "b/tests/integration/data/regression_output/linux/threads | filter 'obj.comm == \"java\"' | threads" similarity index 100% rename from 
"tests/integration/data/regression_output/linux/threads | filter obj.comm == \"java\" | threads" rename to "tests/integration/data/regression_output/linux/threads | filter 'obj.comm == \"java\"' | threads" diff --git a/tests/integration/data/regression_output/spl/spl_kmem_caches -o name,entry_size -s entry_size b/tests/integration/data/regression_output/spl/spl_kmem_caches -o name,entry_size -s entry_size new file mode 100644 index 00000000..b09ea5f6 --- /dev/null +++ b/tests/integration/data/regression_output/spl/spl_kmem_caches -o name,entry_size -s entry_size @@ -0,0 +1,140 @@ +name entry_size +------------------------ ---------- +zio_data_buf_16777216 16785408 +zio_buf_16777216 16785408 +zio_data_buf_14680064 14686208 +zio_buf_14680064 14686208 +zio_data_buf_12582912 12589056 +zio_buf_12582912 12589056 +zio_data_buf_10485760 10491221 +zio_buf_10485760 10491221 +zio_data_buf_8388608 8394069 +zio_buf_8388608 8394069 +zio_data_buf_7340032 7345152 +zio_buf_7340032 7345152 +zio_data_buf_6291456 6296371 +zio_buf_6291456 6296371 +zio_data_buf_5242880 5247658 +zio_buf_5242880 5247658 +zio_data_buf_4194304 4198985 +zio_buf_4194304 4198985 +zio_data_buf_3670016 3674624 +zio_buf_3670016 3674624 +zio_data_buf_3145728 3150336 +zio_buf_3145728 3150336 +zio_data_buf_2621440 2626048 +zio_buf_2621440 2626048 +zio_data_buf_2097152 2101760 +zio_buf_2097152 2101760 +zio_data_buf_1835008 1839616 +zio_buf_1835008 1839616 +zio_data_buf_1572864 1577472 +zio_buf_1572864 1577472 +zio_data_buf_1310720 1315328 +zio_buf_1310720 1315328 +zio_data_buf_1048576 1053184 +zio_buf_1048576 1053184 +zio_data_buf_917504 922112 +zio_buf_917504 922112 +zio_data_buf_786432 791040 +zio_buf_786432 791040 +zio_data_buf_655360 659968 +zio_buf_655360 659968 +zio_data_buf_524288 528896 +zio_buf_524288 528896 +zio_data_buf_458752 463360 +zio_buf_458752 463360 +zio_data_buf_393216 397824 +zio_buf_393216 397824 +zio_data_buf_327680 332288 +zio_buf_327680 332288 +spl_zlib_workspace_cache 268152 
+zio_data_buf_262144 266752 +zio_buf_262144 266752 +zio_data_buf_229376 233984 +zio_buf_229376 233984 +zio_data_buf_196608 201216 +zio_buf_196608 201216 +zio_data_buf_163840 168448 +zio_buf_163840 168448 +zio_data_buf_131072 135680 +zio_buf_131072 135680 +zio_data_buf_114688 119296 +zio_buf_114688 119296 +zio_data_buf_98304 102912 +zio_buf_98304 102912 +zio_data_buf_81920 86528 +zio_buf_81920 86528 +zio_data_buf_65536 70144 +zio_buf_65536 70144 +zio_data_buf_57344 61952 +zio_buf_57344 61952 +zio_data_buf_49152 53760 +zio_buf_49152 53760 +zio_data_buf_40960 45568 +zio_buf_40960 45568 +zio_data_buf_32768 37376 +zio_buf_32768 37376 +zio_data_buf_28672 33280 +zio_buf_28672 33280 +zio_data_buf_24576 29184 +zio_buf_24576 29184 +zio_data_buf_20480 25088 +zio_buf_20480 25088 +ddt_cache 24904 +zio_data_buf_16384 16384 +zio_data_buf_14336 16384 +zio_buf_16384 16384 +zio_buf_14336 16384 +lz4_cache 16384 +zio_data_buf_12288 12288 +zio_data_buf_10240 12288 +zio_buf_12288 12288 +zio_buf_10240 12288 +zio_data_buf_8192 8192 +zio_data_buf_7168 8192 +zio_data_buf_6144 8192 +zio_data_buf_5120 8192 +zio_buf_8192 8192 +zio_buf_7168 8192 +zio_buf_6144 8192 +zio_buf_5120 8192 +zio_data_buf_4096 4096 +zio_buf_4096 4096 +zfs_btree_leaf_cache 4096 +zio_data_buf_3584 3584 +zio_buf_3584 3584 +zio_data_buf_3072 3072 +zio_buf_3072 3072 +zio_data_buf_2560 2560 +zio_buf_2560 2560 +zio_data_buf_2048 2048 +zio_buf_2048 2048 +zio_data_buf_1536 1536 +zio_buf_1536 1536 +zio_cache 1248 +zfs_znode_cache 1088 +zio_data_buf_1024 1024 +zio_buf_1024 1024 +dnode_t 912 +zio_data_buf_512 512 +zio_buf_512 512 +kcf_areq_cache 512 +ddt_entry_cache 448 +arc_buf_hdr_t_full_crypt 392 +zil_lwb_cache 376 +dmu_buf_impl_t 360 +arc_buf_hdr_t_full 328 +sa_cache 248 +kcf_sreq_cache 192 +kcf_context_cache 192 +sio_cache_2 168 +zil_zcw_cache 152 +sio_cache_1 152 +sio_cache_0 136 +arc_buf_hdr_t_l2only 96 +zfs_znode_hold_cache 88 +arc_buf_t 80 +zio_link_cache 48 +abd_t 40 +mod_hash_entries 24 diff --git 
a/tests/integration/data/regression_output/spl/spl_kmem_caches -o name,source b/tests/integration/data/regression_output/spl/spl_kmem_caches -o name,source new file mode 100644 index 00000000..435d8165 --- /dev/null +++ b/tests/integration/data/regression_output/spl/spl_kmem_caches -o name,source @@ -0,0 +1,140 @@ +name source +------------------------ ------------------------------ +abd_t abd_t[SLUB] +arc_buf_hdr_t_full arc_buf_hdr_t_full[SLUB] +arc_buf_hdr_t_full_crypt arc_buf_hdr_t_full_crypt[SLUB] +arc_buf_hdr_t_l2only arc_buf_hdr_t_l2only[SLUB] +arc_buf_t arc_buf_t[SLUB] +ddt_cache ddt_cache[SPL ] +ddt_entry_cache ddt_entry_cache[SLUB] +dmu_buf_impl_t dmu_buf_impl_t[SLUB] +dnode_t dnode_t[SLUB] +kcf_areq_cache kcf_areq_cache[SLUB] +kcf_context_cache kcf_context_cache[SLUB] +kcf_sreq_cache kcf_sreq_cache[SLUB] +lz4_cache lz4_cache[SLUB] +mod_hash_entries mod_hash_entries[SLUB] +sa_cache sa_cache[SLUB] +sio_cache_0 sio_cache_0[SLUB] +sio_cache_1 sio_cache_1[SLUB] +sio_cache_2 sio_cache_2[SLUB] +spl_zlib_workspace_cache spl_zlib_workspace_cache[SPL ] +zfs_btree_leaf_cache zfs_btree_leaf_cache[SLUB] +zfs_znode_cache zfs_znode_cache[SLUB] +zfs_znode_hold_cache zfs_znode_hold_cache[SLUB] +zil_lwb_cache zil_lwb_cache[SLUB] +zil_zcw_cache zil_zcw_cache[SLUB] +zio_buf_1024 zio_buf_1024[SLUB] +zio_buf_10240 zio_buf_10240[SLUB] +zio_buf_1048576 zio_buf_1048576[SPL ] +zio_buf_10485760 zio_buf_10485760[SPL ] +zio_buf_114688 zio_buf_114688[SPL ] +zio_buf_12288 zio_buf_12288[SLUB] +zio_buf_12582912 zio_buf_12582912[SPL ] +zio_buf_131072 zio_buf_131072[SPL ] +zio_buf_1310720 zio_buf_1310720[SPL ] +zio_buf_14336 zio_buf_14336[SLUB] +zio_buf_14680064 zio_buf_14680064[SPL ] +zio_buf_1536 zio_buf_1536[SLUB] +zio_buf_1572864 zio_buf_1572864[SPL ] +zio_buf_16384 zio_buf_16384[SLUB] +zio_buf_163840 zio_buf_163840[SPL ] +zio_buf_16777216 zio_buf_16777216[SPL ] +zio_buf_1835008 zio_buf_1835008[SPL ] +zio_buf_196608 zio_buf_196608[SPL ] +zio_buf_2048 zio_buf_2048[SLUB] +zio_buf_20480 
zio_buf_20480[SPL ] +zio_buf_2097152 zio_buf_2097152[SPL ] +zio_buf_229376 zio_buf_229376[SPL ] +zio_buf_24576 zio_buf_24576[SPL ] +zio_buf_2560 zio_buf_2560[SLUB] +zio_buf_262144 zio_buf_262144[SPL ] +zio_buf_2621440 zio_buf_2621440[SPL ] +zio_buf_28672 zio_buf_28672[SPL ] +zio_buf_3072 zio_buf_3072[SLUB] +zio_buf_3145728 zio_buf_3145728[SPL ] +zio_buf_32768 zio_buf_32768[SPL ] +zio_buf_327680 zio_buf_327680[SPL ] +zio_buf_3584 zio_buf_3584[SLUB] +zio_buf_3670016 zio_buf_3670016[SPL ] +zio_buf_393216 zio_buf_393216[SPL ] +zio_buf_4096 zio_buf_4096[SLUB] +zio_buf_40960 zio_buf_40960[SPL ] +zio_buf_4194304 zio_buf_4194304[SPL ] +zio_buf_458752 zio_buf_458752[SPL ] +zio_buf_49152 zio_buf_49152[SPL ] +zio_buf_512 zio_buf_512[SLUB] +zio_buf_5120 zio_buf_5120[SLUB] +zio_buf_524288 zio_buf_524288[SPL ] +zio_buf_5242880 zio_buf_5242880[SPL ] +zio_buf_57344 zio_buf_57344[SPL ] +zio_buf_6144 zio_buf_6144[SLUB] +zio_buf_6291456 zio_buf_6291456[SPL ] +zio_buf_65536 zio_buf_65536[SPL ] +zio_buf_655360 zio_buf_655360[SPL ] +zio_buf_7168 zio_buf_7168[SLUB] +zio_buf_7340032 zio_buf_7340032[SPL ] +zio_buf_786432 zio_buf_786432[SPL ] +zio_buf_8192 zio_buf_8192[SLUB] +zio_buf_81920 zio_buf_81920[SPL ] +zio_buf_8388608 zio_buf_8388608[SPL ] +zio_buf_917504 zio_buf_917504[SPL ] +zio_buf_98304 zio_buf_98304[SPL ] +zio_cache zio_cache[SLUB] +zio_data_buf_1024 zio_data_buf_1024[SLUB] +zio_data_buf_10240 zio_data_buf_10240[SLUB] +zio_data_buf_1048576 zio_data_buf_1048576[SPL ] +zio_data_buf_10485760 zio_data_buf_10485760[SPL ] +zio_data_buf_114688 zio_data_buf_114688[SPL ] +zio_data_buf_12288 zio_data_buf_12288[SLUB] +zio_data_buf_12582912 zio_data_buf_12582912[SPL ] +zio_data_buf_131072 zio_data_buf_131072[SPL ] +zio_data_buf_1310720 zio_data_buf_1310720[SPL ] +zio_data_buf_14336 zio_data_buf_14336[SLUB] +zio_data_buf_14680064 zio_data_buf_14680064[SPL ] +zio_data_buf_1536 zio_data_buf_1536[SLUB] +zio_data_buf_1572864 zio_data_buf_1572864[SPL ] +zio_data_buf_16384 
zio_data_buf_16384[SLUB] +zio_data_buf_163840 zio_data_buf_163840[SPL ] +zio_data_buf_16777216 zio_data_buf_16777216[SPL ] +zio_data_buf_1835008 zio_data_buf_1835008[SPL ] +zio_data_buf_196608 zio_data_buf_196608[SPL ] +zio_data_buf_2048 zio_data_buf_2048[SLUB] +zio_data_buf_20480 zio_data_buf_20480[SPL ] +zio_data_buf_2097152 zio_data_buf_2097152[SPL ] +zio_data_buf_229376 zio_data_buf_229376[SPL ] +zio_data_buf_24576 zio_data_buf_24576[SPL ] +zio_data_buf_2560 zio_data_buf_2560[SLUB] +zio_data_buf_262144 zio_data_buf_262144[SPL ] +zio_data_buf_2621440 zio_data_buf_2621440[SPL ] +zio_data_buf_28672 zio_data_buf_28672[SPL ] +zio_data_buf_3072 zio_data_buf_3072[SLUB] +zio_data_buf_3145728 zio_data_buf_3145728[SPL ] +zio_data_buf_32768 zio_data_buf_32768[SPL ] +zio_data_buf_327680 zio_data_buf_327680[SPL ] +zio_data_buf_3584 zio_data_buf_3584[SLUB] +zio_data_buf_3670016 zio_data_buf_3670016[SPL ] +zio_data_buf_393216 zio_data_buf_393216[SPL ] +zio_data_buf_4096 zio_data_buf_4096[SLUB] +zio_data_buf_40960 zio_data_buf_40960[SPL ] +zio_data_buf_4194304 zio_data_buf_4194304[SPL ] +zio_data_buf_458752 zio_data_buf_458752[SPL ] +zio_data_buf_49152 zio_data_buf_49152[SPL ] +zio_data_buf_512 zio_data_buf_512[SLUB] +zio_data_buf_5120 zio_data_buf_5120[SLUB] +zio_data_buf_524288 zio_data_buf_524288[SPL ] +zio_data_buf_5242880 zio_data_buf_5242880[SPL ] +zio_data_buf_57344 zio_data_buf_57344[SPL ] +zio_data_buf_6144 zio_data_buf_6144[SLUB] +zio_data_buf_6291456 zio_data_buf_6291456[SPL ] +zio_data_buf_65536 zio_data_buf_65536[SPL ] +zio_data_buf_655360 zio_data_buf_655360[SPL ] +zio_data_buf_7168 zio_data_buf_7168[SLUB] +zio_data_buf_7340032 zio_data_buf_7340032[SPL ] +zio_data_buf_786432 zio_data_buf_786432[SPL ] +zio_data_buf_8192 zio_data_buf_8192[SLUB] +zio_data_buf_81920 zio_data_buf_81920[SPL ] +zio_data_buf_8388608 zio_data_buf_8388608[SPL ] +zio_data_buf_917504 zio_data_buf_917504[SPL ] +zio_data_buf_98304 zio_data_buf_98304[SPL ] +zio_link_cache zio_link_cache[SLUB] 
diff --git a/tests/integration/data/regression_output/spl/spl_kmem_caches | filter obj.skc_linux_cache == 0 | spl_cache b/tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_linux_cache == 0' | spl_cache similarity index 100% rename from tests/integration/data/regression_output/spl/spl_kmem_caches | filter obj.skc_linux_cache == 0 | spl_cache rename to tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_linux_cache == 0' | spl_cache diff --git a/tests/integration/data/regression_output/spl/spl_kmem_caches | filter obj.skc_linux_cache == 0 | spl_cache | cnt b/tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_linux_cache == 0' | spl_cache | cnt similarity index 100% rename from tests/integration/data/regression_output/spl/spl_kmem_caches | filter obj.skc_linux_cache == 0 | spl_cache | cnt rename to tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_linux_cache == 0' | spl_cache | cnt diff --git a/tests/integration/data/regression_output/spl/spl_kmem_caches | filter obj.skc_linux_cache > 0 | filter obj.skc_obj_alloc > 0 | head 1 | spl_cache b/tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_linux_cache > 0' | filter 'obj.skc_obj_alloc > 0' | head 1 | spl_cache similarity index 100% rename from tests/integration/data/regression_output/spl/spl_kmem_caches | filter obj.skc_linux_cache > 0 | filter obj.skc_obj_alloc > 0 | head 1 | spl_cache rename to tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_linux_cache > 0' | filter 'obj.skc_obj_alloc > 0' | head 1 | spl_cache diff --git "a/tests/integration/data/regression_output/spl/spl_kmem_caches | filter obj.skc_name == \"ddt_cache\" | walk" "b/tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_name == \"ddt_cache\"' | walk" similarity index 100% rename from "tests/integration/data/regression_output/spl/spl_kmem_caches | filter 
obj.skc_name == \"ddt_cache\" | walk" rename to "tests/integration/data/regression_output/spl/spl_kmem_caches | filter 'obj.skc_name == \"ddt_cache\"' | walk" diff --git a/tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_allocatable.rt_histogram | zhist b/tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_allocatable.rt_histogram | zhist similarity index 100% rename from tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_allocatable.rt_histogram | zhist rename to tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_allocatable.rt_histogram | zhist diff --git a/tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_sm.sm_phys.smp_histogram | zhist b/tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_sm.sm_phys.smp_histogram | zhist similarity index 100% rename from tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_sm.sm_phys.smp_histogram | zhist rename to tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_sm.sm_phys.smp_histogram | zhist diff --git a/tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_sm.sm_phys.smp_histogram | zhist 9 b/tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_sm.sm_phys.smp_histogram | zhist 9 similarity index 100% rename from tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member 
ms_sm.sm_phys.smp_histogram | zhist 9 rename to tests/integration/data/regression_output/zfs/spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_sm.sm_phys.smp_histogram | zhist 9 diff --git a/tests/integration/test_core_generic.py b/tests/integration/test_core_generic.py index 0934fd9f..4d9d52e7 100644 --- a/tests/integration/test_core_generic.py +++ b/tests/integration/test_core_generic.py @@ -48,31 +48,31 @@ "addr jiffies | deref", # filter - no input - "filter obj == 1", + "filter 'obj == 1'", # filter - match - "echo 0x0 | filter obj == 0", + "echo 0x0 | filter 'obj == 0'", # filter - no match - "echo 0x0 | filter obj == 1", + "echo 0x0 | filter 'obj == 1'", # filter - identity - "echo 0x1 | filter obj == obj", + "echo 0x1 | filter 'obj == obj'", # filter - multiple entries match one (eq) - "echo 0x0 0x1 0x2 | filter obj == 1", + "echo 0x0 0x1 0x2 | filter 'obj == 1'", # filter - multiple entries match one (gt) - "echo 0x0 0x1 0x2 | filter obj > 1", + "echo 0x0 0x1 0x2 | filter 'obj > 1'", # filter - multiple entries match one (ge) - "echo 0x0 0x1 0x2 | filter obj >= 1", + "echo 0x0 0x1 0x2 | filter 'obj >= 1'", # filter - multiple entries match one (lt) - "echo 0x0 0x1 0x2 | filter obj < 1", + "echo 0x0 0x1 0x2 | filter 'obj < 1'", # filter - multiple entries match one (le) - "echo 0x0 0x1 0x2 | filter obj <= 1", + "echo 0x0 0x1 0x2 | filter 'obj <= 1'", # filter - deref member - "spa rpool | filter obj.spa_syncing_txg == 1624 | member spa_name", - "spa rpool | filter obj.spa_syncing_txg >= 1624 | member spa_name", - "spa rpool | filter obj.spa_syncing_txg <= 1624 | member spa_name", - "spa rpool | filter obj.spa_syncing_txg < 1624 | member spa_name", - "spa rpool | filter obj.spa_syncing_txg > 1624 | member spa_name", + "spa rpool | filter 'obj.spa_syncing_txg == 1624' | member spa_name", + "spa rpool | filter 'obj.spa_syncing_txg >= 1624' | member spa_name", + "spa rpool | filter 'obj.spa_syncing_txg <= 1624' | member spa_name", + 
"spa rpool | filter 'obj.spa_syncing_txg < 1624' | member spa_name", + "spa rpool | filter 'obj.spa_syncing_txg > 1624' | member spa_name", # locator that receives no input from a filter - 'thread | filter obj.comm == \"bogus\" | thread', + 'thread | filter "obj.comm == \\"bogus\\"" | thread', # member - generic "member no_object", @@ -120,6 +120,7 @@ "ptype spa_t", "ptype spa vdev", "ptype zfs_case v_t thread_union", + "ptype 'struct spa'", # sizeof "sizeof size_t", @@ -155,15 +156,15 @@ "addr jiffies | deref | deref", # filter - no right-hand side - "zfs_dbgmsg | filter obj ==", + "zfs_dbgmsg | filter 'obj =='", # filter - no left-hand side - "zfs_dbgmsg | filter == obj", + "zfs_dbgmsg | filter '== obj'", # filter - no operator - "zfs_dbgmsg | filter obj" + "zfs_dbgmsg | filter 'obj'" # filter - bogus member - "spa rpool | filter obj.bogus == 1624", + "spa rpool | filter 'obj.bogus == 1624'", # filter - bogus op - "spa rpool | filter obj.spa_syncing_txg bogus_op 1624", + "spa rpool | filter 'obj.spa_syncing_txg bogus_op 1624'", # member user arrow notation in embedded struct member "spa | member spa_ubsync->ub_rootbp", @@ -180,6 +181,8 @@ # ptype - bogus type "ptype bogus_t", + # ptype - freestanding C keyword + "ptype struct spa", # pretty printer passed incorrect type "spa | range_tree", diff --git a/tests/integration/test_linux_generic.py b/tests/integration/test_linux_generic.py index 8aa010c2..5a1f00ba 100644 --- a/tests/integration/test_linux_generic.py +++ b/tests/integration/test_linux_generic.py @@ -16,6 +16,7 @@ # pylint: disable=missing-module-docstring # pylint: disable=missing-function-docstring +# pylint: disable=line-too-long from typing import Any @@ -32,10 +33,10 @@ "addr tcp_sockets_allocated | cpu_counter_sum", # percpu - 'slabs | filter obj.name == "kmalloc-8" | member cpu_slab | percpu', - 'slabs | filter obj.name == "kmalloc-8" | member cpu_slab | percpu 0', - 'slabs | filter obj.name == "kmalloc-8" | member cpu_slab | percpu 1', - 'slabs | 
filter obj.name == "kmalloc-8" | member cpu_slab | percpu 0 1', + 'slabs | filter \'obj.name == "kmalloc-8"\' | member cpu_slab | percpu', + 'slabs | filter \'obj.name == "kmalloc-8"\' | member cpu_slab | percpu 0', + 'slabs | filter \'obj.name == "kmalloc-8"\' | member cpu_slab | percpu 1', + 'slabs | filter \'obj.name == "kmalloc-8"\' | member cpu_slab | percpu 0 1', # fget "find_task 1 | fget 1 4", @@ -63,19 +64,19 @@ "slabs", "slabs -v", "slabs -s util", - 'slabs -s active_objs -o "active_objs,util,name"', + 'slabs -s active_objs -o active_objs,util,name', "slabs | pp", "slabs -s util | slabs", "slabs | head 2 | slabs", # slub - 'slabs | filter obj.name == "zio_cache" | slub_cache', - 'slabs | filter obj.name == "zio_cache" | walk', - 'slabs | filter obj.name == "zio_cache" | slub_cache | count', - 'slabs | filter obj.name == "zio_cache" | slub_cache | cast zio_t * | member io_spa.spa_name', + 'slabs | filter \'obj.name == "zio_cache"\' | slub_cache', + 'slabs | filter \'obj.name == "zio_cache"\' | walk', + 'slabs | filter \'obj.name == "zio_cache"\' | slub_cache | count', + 'slabs | filter \'obj.name == "zio_cache"\' | slub_cache | cast zio_t * | member io_spa.spa_name', # slub - expected inconsistent freelist test # (still a positive tests because we want to keep going besides inconsistencies) - 'slabs | filter obj.name == "UNIX" | slub_cache | count', + 'slabs | filter \'obj.name == "UNIX"\' | slub_cache | count', # stacks "stacks", @@ -84,14 +85,14 @@ "stacks -c spa_sync", "stacks -m zfs -c spa_sync", "stacks -m zfs -c zthr_procedure", - 'threads | filter obj.comm == "java" | stack', + 'threads | filter \'obj.comm == "java"\' | stack', "stacks -m zfs | count", "echo 0xffffa089669edc00 | stack", # threads "threads", "threads | count", - 'threads | filter obj.comm == "java" | threads', + 'threads | filter \'obj.comm == "java"\' | threads', "thread", ] @@ -99,7 +100,7 @@ # dmesg "dmesg", "dmesg | pp", - "dmesg | filter obj.level == 3 | dmesg", + "dmesg | 
filter 'obj.level == 3' | dmesg", ] NEG_CMDS = [ @@ -126,10 +127,10 @@ "addr modules | lxlist module bogus_member | member name", # percpu - not valid CPU number - 'slabs | filter obj.name == "kmalloc-8" | member cpu_slab | percpu 2', - 'slabs | filter obj.name == "kmalloc-8" | member cpu_slab | percpu 3', - 'slabs | filter obj.name == "kmalloc-8" | member cpu_slab | percpu 100', - 'slabs | filter obj.name == "kmalloc-8" | member cpu_slab | percpu 0 2 1', + 'slabs | filter \'obj.name == "kmalloc-8"\' | member cpu_slab | percpu 2', + 'slabs | filter \'obj.name == "kmalloc-8"\' | member cpu_slab | percpu 3', + 'slabs | filter \'obj.name == "kmalloc-8"\' | member cpu_slab | percpu 100', + 'slabs | filter \'obj.name == "kmalloc-8"\' | member cpu_slab | percpu 0 2 1', # rbtree "addr vmap_area_root | rbtree bogus_type rb_node", diff --git a/tests/integration/test_spl_generic.py b/tests/integration/test_spl_generic.py index eccfc282..8a40aba8 100644 --- a/tests/integration/test_spl_generic.py +++ b/tests/integration/test_spl_generic.py @@ -33,16 +33,18 @@ "addr arc_mru | member [0].arcs_list[1] | multilist | head", # spl_cache walker - 'spl_kmem_caches | filter obj.skc_name == "ddt_cache" | walk', - "spl_kmem_caches | filter obj.skc_linux_cache == 0 | spl_cache", - "spl_kmem_caches | filter obj.skc_linux_cache == 0 | spl_cache | cnt", + 'spl_kmem_caches | filter \'obj.skc_name == "ddt_cache"\' | walk', + "spl_kmem_caches | filter 'obj.skc_linux_cache == 0' | spl_cache", + "spl_kmem_caches | filter 'obj.skc_linux_cache == 0' | spl_cache | cnt", # spl_cache - ensure we can walk caches backed by SLUB - "spl_kmem_caches | filter obj.skc_linux_cache > 0 | filter obj.skc_obj_alloc > 0 | head 1 | spl_cache", + "spl_kmem_caches | filter 'obj.skc_linux_cache > 0' | filter 'obj.skc_obj_alloc > 0' | head 1 | spl_cache", # spl_kmem_caches "spl_kmem_caches", + "spl_kmem_caches -o name,source", "spl_kmem_caches -v", "spl_kmem_caches -s entry_size", + "spl_kmem_caches -o name,entry_size 
-s entry_size", "spl_kmem_caches -s entry_size | head 4 | spl_kmem_caches", "spl_kmem_caches | pp", ] diff --git a/tests/integration/test_zfs_generic.py b/tests/integration/test_zfs_generic.py index ac5c23f1..3a295a27 100644 --- a/tests/integration/test_zfs_generic.py +++ b/tests/integration/test_zfs_generic.py @@ -57,9 +57,9 @@ # zfs_histogram "spa data | member spa_normal_class.mc_histogram | zfs_histogram", - "spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_sm.sm_phys.smp_histogram | zhist", - "spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_sm.sm_phys.smp_histogram | zhist 9", - "spa data | vdev | metaslab | filter obj.ms_loaded == 1 | head 1 | member ms_allocatable.rt_histogram | zhist", + "spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_sm.sm_phys.smp_histogram | zhist", + "spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_sm.sm_phys.smp_histogram | zhist 9", + "spa data | vdev | metaslab | filter 'obj.ms_loaded == 1' | head 1 | member ms_allocatable.rt_histogram | zhist", ] # yapf: disable diff --git a/tests/unit/commands/test_filter.py b/tests/unit/commands/test_filter.py index 0dc30207..dc03eeb9 100644 --- a/tests/unit/commands/test_filter.py +++ b/tests/unit/commands/test_filter.py @@ -60,28 +60,20 @@ def test_single_void_ptr_input_lhs_not_object() -> None: def test_multi_void_ptr_input_value_match_ne() -> None: - line = 'filter obj != 1' + line = 'filter "obj != 1"' objs = [ drgn.Object(MOCK_PROGRAM, 'void *', value=0), drgn.Object(MOCK_PROGRAM, 'void *', value=1), drgn.Object(MOCK_PROGRAM, 'void *', value=2), ] - # - # This throws an error for all the wrong reasons. The operator this - # test is attempting to use is "!=", and due to a bug in the lexer - # used within "invoke", this operator does not reach the "filter" - # command. Instead, the lexer sees the "!" character and split the - # string into the following parts: - # - # 1. 
filter obj - # 2. = 1 - # - # As a result, the "filter" command fails because it doesn't see a - # comparison operator as input to it. - # - with pytest.raises(sdb.CommandInvalidInputError): - invoke(MOCK_PROGRAM, objs, line) + ret = invoke(MOCK_PROGRAM, objs, line) + + assert len(ret) == 2 + assert ret[0].value_() == 0 + assert ret[0].type_ == MOCK_PROGRAM.type('void *') + assert ret[1].value_() == 2 + assert ret[1].type_ == MOCK_PROGRAM.type('void *') def test_char_array_input_object_match() -> None: diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py new file mode 100644 index 00000000..75626d59 --- /dev/null +++ b/tests/unit/test_parser.py @@ -0,0 +1,148 @@ +# +# Copyright 2020 Delphix +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pylint: disable=missing-docstring + +from typing import List, Tuple + +import pytest + +from sdb import ParserError +from sdb.parser import tokenize, ExpressionType + +PARSER_POSITIVE_TABLE = [ + # single command and args + ("spa", [(["spa"], ExpressionType.CMD)]), + (" spa", [(["spa"], ExpressionType.CMD)]), + ("spa ", [(["spa"], ExpressionType.CMD)]), + ("spa rpool", [(["spa", "rpool"], ExpressionType.CMD)]), + ("spa rpool tank", [(["spa", "rpool", "tank"], ExpressionType.CMD)]), + + # pipeline spaces + ("spa | vdev", [(["spa"], ExpressionType.CMD), + (["vdev"], ExpressionType.CMD)]), + ("spa |vdev", [(["spa"], ExpressionType.CMD), + (["vdev"], ExpressionType.CMD)]), + ("spa| vdev", [(["spa"], ExpressionType.CMD), + (["vdev"], ExpressionType.CMD)]), + ("spa|vdev", [(["spa"], ExpressionType.CMD), + (["vdev"], ExpressionType.CMD)]), + + # shell pipe spaces + ("cmd ! shell_cmd", [(["cmd"], ExpressionType.CMD), + (["shell_cmd"], ExpressionType.SHELL_CMD)]), + ("cmd! shell_cmd", [(["cmd"], ExpressionType.CMD), + (["shell_cmd"], ExpressionType.SHELL_CMD)]), + ("cmd !shell_cmd", [(["cmd"], ExpressionType.CMD), + (["shell_cmd"], ExpressionType.SHELL_CMD)]), + ("cmd!shell_cmd", [(["cmd"], ExpressionType.CMD), + (["shell_cmd"], ExpressionType.SHELL_CMD)]), + + # longer pipeline + shell pipeline + ("spa rpool| vdev 0 |metaslab| count", [(["spa", + "rpool"], ExpressionType.CMD), + (["vdev", "0"], ExpressionType.CMD), + (["metaslab"], ExpressionType.CMD), + (["count"], ExpressionType.CMD)]), + ("spa rpool| vdev 0 |metaslab| count! less", [ + (["spa", "rpool"], ExpressionType.CMD), + (["vdev", "0"], ExpressionType.CMD), (["metaslab"], ExpressionType.CMD), + (["count"], ExpressionType.CMD), (["less"], ExpressionType.SHELL_CMD) + ]), + ("spa rpool| vdev 0 |metaslab! 
wc | less", [ + (["spa", "rpool"], ExpressionType.CMD), + (["vdev", "0"], ExpressionType.CMD), (["metaslab"], ExpressionType.CMD), + (["wc | less"], ExpressionType.SHELL_CMD) + ]), + + # quoted argument with spaces, and other special characters + ('cmd "arg"', [(["cmd", 'arg'], ExpressionType.CMD)]), + ('cmd "arg same_arg"', [(["cmd", 'arg same_arg'], ExpressionType.CMD)]), + ('cmd "arg \\"same_arg\\""', [(["cmd", + 'arg "same_arg"'], ExpressionType.CMD)]), + ('cmd "arg|same_arg"', [(["cmd", 'arg|same_arg'], ExpressionType.CMD)]), + ('cmd "arg ! same_arg"', [(["cmd", 'arg ! same_arg'], ExpressionType.CMD)]), + + # existing filter cases with quoted strings + ('cmd | filter "obj.member_flag | 0b0010"', + [(["cmd"], ExpressionType.CMD), + (["filter", 'obj.member_flag | 0b0010'], ExpressionType.CMD)]), + ('cmd | filter "obj.member_int > 3"', [(["cmd"], ExpressionType.CMD), + (["filter", 'obj.member_int > 3'], + ExpressionType.CMD)]), + ('cmd | filter "obj.member_int != 0"', [(["cmd"], ExpressionType.CMD), + (["filter", 'obj.member_int != 0'], + ExpressionType.CMD)]), + ('cmd | filter "obj.member_str != \\"test\\""', + [(["cmd"], ExpressionType.CMD), + (["filter", 'obj.member_str != "test"'], ExpressionType.CMD)]), + ('cmd | filter \'obj.member_str != "test"\'', + [(["cmd"], ExpressionType.CMD), + (["filter", 'obj.member_str != "test"'], ExpressionType.CMD)]), + + # extreme quote cases + ('cmd"arg"', [(["cmd", 'arg'], ExpressionType.CMD)]), + ('cmd"arg same_arg"', [(["cmd", 'arg same_arg'], ExpressionType.CMD)]), + ('cmd"arg" arg2', [(["cmd", 'arg', "arg2"], ExpressionType.CMD)]), + ('cmd arg"arg2"', [(["cmd", 'arg', 'arg2'], ExpressionType.CMD)]), + ('cmd\'arg\'', [(["cmd", 'arg'], ExpressionType.CMD)]), + ('cmd\'arg same_arg\'', [(["cmd", 'arg same_arg'], ExpressionType.CMD)]), + ('cmd\'arg\' arg2', [(["cmd", 'arg', "arg2"], ExpressionType.CMD)]), + ('cmd arg\'arg2\'', [(["cmd", 'arg', 'arg2'], ExpressionType.CMD)]), +] + + +@pytest.mark.parametrize( # type: 
ignore[misc] + 'entry,expected', PARSER_POSITIVE_TABLE) +def test_parser(entry: str, expected: List[Tuple[List[str], + ExpressionType]]) -> None: + assert list(tokenize(entry)) == expected + + +PARSER_NEGATIVE_TABLE = [ + # quote-related + ('cmd"', "unfinished string expression"), + ('cmd "', "unfinished string expression"), + ('cmd"arg', "unfinished string expression"), + ('cmd arg "', "unfinished string expression"), + ('cmd arg "arg2', "unfinished string expression"), + ('cmd arg "arg2 | cmd1 arg3 arg4 | cmd2', "unfinished string expression"), + ('cmd\'', "unfinished string expression"), + ('cmd \'', "unfinished string expression"), + ('cmd\'arg', "unfinished string expression"), + ('cmd arg \'', "unfinished string expression"), + ('cmd arg \'arg2', "unfinished string expression"), + ('cmd arg \'arg2 | cmd1 arg3 arg4 | cmd2', "unfinished string expression"), + + # pipe-related + ("|", "freestanding pipe with no command"), + ("cmd |", "freestanding pipe with no command"), + ("cmd ||", "freestanding pipe with no command"), + ("cmd || cmd2", "freestanding pipe with no command"), + + # shell-related + ("echo !", "no shell command specified"), + ('cmd | filter obj.member_int != 0', + "predicates that use != as an operator should be quoted"), +] + + +@pytest.mark.parametrize( # type: ignore[misc] + 'entry,expected_cause', PARSER_NEGATIVE_TABLE) +def test_parser_negative(entry: str, expected_cause: str) -> None: + with pytest.raises(ParserError) as err: + list(tokenize(entry)) + assert expected_cause in str(err.value) From e5189ca7675ac884159a99f519e5b44fc17a653c Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Fri, 1 May 2020 03:23:38 +0000 Subject: [PATCH 2/2] filter command should always use quotes --- sdb/commands/filter.py | 20 ++++--------- .../spa rpool | filter 'obj.bogus == 1624' | 1 + ...gus == 1624' => zfs_dbgmsg | filter 'obj'} | 0 tests/integration/test_core_generic.py | 2 +- tests/unit/commands/test_filter.py | 28 ++++++++++++++----- 5 files 
changed, 28 insertions(+), 23 deletions(-) create mode 100644 tests/integration/data/regression_output/core/spa rpool | filter 'obj.bogus == 1624' rename tests/integration/data/regression_output/core/{zfs_dbgmsg | filter 'obj'spa rpool | filter 'obj.bogus == 1624' => zfs_dbgmsg | filter 'obj'} (100%) diff --git a/sdb/commands/filter.py b/sdb/commands/filter.py index 4367db59..195e7e5d 100644 --- a/sdb/commands/filter.py +++ b/sdb/commands/filter.py @@ -30,19 +30,19 @@ class Filter(sdb.SingleInputCommand): EXAMPLES Print addresses greater than or equal to 4 - sdb> addr 0 1 2 3 4 5 6 | filter obj >= 4 + sdb> addr 0 1 2 3 4 5 6 | filter "obj >= 4" (void *)0x4 (void *)0x5 (void *)0x6 Find the SPA object of the ZFS pool named "jax" and print its 'spa_name' - sdb> spa | filter obj.spa_name == "jax" | member spa_name + sdb> spa | filter 'obj.spa_name == "jax"' | member spa_name (char [256])"jax" Print the number of level 3 log statements in the kernel log buffer - sdb> dmesg | filter obj.level == 3 | count + sdb> dmesg | filter 'obj.level == 3' | count (unsigned long long)24 """ # pylint: disable=eval-used @@ -52,7 +52,7 @@ class Filter(sdb.SingleInputCommand): @classmethod def _init_parser(cls, name: str) -> argparse.ArgumentParser: parser = super()._init_parser(name) - parser.add_argument("expr", nargs=argparse.REMAINDER) + parser.add_argument("expr", nargs=1) return parser @staticmethod @@ -63,17 +63,7 @@ def __init__(self, args: Optional[List[str]] = None, name: str = "_") -> None: super().__init__(args, name) - if not self.args.expr: - self.parser.error("no expression specified") - - # - # This is a stop-gap solution until we figure out - # exactly how we want the filter command to behave. 
- # - if len(self.args.expr) == 1: - self.expr = self.args.expr[0].split() - else: - self.expr = self.args.expr + self.expr = self.args.expr[0].split() index = None operators = ["==", "!=", ">", "<", ">=", "<="] diff --git a/tests/integration/data/regression_output/core/spa rpool | filter 'obj.bogus == 1624' b/tests/integration/data/regression_output/core/spa rpool | filter 'obj.bogus == 1624' new file mode 100644 index 00000000..53f844e8 --- /dev/null +++ b/tests/integration/data/regression_output/core/spa rpool | filter 'obj.bogus == 1624' @@ -0,0 +1 @@ +sdb: filter: 'spa_t' has no member 'bogus' diff --git a/tests/integration/data/regression_output/core/zfs_dbgmsg | filter 'obj'spa rpool | filter 'obj.bogus == 1624' b/tests/integration/data/regression_output/core/zfs_dbgmsg | filter 'obj' similarity index 100% rename from tests/integration/data/regression_output/core/zfs_dbgmsg | filter 'obj'spa rpool | filter 'obj.bogus == 1624' rename to tests/integration/data/regression_output/core/zfs_dbgmsg | filter 'obj' diff --git a/tests/integration/test_core_generic.py b/tests/integration/test_core_generic.py index 4d9d52e7..0a0fea28 100644 --- a/tests/integration/test_core_generic.py +++ b/tests/integration/test_core_generic.py @@ -160,7 +160,7 @@ # filter - no left-hand side "zfs_dbgmsg | filter '== obj'", # filter - no operator - "zfs_dbgmsg | filter 'obj'" + "zfs_dbgmsg | filter 'obj'", # filter - bogus member "spa rpool | filter 'obj.bogus == 1624'", # filter - bogus op diff --git a/tests/unit/commands/test_filter.py b/tests/unit/commands/test_filter.py index dc03eeb9..86ec86be 100644 --- a/tests/unit/commands/test_filter.py +++ b/tests/unit/commands/test_filter.py @@ -30,29 +30,43 @@ def test_no_arg() -> None: invoke(MOCK_PROGRAM, [], line) +def test_no_quotes_0() -> None: + line = 'filter obj' + + with pytest.raises(sdb.CommandInvalidInputError): + invoke(MOCK_PROGRAM, [], line) + + +def test_no_quotes_1() -> None: + line = 'filter obj == 1' + + with 
pytest.raises(sdb.CommandArgumentsError): + invoke(MOCK_PROGRAM, [], line) + + def test_no_rhs() -> None: - line = 'filter obj ==' + line = 'filter "obj =="' with pytest.raises(sdb.CommandInvalidInputError): invoke(MOCK_PROGRAM, [], line) def test_no_lhs() -> None: - line = 'filter == obj' + line = 'filter "== obj"' with pytest.raises(sdb.CommandInvalidInputError): invoke(MOCK_PROGRAM, [], line) def test_no_operator() -> None: - line = 'filter obj' + line = 'filter "obj"' with pytest.raises(sdb.CommandInvalidInputError): invoke(MOCK_PROGRAM, [], line) def test_single_void_ptr_input_lhs_not_object() -> None: - line = 'filter 0 == obj' + line = 'filter "0 == obj"' objs = [drgn.Object(MOCK_PROGRAM, 'void *', value=0)] with pytest.raises(sdb.CommandInvalidInputError): @@ -77,7 +91,7 @@ def test_multi_void_ptr_input_value_match_ne() -> None: def test_char_array_input_object_match() -> None: - line = 'filter obj == obj' + line = 'filter "obj == obj"' objs = [drgn.Object(MOCK_PROGRAM, 'char [4]', value=b"foo")] with pytest.raises(sdb.CommandError): @@ -85,7 +99,7 @@ def test_char_array_input_object_match() -> None: def test_struct_input_invalid_syntax() -> None: - line = 'filter obj->ts_int == 1' + line = 'filter "obj->ts_int == 1"' objs = [MOCK_PROGRAM["global_struct"]] with pytest.raises(sdb.CommandEvalSyntaxError): @@ -93,7 +107,7 @@ def test_struct_input_invalid_syntax() -> None: def test_struct_input_bogus_member() -> None: - line = 'filter obj.ts_bogus == 1' + line = 'filter "obj.ts_bogus == 1"' objs = [MOCK_PROGRAM["global_struct"]] with pytest.raises(sdb.CommandError):