From bac953fadc3c0d09bf46633ce0e2c138e8483786 Mon Sep 17 00:00:00 2001
From: Gerald Berger <gberger.work@gmail.com>
Date: Thu, 9 Jun 2022 10:51:13 +0200
Subject: [PATCH 1/5] Add schema support for `load_csv`

---
 examples/load_csv.py | 16 +++++++++++++---
 railib/api.py        | 43 ++++++++++++++++++++++++++++++++-----------
 2 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/examples/load_csv.py b/examples/load_csv.py
index a407494..91d625e 100644
--- a/examples/load_csv.py
+++ b/examples/load_csv.py
@@ -31,12 +31,12 @@ def _sansext(fname: str) -> str:
 
 
 def run(database: str, engine: str, fname: str, relation: str,
-        syntax: dict, profile: str):
+        syntax: dict, schema: dict, profile: str):
     data = _read(fname)
     relation = relation or _sansext(fname)
     cfg = config.read(profile=profile)
     ctx = api.Context(**cfg)
-    rsp = api.load_csv(ctx, database, engine, relation, data, syntax)
+    rsp = api.load_csv(ctx, database, engine, relation, data, syntax, schema)
     print(json.dumps(rsp, indent=2))
 
 
@@ -57,6 +57,13 @@ def run(database: str, engine: str, fname: str, relation: str,
                    help="relation name (default: file name)")
     p.add_argument("-p", "--profile", type=str, default="default",
                    help="profile name")
+    p.add_argument(
+        "--schema", 
+        type=str, 
+        default="", 
+        help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`."
+    )
+    
     args = p.parse_args()
     syntax = {}  # find full list of syntax options in the RAI docs
     if args.header_row is not None:
@@ -67,8 +74,11 @@ def run(database: str, engine: str, fname: str, relation: str,
         syntax["escapechar"] = args.escapechar
     if args.quotechar:
         syntax["quotechar"] = args.quotechar
+    
+    schema = {col: type for col, type in [pair.split("=") for pair in args.schema.split(",")]}    
+    
     try:
         run(args.database, args.engine, args.file,
-            args.relation, syntax, args.profile)
+            args.relation, syntax, schema, args.profile)
     except HTTPError as e:
         show.http_error(e)
diff --git a/railib/api.py b/railib/api.py
index 149087b..afdc349 100644
--- a/railib/api.py
+++ b/railib/api.py
@@ -612,27 +612,48 @@ def _gen_syntax_config(syntax: dict = {}) -> str:
     return result
 
 
-# `syntax`:
-#   * header: a map from col number to name (base 1)
-#   * header_row: row number of header, 0 means no header (default: 1)
-#   * delim: default: ,
-#   * quotechar: default: "
-#   * escapechar: default: \
-#
-# Schema: a map from col name to rel type name, eg:
-#   {'a': "int", 'b': "string"}
 def load_csv(ctx: Context, database: str, engine: str, relation: str,
-             data: str or io.TextIOBase, syntax: dict = {}) -> dict:
+             data: str or io.TextIOBase, syntax: dict = {}, schema = {}) -> dict:
+    """
+    Loads CSV data present in `data` into `database` using `engine`. Upon
+    success, parsed CSV data is stored in `relation`.
+
+    Args:
+        - `ctx` (`Context`): The RAI API context.
+        - `database` (`str`): The target database name. 
+        - `engine` (`str`): The engine used for loading.
+        - `relation` (`str`): Relation name used to store CSV data. 
+        - `data` (`str or or io.TextIOBase`): Data specified either as a string or as a stream of type `io.TextIOBase`.
+        - `syntax` (`dict`, optional): Dictionary containing parsing configuration, defaults to {}. Valid entries are:
+            - `header`: A dictionary mapping column numbers to a names.
+            - `header_row`: the row number of the header row; 0 means no header. Defaults to `1`.
+            - `delim`: Column delimiter used. Defaults to `,`.
+            - `quotechar`: Quotation character used. Defaults to `"`.
+            - `escapechar`: Escape charater used. Defaults to `\`.
+        - `schema` (`dict`, optional): Dictionary mapping column names to Rel type names. Defaults to `{}`.
+    Raises:
+        `TypeError`: If `data` is neither `str` nor `io.TextIOBase`.
+
+    Returns:
+        `dict`: The response of the query action.
+    """
     if isinstance(data, str):
         pass  # ok
     elif isinstance(data, io.TextIOBase):
         data = data.read()
     else:
         raise TypeError(f"bad type for arg 'data': {data.__class__.__name__}")
+    
     inputs = {'data': data}
     command = _gen_syntax_config(syntax)
+    command += "".join(
+        [f'def config:schema[:"{col}"] = "{type}"\n' for col, type in schema.items()]
+    )
     command += ("def config:data = data\n"
-                "def insert:%s = load_csv[config]" % relation)
+                f"def insert[:{relation}] = load_csv[config]")
+    
+    print(command)
+    
     return query(ctx, database, engine, command, inputs=inputs, readonly=False)
 
 

From 97b10fd784f97b1d462d86d877cb86becd0fb0a0 Mon Sep 17 00:00:00 2001
From: Gerald Berger <gberger.work@gmail.com>
Date: Tue, 11 Oct 2022 19:50:03 +0200
Subject: [PATCH 2/5] Adapt formatting

---
 examples/load_csv.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/examples/load_csv.py b/examples/load_csv.py
index 91d625e..bab149b 100644
--- a/examples/load_csv.py
+++ b/examples/load_csv.py
@@ -57,12 +57,8 @@ def run(database: str, engine: str, fname: str, relation: str,
                    help="relation name (default: file name)")
     p.add_argument("-p", "--profile", type=str, default="default",
                    help="profile name")
-    p.add_argument(
-        "--schema", 
-        type=str, 
-        default="", 
-        help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`."
-    )
+    p.add_argument("--schema", type=str, default="", 
+                   help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`.")
     
     args = p.parse_args()
     syntax = {}  # find full list of syntax options in the RAI docs

From 0f1f66471004b098cc02b9718a3843b559feddd2 Mon Sep 17 00:00:00 2001
From: Gerald Berger <gberger.work@gmail.com>
Date: Tue, 11 Oct 2022 19:50:13 +0200
Subject: [PATCH 3/5] Change schema string composition

---
 railib/api.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/railib/api.py b/railib/api.py
index afdc349..0077f3a 100644
--- a/railib/api.py
+++ b/railib/api.py
@@ -646,14 +646,13 @@ def load_csv(ctx: Context, database: str, engine: str, relation: str,
     
     inputs = {'data': data}
     command = _gen_syntax_config(syntax)
-    command += "".join(
-        [f'def config:schema[:"{col}"] = "{type}"\n' for col, type in schema.items()]
-    )
+    
+    for col, type in schema.items():
+        command += f'def config:schema[:"{col}"] = "{type}"\n'
+        
     command += ("def config:data = data\n"
                 f"def insert[:{relation}] = load_csv[config]")
-    
-    print(command)
-    
+        
     return query(ctx, database, engine, command, inputs=inputs, readonly=False)
 
 

From 68a93d8ec9e68d93cf4f6562395d12802b2a32d1 Mon Sep 17 00:00:00 2001
From: Gerald Berger <gberger.work@gmail.com>
Date: Tue, 11 Oct 2022 20:05:03 +0200
Subject: [PATCH 4/5] Autoformat examples/load_csv.py using pep8

---
 examples/load_csv.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/examples/load_csv.py b/examples/load_csv.py
index 5e85043..41f0ada 100644
--- a/examples/load_csv.py
+++ b/examples/load_csv.py
@@ -65,12 +65,12 @@ def run(database: str, engine: str, fname: str, relation: str,
     )
     p.add_argument("-p", "--profile", type=str, default="default", help="profile name")
     p.add_argument(
-        "--schema", 
-        type=str, 
-        default="", 
+        "--schema",
+        type=str,
+        default="",
         help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`."
     )
-    
+
     args = p.parse_args()
     syntax = {}  # find full list of syntax options in the RAI docs
     if args.header_row is not None:
@@ -81,17 +81,17 @@ def run(database: str, engine: str, fname: str, relation: str,
         syntax["escapechar"] = args.escapechar
     if args.quotechar:
         syntax["quotechar"] = args.quotechar
-    
-    schema = {col: type for col, type in [pair.split("=") for pair in args.schema.split(",")]}    
-    
+
+    schema = dict(pair.split("=", 1) for pair in args.schema.split(",") if pair)  # empty --schema -> {}
+
     try:
         run(
-            args.database, 
-            args.engine, 
-            args.file, 
-            args.relation, 
-            syntax, 
-            args.profile, 
+            args.database,
+            args.engine,
+            args.file,
+            args.relation,
+            syntax,
+            args.profile,
             args.schema
         )
     except HTTPError as e:

From 4ae5eef642bcc4cb2059bff2d44b36701206c61b Mon Sep 17 00:00:00 2001
From: Gerald Berger <gberger.work@gmail.com>
Date: Tue, 11 Oct 2022 20:21:23 +0200
Subject: [PATCH 5/5] Autoformat railib/api.py using pep8

---
 railib/api.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/railib/api.py b/railib/api.py
index 2f511c9..3a99854 100644
--- a/railib/api.py
+++ b/railib/api.py
@@ -762,23 +762,23 @@ def _gen_syntax_config(syntax: dict = {}) -> str:
 
 
 def load_csv(ctx: Context, database: str, engine: str, relation: str,
-             data: str or io.TextIOBase, syntax: dict = {}, schema = {}) -> dict:
+             data: str or io.TextIOBase, syntax: dict = {}, schema={}) -> dict:
     """
     Loads CSV data present in `data` into `database` using `engine`. Upon
     success, parsed CSV data is stored in `relation`.
 
     Args:
         - `ctx` (`Context`): The RAI API context.
-        - `database` (`str`): The target database name. 
+        - `database` (`str`): The target database name.
         - `engine` (`str`): The engine used for loading.
-        - `relation` (`str`): Relation name used to store CSV data. 
+        - `relation` (`str`): Relation name used to store CSV data.
         - `data` (`str or or io.TextIOBase`): Data specified either as a string or as a stream of type `io.TextIOBase`.
         - `syntax` (`dict`, optional): Dictionary containing parsing configuration, defaults to {}. Valid entries are:
             - `header`: A dictionary mapping column numbers to a names.
             - `header_row`: the row number of the header row; 0 means no header. Defaults to `1`.
             - `delim`: Column delimiter used. Defaults to `,`.
             - `quotechar`: Quotation character used. Defaults to `"`.
-            - `escapechar`: Escape charater used. Defaults to `\`.
+            - `escapechar`: Escape character used. Defaults to `\\`.
         - `schema` (`dict`, optional): Dictionary mapping column names to Rel type names. Defaults to `{}`.
     Raises:
         `TypeError`: If `data` is neither `str` nor `io.TextIOBase`.
@@ -792,16 +792,16 @@ def load_csv(ctx: Context, database: str, engine: str, relation: str,
         data = data.read()
     else:
         raise TypeError(f"bad type for arg 'data': {data.__class__.__name__}")
-    
+
     inputs = {'data': data}
     command = _gen_syntax_config(syntax)
-    
+
     for col, type in schema.items():
         command += f'def config:schema[:"{col}"] = "{type}"\n'
-        
+
     command += ("def config:data = data\n"
                 f"def insert[:{relation}] = load_csv[config]")
-        
+
     return exec_v1(ctx, database, engine, command, inputs=inputs, readonly=False)