From bac953fadc3c0d09bf46633ce0e2c138e8483786 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 9 Jun 2022 10:51:13 +0200 Subject: [PATCH 1/5] Add schema support for `load_csv` --- examples/load_csv.py | 16 +++++++++++++--- railib/api.py | 43 ++++++++++++++++++++++++++++++++----------- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/examples/load_csv.py b/examples/load_csv.py index a407494..91d625e 100644 --- a/examples/load_csv.py +++ b/examples/load_csv.py @@ -31,12 +31,12 @@ def _sansext(fname: str) -> str: def run(database: str, engine: str, fname: str, relation: str, - syntax: dict, profile: str): + syntax: dict, schema: dict, profile: str): data = _read(fname) relation = relation or _sansext(fname) cfg = config.read(profile=profile) ctx = api.Context(**cfg) - rsp = api.load_csv(ctx, database, engine, relation, data, syntax) + rsp = api.load_csv(ctx, database, engine, relation, data, syntax, schema) print(json.dumps(rsp, indent=2)) @@ -57,6 +57,13 @@ def run(database: str, engine: str, fname: str, relation: str, help="relation name (default: file name)") p.add_argument("-p", "--profile", type=str, default="default", help="profile name") + p.add_argument( + "--schema", + type=str, + default="", + help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`." + ) + args = p.parse_args() syntax = {} # find full list of syntax options in the RAI docs if args.header_row is not None: @@ -67,8 +74,11 @@ def run(database: str, engine: str, fname: str, relation: str, syntax["escapechar"] = args.escapechar if args.quotechar: syntax["quotechar"] = args.quotechar + + schema = {col: type for col, type in [pair.split("=") for pair in args.schema.split(",")]} + try: run(args.database, args.engine, args.file, - args.relation, syntax, args.profile) + args.relation, syntax, schema, args.profile) except HTTPError as e: show.http_error(e) diff --git a/railib/api.py b/railib/api.py index 149087b..afdc349 100644 --- a/railib/api.py +++ b/railib/api.py @@ -612,27 +612,48 @@ def _gen_syntax_config(syntax: dict = {}) -> str: return result -# `syntax`: -# * header: a map from col number to name (base 1) -# * header_row: row number of header, 0 means no header (default: 1) -# * delim: default: , -# * quotechar: default: " -# * escapechar: default: \ -# -# Schema: a map from col name to rel type name, eg: -# {'a': "int", 'b': "string"} def load_csv(ctx: Context, database: str, engine: str, relation: str, - data: str or io.TextIOBase, syntax: dict = {}) -> dict: + data: str or io.TextIOBase, syntax: dict = {}, schema = {}) -> dict: + """ + Loads CSV data present in `data` into `database` using `engine`. Upon + success, parsed CSV data is stored in `relation`. + + Args: + - `ctx` (`Context`): The RAI API context. + - `database` (`str`): The target database name. + - `engine` (`str`): The engine used for loading. + - `relation` (`str`): Relation name used to store CSV data. + - `data` (`str or or io.TextIOBase`): Data specified either as a string or as a stream of type `io.TextIOBase`. + - `syntax` (`dict`, optional): Dictionary containing parsing configuration, defaults to {}. Valid entries are: + - `header`: A dictionary mapping column numbers to a names. + - `header_row`: the row number of the header row; 0 means no header. Defaults to `1`. + - `delim`: Column delimiter used. Defaults to `,`. + - `quotechar`: Quotation character used. Defaults to `"`. + - `escapechar`: Escape charater used. Defaults to `\`. + - `schema` (`dict`, optional): Dictionary mapping column names to Rel type names. Defaults to `{}`. + Raises: + `TypeError`: If `data` is neither `str` nor `io.TextIOBase`. + + Returns: + `dict`: The response of the query action. + """ if isinstance(data, str): pass # ok elif isinstance(data, io.TextIOBase): data = data.read() else: raise TypeError(f"bad type for arg 'data': {data.__class__.__name__}") + inputs = {'data': data} command = _gen_syntax_config(syntax) + command += "".join( + [f'def config:schema[:"{col}"] = "{type}"\n' for col, type in schema.items()] + ) command += ("def config:data = data\n" - "def insert:%s = load_csv[config]" % relation) + f"def insert[:{relation}] = load_csv[config]") + + print(command) + return query(ctx, database, engine, command, inputs=inputs, readonly=False) From 97b10fd784f97b1d462d86d877cb86becd0fb0a0 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Tue, 11 Oct 2022 19:50:03 +0200 Subject: [PATCH 2/5] Adapt formatting --- examples/load_csv.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/examples/load_csv.py b/examples/load_csv.py index 91d625e..bab149b 100644 --- a/examples/load_csv.py +++ b/examples/load_csv.py @@ -57,12 +57,8 @@ def run(database: str, engine: str, fname: str, relation: str, help="relation name (default: file name)") p.add_argument("-p", "--profile", type=str, default="default", help="profile name") - p.add_argument( - "--schema", - type=str, - default="", - help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`." - ) + p.add_argument("--schema", type=str, default="", + help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`.") args = p.parse_args() syntax = {} # find full list of syntax options in the RAI docs From 0f1f66471004b098cc02b9718a3843b559feddd2 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Tue, 11 Oct 2022 19:50:13 +0200 Subject: [PATCH 3/5] Change schema string composition --- railib/api.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/railib/api.py b/railib/api.py index afdc349..0077f3a 100644 --- a/railib/api.py +++ b/railib/api.py @@ -646,14 +646,13 @@ def load_csv(ctx: Context, database: str, engine: str, relation: str, inputs = {'data': data} command = _gen_syntax_config(syntax) - command += "".join( - [f'def config:schema[:"{col}"] = "{type}"\n' for col, type in schema.items()] - ) + + for col, type in schema.items(): + command += f'def config:schema[:"{col}"] = "{type}"\n' + command += ("def config:data = data\n" f"def insert[:{relation}] = load_csv[config]") - - print(command) - + return query(ctx, database, engine, command, inputs=inputs, readonly=False) From 68a93d8ec9e68d93cf4f6562395d12802b2a32d1 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Tue, 11 Oct 2022 20:05:03 +0200 Subject: [PATCH 4/5] autoformat using pep8 --- examples/load_csv.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/load_csv.py b/examples/load_csv.py index 5e85043..41f0ada 100644 --- a/examples/load_csv.py +++ b/examples/load_csv.py @@ -65,12 +65,12 @@ def run(database: str, engine: str, fname: str, relation: str, ) p.add_argument("-p", "--profile", type=str, default="default", help="profile name") p.add_argument( - "--schema", - type=str, - default="", + "--schema", + type=str, + default="", help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`." ) - + args = p.parse_args() syntax = {} # find full list of syntax options in the RAI docs if args.header_row is not None: @@ -81,17 +81,17 @@ def run(database: str, engine: str, fname: str, relation: str, syntax["escapechar"] = args.escapechar if args.quotechar: syntax["quotechar"] = args.quotechar - - schema = {col: type for col, type in [pair.split("=") for pair in args.schema.split(",")]} - + + schema = {col: type for col, type in [pair.split("=") for pair in args.schema.split(",")]} + try: run( - args.database, - args.engine, - args.file, - args.relation, - syntax, - args.profile, + args.database, + args.engine, + args.file, + args.relation, + syntax, + args.profile, args.schema ) except HTTPError as e: From 4ae5eef642bcc4cb2059bff2d44b36701206c61b Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Tue, 11 Oct 2022 20:21:23 +0200 Subject: [PATCH 5/5] Autoformat using pep8 --- railib/api.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/railib/api.py b/railib/api.py index 2f511c9..3a99854 100644 --- a/railib/api.py +++ b/railib/api.py @@ -762,23 +762,23 @@ def _gen_syntax_config(syntax: dict = {}) -> str: def load_csv(ctx: Context, database: str, engine: str, relation: str, - data: str or io.TextIOBase, syntax: dict = {}, schema = {}) -> dict: + data: str or io.TextIOBase, syntax: dict = {}, schema={}) -> dict: """ Loads CSV data present in `data` into `database` using `engine`. Upon success, parsed CSV data is stored in `relation`. Args: - `ctx` (`Context`): The RAI API context. - - `database` (`str`): The target database name. + - `database` (`str`): The target database name. - `engine` (`str`): The engine used for loading. - - `relation` (`str`): Relation name used to store CSV data. + - `relation` (`str`): Relation name used to store CSV data. - `data` (`str or or io.TextIOBase`): Data specified either as a string or as a stream of type `io.TextIOBase`. - `syntax` (`dict`, optional): Dictionary containing parsing configuration, defaults to {}. Valid entries are: - `header`: A dictionary mapping column numbers to a names. - `header_row`: the row number of the header row; 0 means no header. Defaults to `1`. - `delim`: Column delimiter used. Defaults to `,`. - `quotechar`: Quotation character used. Defaults to `"`. - - `escapechar`: Escape charater used. Defaults to `\`. + - `escapechar`: Escape charater used. Defaults to `\\`. - `schema` (`dict`, optional): Dictionary mapping column names to Rel type names. Defaults to `{}`. Raises: `TypeError`: If `data` is neither `str` nor `io.TextIOBase`. @@ -792,16 +792,16 @@ def load_csv(ctx: Context, database: str, engine: str, relation: str, data = data.read() else: raise TypeError(f"bad type for arg 'data': {data.__class__.__name__}") - + inputs = {'data': data} command = _gen_syntax_config(syntax) - + for col, type in schema.items(): command += f'def config:schema[:"{col}"] = "{type}"\n' - + command += ("def config:data = data\n" f"def insert[:{relation}] = load_csv[config]") - + return exec_v1(ctx, database, engine, command, inputs=inputs, readonly=False)