From 3d2fd7c1366d8167041fe89a57a9bacaa1ae7f22 Mon Sep 17 00:00:00 2001 From: Charlie Gordon Date: Mon, 31 Mar 2025 17:39:27 +0200 Subject: [PATCH] c2cat: improve output consistency * output tokens with the original spelling * use Style enum to support custom colors * show syntax errors * add `--color` and `--nocolor` to force/disable color output --- ast_utils/color-custom.c2 | 131 +++++++++++ parser/c2_tokenizer.c2 | 5 +- recipe.txt | 1 + tools/c2cat.c2 | 461 ++++++++++++++++++++------------------ 4 files changed, 374 insertions(+), 224 deletions(-) create mode 100644 ast_utils/color-custom.c2 diff --git a/ast_utils/color-custom.c2 b/ast_utils/color-custom.c2 new file mode 100644 index 00000000..ed298c90 --- /dev/null +++ b/ast_utils/color-custom.c2 @@ -0,0 +1,131 @@ +/* Copyright 2025 Charlie Gordon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module color; + +import ctype local; +import stdio local; +import stdlib local; +import string local; + +const char*[] standardColors = { + "black", Black, + "red", Red, + "green", Green, + "yellow", Yellow, + "blue", Blue, + "magenta", Magenta, + "cyan", Cyan, + "grey", Grey, + "darkgrey", Darkgrey, + "bred", Bred, + "bgreen", Bgreen, + "byellow", Byellow, + "bblue", Bblue, + "bmagenta", Bmagenta, + "bcyan", Bcyan, + "white", White, + "normal", Normal, +} + +fn bool getStyleDef(char* buf1, u32 size1, char* buf2, u32 size2, const char** pp) { + const char *p = *pp; + while (isspace(*p)) + p++; + if (!*p) + return false; + u32 i = 0; + while (isalpha(*p) || *p == '.' || *p == '_') { + char c = (char)tolower(*p++); + if (i + 1 < size1) + buf1[i++] = c; + } + buf1[i] = '\0'; + if (*p != '=' && *p != ':') + return false; + p++; + i = 0; + while (*p && *p != ' ' && *p != ',' && *p != ';') { + char c = (char)tolower(*p++); + if (i + 1 < size2 && c != '-' && c != '_') + buf2[i++] = c; + } + buf2[i] = '\0'; + if (*p == ',' || *p == ';') + p++; + *pp = p; + return true; +} + +fn bool matchColorName(const char *p, const char *name) { + while (*p) { + char c = *p++; + if (c == 'b' && !strncmp(p, "right", 5)) + p += 5; + if (c != *name++) + return false; + } + return *name == '\0'; +} + +fn const char* convertColor(const char *val, const char *def) { + if (*val == '\0') + return ""; + + for (u32 i = 0; i < elemsof(standardColors); i += 2) { + if (matchColorName(val, standardColors[i])) + return standardColors[i + 1]; + } + if (!strcasecmp(val, "default")) + return def; + + char[32] buf; + i32 pal, r, g, b; + if (sscanf(val, "%*1[pP]%d", &pal) == 1) { + snprintf(buf, elemsof(buf), "\033[38;5;%dm", pal); + } else + if (sscanf(val, "#%2x%2x%2x", &r, &g, &b) == 3) { + snprintf(buf, elemsof(buf), "\033[38;2;%d;%d;%dm", r, g, b); + } else { + // TODO: complain about unknown color + return def; + } + return strdup(buf); +} + +public fn void freeConfigColor(const char* p) { + if (p && *p) { + for (u32 i = 0; i < elemsof(standardColors); i++) { + if (standardColors[i] == p) return; + } + free((void*)p); + } +} + +public fn const char* getConfigColor(const char* cat, const char* def) { + const char *c2_colors = getenv("C2_COLORS"); + if (c2_colors) { + const char *p = c2_colors; + char[16] style; + char[16] val; + if (!strcmp(p, "none")) + return ""; + while (getStyleDef(style, elemsof(style), val, elemsof(val), &p)) { + if (!strcmp(style, cat)) + return convertColor(val, def); + } + } + return def; +} diff --git a/parser/c2_tokenizer.c2 b/parser/c2_tokenizer.c2 index 7c0657a5..d2857eec 100644 --- a/parser/c2_tokenizer.c2 +++ b/parser/c2_tokenizer.c2 @@ -694,8 +694,7 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch vsnprintf(t.error_msg, sizeof(t.error_msg), format, args); va_end(args); - // XXX: error position should be passed separately from token start - result.loc = t.loc_start + (SrcLoc)(p - t.input_start); + SrcLoc err_loc = t.loc_start + (SrcLoc)(p - t.input_start); // read the rest of the pp-number token for (;;) { if ((*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P') && (p[1] == '+' || p[1] == '-')) { @@ -712,7 +711,7 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch } t.cur = p; result.len = (u16)((p - t.input_start) - (result.loc - t.loc_start)); - if (t.on_warning) t.on_warning(t.fn_arg, result.loc); + if (t.on_warning) t.on_warning(t.fn_arg, err_loc); } fn void Tokenizer.lex_identifier(Tokenizer* t, Token* result) { diff --git a/recipe.txt b/recipe.txt index e28ea1eb..c4393d0c 100644 --- a/recipe.txt +++ b/recipe.txt @@ -378,6 +378,7 @@ executable c2cat $backend c ast_utils/color.c2 + ast_utils/color-custom.c2 ast_utils/constants.c2 ast_utils/number_radix.c2 ast_utils/src_loc.c2 diff --git a/tools/c2cat.c2 b/tools/c2cat.c2 index 7c3aacba..a1229c00 100644 --- a/tools/c2cat.c2 +++ b/tools/c2cat.c2 @@ -19,43 +19,55 @@ import c2_tokenizer; import color; import file_utils; import keywords; +import src_loc local; import string_buffer; import string_list; import string_pool; import number_radix; import token local; -import ctype; import stdio local; import stdlib local; import string local; -const char* col_keyword = color.Byellow; -const char* col_type = color.Green; -const char* col_feature = color.Blue; -const char* col_attr = color.Blue; -const char* col_identifier = ""; -const char* col_integer = color.Magenta; -const char* col_float = color.Magenta; -const char* col_charconst = color.Magenta; -const char* col_string = color.Magenta; -const char* col_comment = color.Bcyan; -const char* col_invalid = color.Bred; -const char* col_error = color.Bred; -const char* col_normal = color.Normal; +type Style enum u8 { + Normal, + Identifier, + Integer, + Float, + Charconst, + String, + Operator, + Type, + Keyword, + Function, + Attr, + Feature, + Invalid, + Comment, + Warning, + Error, +} fn void usage(const char* me) { - printf("Usage: %s file.c2 ...\n", me); + printf("Usage: %s [options] file.c2 ...\n" + " --color force colorized output\n" + " --nocolor disable colorized output\n" + , me); exit(1); } type C2cat struct { string_pool.Pool* pool; - string_buffer.Buf* out; c2_tokenizer.Tokenizer* tokenizer; const char* input; u32 offset; - u32 in_attributes; // 0 no, 1 seen @, 2 (, ) -> 0 + u32 length; + string_buffer.Buf* out; + u8 in_attributes; // 0 no, 1 seen @, 2 (, ) -> 0 + bool has_error; + Style* token_style; + const char** style_color; } const char*[] attr_names = { @@ -80,27 +92,107 @@ const char*[] attr_names = { "auto_func", } +const char*[] default_colors = { + [Style.Normal] = color.Normal, + [Style.Identifier] = "", + [Style.Integer] = color.Magenta, + [Style.Float] = color.Magenta, + [Style.Charconst] = color.Magenta, + [Style.String] = color.Magenta, + [Style.Operator] = "", + [Style.Type] = color.Green, + [Style.Keyword] = color.Byellow, + [Style.Function] = color.White, + [Style.Attr] = color.Blue, + [Style.Feature] = color.Blue, + [Style.Invalid] = color.Bred, + [Style.Comment] = color.Bcyan, + [Style.Warning] = color.Bred, + [Style.Error] = color.Bred, +} + +const char*[] style_names = { + [Style.Normal] = "normal", + [Style.Identifier] = "identifier", + [Style.Integer] = "integer", + [Style.Float] = "float", + [Style.Charconst] = "charconst", + [Style.String] = "string", + [Style.Operator] = "operator", + [Style.Type] = "type", + [Style.Keyword] = "keyword", + [Style.Function] = "function", + [Style.Attr] = "attr", + [Style.Feature] = "feature", + [Style.Invalid] = "invalid", + [Style.Comment] = "comment", + [Style.Warning] = "warning", + [Style.Error] = "error", +} -fn void init_colors() { - if (!color.useColor()) { - col_keyword = ""; - col_type = ""; - col_feature = ""; - col_attr = ""; - col_identifier = ""; - col_integer = ""; - col_float = ""; - col_charconst = ""; - col_string = ""; - col_comment = ""; - col_invalid = ""; - col_error = ""; - col_normal = ""; +fn bool init_colors(Style* token_style, const char** style_color) { + for (Kind k = Kind.min; k <= Kind.max; k++) { + Style style = Normal; + switch (k) { + case None: + style = Normal; + break; + case Identifier: + style = Identifier; + break; + case IntegerLiteral: + style = Integer; + break; + case FloatLiteral: + style = Float; + break; + case CharLiteral: + style = Charconst; + break; + case StringLiteral: + style = String; + break; + case LParen ... GreaterGreaterEqual: + style = Operator; + break; + case KW_bool ... KW_void: + style = Type; + break; + case KW_as ... KW_while: + if (k.isQualifier()) style = Type; + else style = Keyword; + break; + case Feat_if ... Feat_warning: + style = Feature; + break; + case Invalid: + style = Invalid; + break; + case LineComment: + case BlockComment: + style = Comment; + break; + case Eof: + style = Normal; + break; + case Warning: + style = Warning; + break; + case Error: + style = Error; + break; + } + token_style[k] = style; } + for (Style s = Style.min; s <= Style.max; s++) { + style_color[s] = color.getConfigColor(style_names[s], default_colors[s]); + } + return color.useColor(); } -fn bool is_attribute(const char* str) { - for (u32 i=0; i ctx.offset) { // copy stuff from file to out (from end of last token to start of current) - if (tok.done) return; - if (ctx.offset <= tok.loc) { - u32 len = tok.loc - ctx.offset; - if (len) out.add2(ctx.input + ctx.offset, len); - } else { - out.add1('\n'); - out.color(col_error); - out.print("error: offset=%d tok.loc=%d", ctx.offset, tok.loc); - out.color(col_normal); - out.add1('\n'); - } + // TODO: check for whitespace only + out.add2(ctx.input + ctx.offset, pos - ctx.offset); + ctx.offset = pos; } - if (tok.kind >= Kind.LParen && tok.kind < Kind.KW_bool) { - const char* str = tok.kind.str(); - out.add(str); - ctx.offset = tok.loc + (u32)strlen(str); - return; - } - if (tok.kind.isBuiltinType()) { - const char* str = tok.kind.str(); - out.color(col_type); - out.add(str); - out.color(col_normal); - ctx.offset = tok.loc + (u32)strlen(str); - return; - } - if (tok.kind.isQualifier()) { - const char* str = tok.kind.str(); - out.color(col_type); - out.add(str); - out.color(col_normal); - ctx.offset = tok.loc + (u32)strlen(str); - return; - } - if (tok.kind.isKeyword()) { - const char* str = tok.kind.str(); - out.color(col_keyword); - out.add(str); - out.color(col_normal); - ctx.offset = tok.loc + (u32)strlen(str); - return; + Style s = ctx.token_style[tok.kind]; + if (tok.kind == Kind.Identifier) { + if (ctx.in_attributes && ctx.is_attribute(tok.name_idx)) { + s = Style.Attr; + } else + if (ctx.input[ctx.offset + tok_len] == '(') { + s = Style.Function; + } } - if (tok.kind >= Kind.Feat_if && tok.kind <= Kind.Feat_endif) { - const char* str = tok.kind.str(); - out.color(col_feature); - out.add(str); - out.color(col_normal); - ctx.offset = tok.loc + (u32)strlen(str); - return; + if (s) { + out.color(ctx.style_color[s]); } - switch (tok.kind) { - case Identifier: - const char* str = ctx.pool.idx2str(tok.name_idx); + out.add2(ctx.input + ctx.offset, tok_len); - if (ctx.in_attributes && is_attribute(str)) { - out.color(col_attr); - out.add(str); - out.color(col_normal); - } else { - out.color(col_identifier); - out.add(str); - out.color(col_normal); - } - ctx.offset = tok.loc + (u32)strlen(str); - return; - case IntegerLiteral: - out.color(col_integer); - char[64] tmp; - i32 len; - switch (tok.getRadix()) { - case Hex: - len = sprintf(tmp, "0x%x", tok.int_value); - break; - default: - len = sprintf(tmp, "%d", tok.int_value); - break; - } - out.add(tmp); - ctx.offset = tok.loc + len; - break; - case FloatLiteral: - out.color(col_float); - char[64] tmp; - i32 len; - switch (tok.getRadix()) { - case Hex: - len = sprintf(tmp, "%a", tok.float_value); - break; - default: - len = sprintf(tmp, "%#.16g", tok.float_value); - break; - } - out.add(tmp); - ctx.offset = tok.loc + len; - break; - case CharLiteral: - out.color(col_charconst); - char[64] tmp; - i32 len = 0; - switch (tok.getRadix()) { - case Hex: - len = sprintf(tmp, "'\\x%02x'", tok.char_value); - break; - case Octal: - len = sprintf(tmp, "'\\%o'", tok.char_value); - break; - default: - if (ctype.isprint(tok.char_value)) { - len = sprintf(tmp, "'%c'", tok.char_value); - } else { - tmp[0] = 0; - // TODO print nicely (eg \n etc) - } - break; - } - out.add(tmp); - ctx.offset = tok.loc + len; - break; - case StringLiteral: - out.color(col_string); - out.add1('"'); - u32 len = out.encodeBytes(ctx.pool.idx2str(tok.text_idx), tok.text_len, '"'); - out.add1('"'); - ctx.offset = tok.loc + len + 2; - break; - case LineComment: - out.color(col_comment); - const char* str = ctx.pool.idx2str(tok.text_idx); - out.print("//%s", str); - ctx.offset = tok.loc + (u32)strlen(str) + 2; - break; - case BlockComment: - out.color(col_comment); - const char* str = ctx.pool.idx2str(tok.text_idx); - out.print("/*%s*/", str); - ctx.offset = tok.loc + (u32)strlen(str) + 4; - break; - case Invalid: - out.color(col_invalid); - out.print("%s", tok.invalid); - ctx.offset = tok.loc + (u32)strlen(tok.invalid); - break; - case Error: - out.add1('\n'); - out.color(col_error); - out.print("error: %s", ctx.tokenizer.error_msg); - out.color(col_normal); - out.add1('\n'); - break; - default: - out.color(col_error); - out.print("token %s\n", tok.kind.str()); - ctx.offset = tok.loc + 1; - break; + if (s && *ctx.style_color[s]) { + out.color(ctx.style_color[Style.Normal]); } - out.color(col_normal); + ctx.offset += tok_len; } -public fn i32 c2cat(const char* filename) +fn void C2cat.on_tokenizer_error(void* arg, SrcLoc loc) { + C2cat* ctx = arg; + ctx.has_error = true; +} + +fn i32 c2cat(const char* filename, + bool use_color, + Style* token_style, + const char** style_color) { file_utils.File file.init("", filename); if (!file.load()) { @@ -292,51 +270,62 @@ public fn i32 c2cat(const char* filename) return -1; } - C2cat ctx = { } - ctx.pool = string_pool.create(16*1024, 1024); - ctx.out = string_buffer.create(16*1024, true, 2); - ctx.offset = 0; - ctx.input = file.data(); - ctx.in_attributes = 0; + string_pool.Pool* pool = string_pool.create(16*1024, 1024); + string_buffer.Buf* buf = string_buffer.create(1024, false, 0); + const char* input = file.data(); u32 file_size = file.data_size(); + keywords.Info kwinfo.init(pool); + string_list.List features.init(pool); + string_buffer.Buf* out = string_buffer.create(16*1024, use_color, 2); - string_list.List features; - features.init(ctx.pool); - string_buffer.Buf* buf = string_buffer.create(1024, false, 0); - keywords.Info kwinfo; - kwinfo.init(ctx.pool); - c2_tokenizer.Tokenizer tokenizer; - tokenizer.init(ctx.pool, buf, ctx.input, 0, &kwinfo, &features, nil, nil, nil, true); + C2cat ctx = { + .pool = pool, + .input = input, + .offset = 0, + .length = file_size, + .out = out, + .in_attributes = 0, + .token_style = token_style, + .style_color = style_color, + } + + c2_tokenizer.Tokenizer tokenizer.init(pool, buf, input, 1, &kwinfo, &features, + C2cat.on_tokenizer_error, C2cat.on_tokenizer_error, &ctx, true); ctx.tokenizer = &tokenizer; - Token tok; - tok.init(); + Token tok.init(); while (!tok.done) { tokenizer.lex(&tok); - //printf("%4d %s\n", tok.loc, tok.kind.str()); - + if (ctx.has_error) { + tok.kind = Error; + ctx.has_error = false; + } ctx.update_state(&tok); - ctx.print_token(&tok); } - if (ctx.offset <= file_size) { - u32 len = file_size - ctx.offset; - if (len) ctx.out.add2(ctx.input + ctx.offset, len); + if (ctx.offset <= ctx.length) { + // TODO: EOF token should have ctx.offset == ctx.length + u32 len = ctx.length - ctx.offset; + if (len) { + out.color(style_color[Style.Error]); + out.add2(ctx.input + ctx.offset, len); + out.color(style_color[Style.Normal]); + } } else { - ctx.out.add1('\n'); - ctx.out.color(col_error); - ctx.out.print("error: offset=%d file_size=%d", ctx.offset, file_size); - ctx.out.color(col_normal); - ctx.out.add1('\n'); + out.add1('\n'); + out.color(style_color[Style.Error]); + out.print("error: offset=%d file.size=%d", ctx.offset, ctx.length); + out.color(style_color[Style.Normal]); + out.add1('\n'); } - fputs(ctx.out.data(), stdout); + fputs(out.data(), stdout); fflush(stdout); - ctx.pool.free(); - ctx.out.free(); + out.free(); buf.free(); + pool.free(); file.close(); return 0; @@ -344,12 +333,42 @@ public fn i32 c2cat(const char* filename) public fn i32 main(i32 argc, const char** argv) { - if (argc == 1) usage(argv[0]); - init_colors(); + Style[elemsof(Kind)] token_style; + const char*[elemsof(Style)] style_color; + // TODO: use custom colors + bool use_color = init_colors(token_style, style_color); + i32 filenum = 0; + i32 nfiles = 0; + for (i32 i = 1; i < argc; i++) { + nfiles += (*argv[i] != '-'); + } for (i32 i = 1; i < argc; i++) { - if (argc > 2) - printf("==> %s <==\n", argv[i]); - c2cat(argv[i]); + const char* arg = argv[i]; + if (*arg == '-') { + switch (arg) { + case "--color": + use_color = true; + break; + case "--nocolor": + use_color = false; + break; + case "-?": + case "-h": + case "--help": + usage(argv[0]); + break; + default: + fprintf(stderr, "c2cat: unknown option %s\n", arg); + exit(EXIT_FAILURE); + } + } else { + if (nfiles > 1) { + if (filenum++) printf("\n"); + printf("==> %s <==\n", arg); + } + c2cat(arg, use_color, token_style, style_color); + } } + if (!nfiles) usage(argv[0]); return 0; }