@@ -255,7 +255,8 @@ public type Feature struct {
     bool is_else;   // inside the #else block
 }
 
-public type HandlerFn fn void (void* arg, SrcLoc loc);
+public type ErrorLevel enum u8 { Warning, Error, FatalError }
+public type ErrorFn fn void (void* arg, ErrorLevel level, SrcLoc loc, const char* msg);
 
 public type Tokenizer struct {
     const char* cur;
@@ -270,9 +271,8 @@ public type Tokenizer struct {
 
     string_pool.Pool* pool;     // no ownership
     string_buffer.Buf* buf;     // no ownership, used for strings and character constants
-    HandlerFn on_error;
-    HandlerFn on_warning;
-    void* fn_arg;
+    ErrorFn on_error;
+    void* on_error_arg;
 
     // Feature handling
     Feature[constants.MaxFeatureDepth+1] feature_stack;
@@ -283,7 +283,7 @@ public type Tokenizer struct {
 
     char[256] error_msg;
 }
-static_assert(1448, sizeof(Tokenizer));
+static_assert(1440, sizeof(Tokenizer));
 
 public fn void Tokenizer.init(Tokenizer* t,
                               string_pool.Pool* pool,
@@ -292,9 +292,8 @@ public fn void Tokenizer.init(Tokenizer* t,
                               SrcLoc loc_start,
                               const keywords.Info* kwinfo,
                               const string_list.List* features,
-                              HandlerFn on_error,
-                              HandlerFn on_warning,
-                              void* fn_arg,
+                              ErrorFn on_error,
+                              void* on_error_arg,
                               bool raw_mode)
 {
     string.memset(t, 0, sizeof(Tokenizer));
@@ -307,8 +306,7 @@ public fn void Tokenizer.init(Tokenizer* t,
     t.pool = pool;
     t.buf = buf;
     t.on_error = on_error;
-    t.on_warning = on_warning;
-    t.fn_arg = fn_arg;
+    t.on_error_arg = on_error_arg;
 
     t.features = features;
     t.raw_mode = raw_mode;
@@ -708,7 +706,7 @@ fn void Tokenizer.error(Tokenizer* t, Token* result, const char* format @(printf
     result.kind = Kind.Error;
     result.error_msg = t.error_msg;
     result.done = true;
-    if (t.on_error) t.on_error(t.fn_arg, result.loc);
+    if (t.on_error) t.on_error(t.on_error_arg, ErrorLevel.FatalError, result.loc, t.error_msg);
 }
 
 // generate an error but keep parsing
@@ -736,7 +734,7 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch
     }
     t.cur = p;
     result.len = (u16)((p - t.input_start) - (result.loc - t.loc_start));
-    if (t.on_warning) t.on_warning(t.fn_arg, result.loc);
+    if (t.on_error) t.on_error(t.on_error_arg, ErrorLevel.Error, result.loc, t.error_msg);
 }
 
 fn void Tokenizer.lex_identifier(Tokenizer* t, Token* result) {
@@ -1490,28 +1488,36 @@ fn bool Tokenizer.at_bol(Tokenizer* t) {
 
 fn bool Tokenizer.parse_error_warn(Tokenizer* t, Token* result, Kind kind) {
     const char* start = t.cur;
-    while (*t.cur != '\0' && *t.cur != '\r' && *t.cur != '\n')
-        t.cur++;
-    usize len = (usize)(t.cur - start);
-    if (len > constants.MaxErrorMsgLen) {
-        t.error(result, "error msg too long (max %d bytes)", constants.MaxErrorMsgLen);
-        return true;
+
+    // parse pptokens instead of raw text
+    string_buffer.Buf* msg = string_buffer.create_static(elemsof(t.error_msg), false, t.error_msg);
+    SrcLoc last_loc = 0;
+    while (t.lex_preproc(result) != Kind.Eof) {
+        // replace blanks with a single space
+        if (last_loc && last_loc < result.loc) msg.add1(' ');
+        // copy string text or token source
+        if (result.kind == Kind.StringLiteral) {
+            msg.add2(t.pool.idx2str(result.text_idx), result.text_len);
+        } else {
+            msg.add2(t.input_start + (result.loc - t.loc_start), result.len);
+        }
+        last_loc = result.loc + result.len;
     }
-    char[constants.MaxErrorMsgLen+1] msg;
-    string.memcpy(msg, start, len);
-    msg[len] = 0;
+    msg.size();     // ensure null terminator
 
     if (kind == Kind.Feat_error) {
-        t.cur = t.line_start;
-        t.error(result, "%s", msg);
-    } else {
-        // TODO: output diagnostic synchronously
-        string.strcpy(t.error_msg, msg);
-        result.kind = Kind.Warning;
-        result.len = (u16)((t.cur - t.input_start) - (result.loc - t.loc_start));
+        t.cur = t.line_start;   // restart on the same line
+        result.kind = Kind.Error;
+        result.done = true;
+        result.loc = t.loc_start + (SrcLoc)(t.line_start - t.input_start);
+        result.len = (u16)(t.cur - start);
         result.error_msg = t.error_msg;
+        if (t.on_error) t.on_error(t.on_error_arg, ErrorLevel.FatalError, result.loc, t.error_msg);
+        return true;    // return error token with result.done set
+    } else {
+        if (t.on_error) t.on_error(t.on_error_arg, ErrorLevel.Warning, result.loc, t.error_msg);
+        return false;   // continue reading tokens
     }
-    return true;
 }
 
 fn bool Tokenizer.is_enabled(const Tokenizer* t) {
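
Note: with this change the tokenizer reports both warnings and errors through a single ErrorFn callback; the severity arrives as an ErrorLevel and the message text is passed directly instead of being read back from the tokenizer afterwards. A minimal caller-side sketch of such a handler follows (the Diags type and its warn()/error() methods are hypothetical illustrations; only the ErrorFn signature comes from this patch, and void* is assumed to convert implicitly as in C):

// hypothetical diagnostics sink on the caller side
fn void on_diag(void* arg, ErrorLevel level, SrcLoc loc, const char* msg) {
    Diags* diags = arg;     // implicit void* conversion, as in C
    if (level == ErrorLevel.Warning) {
        diags.warn(loc, "%s", msg);     // hypothetical method
    } else {
        diags.error(loc, "%s", msg);    // covers Error and FatalError
    }
}

The handler and its context are then passed to Tokenizer.init() as the on_error/on_error_arg pair, replacing the former on_error/on_warning/fn_arg triple.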