From 7e7c5a144b1ddcce0de6812ac47b68bd28ec0159 Mon Sep 17 00:00:00 2001 From: brandonspark Date: Tue, 14 Jan 2025 13:49:46 -0800 Subject: [PATCH] update again --- .../src/semgrep-kotlin/src/scanner.c | 20 ++++++++++++++----- lang/semgrep-grammars/src/tree-sitter-kotlin | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lang/semgrep-grammars/src/semgrep-kotlin/src/scanner.c b/lang/semgrep-grammars/src/semgrep-kotlin/src/scanner.c index 30d532d..87a8926 100644 --- a/lang/semgrep-grammars/src/semgrep-kotlin/src/scanner.c +++ b/lang/semgrep-grammars/src/semgrep-kotlin/src/scanner.c @@ -239,12 +239,23 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) { return lexer->lookahead != '/'; } +// Test for any identifier character other than the first character. +// This is meant to match the regexp [\p{L}_\p{Nd}] +// as found in '_alpha_identifier' (see grammar.js). +static bool is_word_char(int32_t c) { + return (iswalnum(c) || c == '_'); +} + +// Scan for [the end of] a nonempty alphanumeric identifier or +// alphanumeric keyword (including '_'). static bool scan_for_word(TSLexer *lexer, const char* word, unsigned len) { skip(lexer); for (unsigned i = 0; i < len; ++i) { if (lexer->lookahead != word[i]) return false; skip(lexer); } + // check that the identifier stops here + if (is_word_char(lexer->lookahead)) return false; return true; } @@ -288,10 +299,8 @@ static bool scan_automatic_semicolon(TSLexer *lexer) { if (sameline) { switch (lexer->lookahead) { - // Don't insert a semicolon before an else - case 'e': - return !scan_for_word(lexer, "lse", 3); - + // Insert imaginary semicolon before an 'import' but not in front + // of other words or keywords starting with 'i' case 'i': return scan_for_word(lexer, "mport", 5); @@ -300,6 +309,7 @@ static bool scan_automatic_semicolon(TSLexer *lexer) { lexer->mark_end(lexer); return true; + // Don't insert a semicolon in other cases default: return false; } @@ -538,4 +548,4 @@ void tree_sitter_kotlin_external_scanner_deserialize(void *payload, const char * } else { array_clear(stack); } -} +} \ No newline at end of file diff --git a/lang/semgrep-grammars/src/tree-sitter-kotlin b/lang/semgrep-grammars/src/tree-sitter-kotlin index 76f53c4..0662afb 160000 --- a/lang/semgrep-grammars/src/tree-sitter-kotlin +++ b/lang/semgrep-grammars/src/tree-sitter-kotlin @@ -1 +1 @@ -Subproject commit 76f53c48d29e8588934fb55b0240d7bdfe00bfe5 +Subproject commit 0662afbd2ce19b17c603acf67ae707b4d69ab8f4