gh-105017: Fix inclusion of an additional NL token when using CRLF line endings (#105022)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index abb6885..293592b 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -84,6 +84,14 @@
NEWLINE '\\n' (4, 26) (4, 27)
DEDENT '' (5, 0) (5, 0)
""")
+
+ self.check_tokenize("foo='bar'\r\n", """\
+ NAME 'foo' (1, 0) (1, 3)
+ OP '=' (1, 3) (1, 4)
+ STRING "'bar'" (1, 4) (1, 9)
+ NEWLINE '\\n' (1, 9) (1, 10)
+ """)
+
indent_error_file = b"""\
def k(x):
x += 2
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst b/Misc/NEWS.d/next/Core and Builtins/2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst
new file mode 100644
index 0000000..d41a216
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst
@@ -0,0 +1 @@
+Do not include an additional final ``NL`` token when parsing files with CRLF line endings. Patch by Marta Gómez.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index a7651b1..a84c249 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -800,7 +800,7 @@
}
/* If this is exec input, add a newline to the end of the string if
there isn't one already. */
- if (exec_input && c != '\n') {
+ if (exec_input && c != '\n' && c != '\0') {
*current = '\n';
current++;
}