Skip to content

Commit

Permalink
Improved javascript template string expression extracting (#939)
Browse files Browse the repository at this point in the history
Co-authored-by: Rik <[email protected]>
Co-authored-by: Aarni Koskela <[email protected]>
  • Loading branch information
3 people authored Jan 6, 2023
1 parent 82c41cc commit d425f86
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 8 deletions.
59 changes: 53 additions & 6 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
:license: BSD, see LICENSE for more details.
"""
import ast
import io
import os
from os.path import relpath
import sys
from os.path import relpath
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING

from babel.util import parse_encoding, parse_future_flags, pathmatch
Expand Down Expand Up @@ -532,7 +533,7 @@ def _parse_python_string(value, encoding, future_flags):
return None


def extract_javascript(fileobj, keywords, comment_tags, options):
def extract_javascript(fileobj, keywords, comment_tags, options, lineno=1):
"""Extract messages from JavaScript source code.
:param fileobj: the seekable, file-like object the messages should be
Expand All @@ -544,7 +545,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
:param options: a dictionary of additional options (optional)
Supported options are:
* `jsx` -- set to false to disable JSX/E4X support.
* `template_string` -- set to false to disable ES6 template string support.
* `template_string` -- if `True`, supports gettext(`key`)
* `parse_template_string` -- if `True` will parse the
contents of javascript
template strings.
:param lineno: line number offset (for parsing embedded fragments)
"""
from babel.messages.jslexer import Token, tokenize, unquote_string
funcname = message_lineno = None
Expand All @@ -556,12 +561,12 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
last_token = None
call_stack = -1
dotted = any('.' in kw for kw in keywords)

for token in tokenize(
fileobj.read().decode(encoding),
jsx=options.get("jsx", True),
template_string=options.get("template_string", True),
dotted=dotted
dotted=dotted,
lineno=lineno
):
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
funcname and # have a keyword...
Expand All @@ -573,7 +578,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
call_stack = 0
token = Token('operator', ')', token.lineno)

if token.type == 'operator' and token.value == '(':
if options.get('parse_template_string') and not funcname and token.type == 'template_string':
for item in parse_template_string(token.value, keywords, comment_tags, options, token.lineno):
yield item

elif token.type == 'operator' and token.value == '(':
if funcname:
message_lineno = token.lineno
call_stack += 1
Expand Down Expand Up @@ -665,3 +674,41 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
funcname = token.value

last_token = token


def parse_template_string(template_string, keywords, comment_tags, options, lineno=1):
    """Parse JavaScript template string.

    Scans the template literal for ``${ ... }`` substitutions and feeds the
    source text of each one to :func:`extract_javascript`, yielding whatever
    that call yields.

    :param template_string: the template string to be parsed, including the
                            enclosing backticks
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :param lineno: line number on which the template string starts (optional)
    :return: a generator over the items produced by ``extract_javascript``
             for every substitution, in source order
    """
    from babel.messages.jslexer import line_re

    body = template_string[1:-1]  # drop the enclosing backticks
    depth = 0  # brace nesting; 0 means we are in the literal text part
    quote = None  # delimiter of the JS string we are inside (expressions only)
    escaped = False  # the previous character was an unescaped backslash
    dollar_pending = False  # the previous character was an unescaped `$`
    expr_start = 0  # index of the first character of the current expression
    counted_upto = 0  # newlines in body[:counted_upto] are already in lineno

    for index, character in enumerate(body):
        if escaped:
            # This character is consumed by the preceding backslash, so it can
            # neither close a string nor take part in a `${` opener.
            escaped = False
            dollar_pending = False
            continue
        if character == '\\':
            escaped = True
            dollar_pending = False
            continue

        if depth == 0:
            # Literal text: quotes have no meaning here, only `${` matters.
            if character == '{' and dollar_pending:
                depth = 1
                expr_start = index + 1
                # Account for line breaks in the text preceding the expression
                # so the reported line numbers match the real source lines.
                lineno += len(line_re.findall(body[counted_upto:expr_start]))
                counted_upto = expr_start
            dollar_pending = character == '$'
            continue

        if quote is not None:
            # Inside a string within an expression: wait for its delimiter.
            # A backtick string is skipped as a whole; nested template strings
            # are handled when `extract_javascript` tokenizes the expression.
            if character == quote:
                quote = None
            continue

        if character in ('"', "'", '`'):
            quote = character
        elif character == '{':
            # Object literals / function bodies must not end the expression.
            depth += 1
        elif character == '}':
            depth -= 1
            if depth == 0:
                expression = body[expr_start:index]
                if expression:
                    # NOTE(review): encodes as UTF-8 while `extract_javascript`
                    # decodes with its own `encoding`; presumably both are
                    # UTF-8 by default -- confirm for other encodings.
                    fake_file_obj = io.BytesIO(expression.encode())
                    for item in extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno):
                        yield item
                lineno += len(line_re.findall(expression))
                counted_upto = index
4 changes: 2 additions & 2 deletions babel/messages/jslexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,17 @@ def unquote_string(string):
return u''.join(result)


def tokenize(source, jsx=True, dotted=True, template_string=True):
def tokenize(source, jsx=True, dotted=True, template_string=True, lineno=1):
"""
Tokenize JavaScript/JSX source. Returns a generator of tokens.
:param jsx: Enable (limited) JSX parsing.
:param dotted: Read dotted names as single name token.
:param template_string: Support ES6 template strings
:param lineno: starting line number (optional)
"""
may_divide = False
pos = 0
lineno = 1
end = len(source)
rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)

Expand Down
39 changes: 39 additions & 0 deletions tests/messages/test_js_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,42 @@ def test_template_string_tag_usage():
)

assert messages == [(1, 'Tag template, wow', [], None)]


def test_inside_template_string():
    """A keyword call inside a ``${...}`` substitution is extracted."""
    source = BytesIO(b"const msg = `${gettext('Hello')} ${user.name}`")
    extraction_options = {'parse_template_string': True}
    extracted = list(
        extract.extract(
            'javascript',
            source,
            {"gettext": None},
            [],
            extraction_options,
        )
    )

    expected = [(1, 'Hello', [], None)]
    assert extracted == expected


def test_inside_template_string_with_linebreaks():
    """Line numbers are tracked across substitutions that span several lines."""
    source = BytesIO(b"""\
const userName = gettext('Username')
const msg = `${
gettext('Hello')
} ${userName} ${
gettext('Are you having a nice day?')
}`
const msg2 = `${
gettext('Howdy')
} ${userName} ${
gettext('Are you doing ok?')
}`
""")
    extraction_options = {'parse_template_string': True}
    extracted = list(
        extract.extract(
            'javascript',
            source,
            {"gettext": None},
            [],
            extraction_options,
        )
    )

    expected = [
        (1, 'Username', [], None),
        (3, 'Hello', [], None),
        (5, 'Are you having a nice day?', [], None),
        (8, 'Howdy', [], None),
        (10, 'Are you doing ok?', [], None),
    ]
    assert extracted == expected


def test_inside_nested_template_string():
    """Keyword calls in template strings nested inside substitutions are found."""
    source = BytesIO(b"const msg = `${gettext('Greetings!')} ${ evening ? `${user.name}: ${gettext('This is a lovely evening.')}` : `${gettext('The day is really nice!')} ${user.name}`}`")
    extraction_options = {'parse_template_string': True}
    extracted = list(
        extract.extract(
            'javascript',
            source,
            {"gettext": None},
            [],
            extraction_options,
        )
    )

    expected = [
        (1, 'Greetings!', [], None),
        (1, 'This is a lovely evening.', [], None),
        (1, 'The day is really nice!', [], None),
    ]
    assert extracted == expected

0 comments on commit d425f86

Please sign in to comment.