Allow extracting nested calls in Javascript
Currently the Javascript extractor does not support nested gettext calls
at all.

The extraction code was refactored a bit to resemble the Python code
as much as possible and to support this use case.
dylankiss committed Oct 17, 2024
1 parent 71b33d0 commit 4df7e66
Showing 2 changed files with 156 additions and 77 deletions.
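For context, a minimal sketch of the behaviour this commit enables. It mirrors the extract() call used in the new test further down; the JavaScript snippet itself is only illustrative:

from io import BytesIO

from babel.messages import extract

# A nested gettext call that the old extractor could not handle at all.
source = BytesIO(b'gettext("Hello %s", gettext("User"));')

# Run the 'javascript' extractor with gettext as the only keyword,
# as the new test below does.
messages = list(extract.extract('javascript', source, {"gettext": None}, [], {}))

# The inner call is yielded before the enclosing one, matching the new test:
# [(1, 'User', [], None), (1, 'Hello %s', [], None)]
print(messages)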
208 changes: 131 additions & 77 deletions babel/messages/extract.py
@@ -706,54 +706,109 @@ def extract_javascript(
:param lineno: line number offset (for parsing embedded fragments)
"""
from babel.messages.jslexer import Token, tokenize, unquote_string
funcname = message_lineno = None
messages = []
last_argument = None
translator_comments = []
concatenate_next = False

encoding = options.get('encoding', 'utf-8')
last_token = None
call_stack = -1
dotted = any('.' in kw for kw in keywords)
last_token = None
# Keep the stack of all function calls and its related contextual variables,
# so we can handle nested gettext calls.
function_stack = []
# Keep track of whether we're in a class or function definition
in_def = False
# Keep track of whether we're in a block of translator comments
in_translator_comments = False
# Keep track of the last encountered translator comments
translator_comments = []
# Keep track of the (split) strings encountered
message_buffer = []

for token in tokenize(
fileobj.read().decode(encoding),
jsx=options.get("jsx", True),
template_string=options.get("template_string", True),
jsx=options.get('jsx', True),
template_string=options.get('template_string', True),
dotted=dotted,
lineno=lineno,
):
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
funcname and # have a keyword...
(last_token and last_token.type == 'name') and # we've seen nothing after the keyword...
token.type == 'template_string' # this is a template string
if token.type == 'name' and token.value in ('class', 'function'):
# We're entering a class or function definition
in_def = True

elif in_def and token.type == 'operator' and token.value in ('(', '{'):
# We're in a class or function definition and should not do anything
in_def = False
continue

elif (
last_token
and last_token.type == 'name'
and last_token.value in keywords
and token.type == 'template_string'
):
message_lineno = token.lineno
messages = [unquote_string(token.value)]
call_stack = 0
# Turn keyword`foo` expressions into keyword("foo") function calls
string_value = unquote_string(token.value)
cur_translator_comments = translator_comments
if function_stack and function_stack[-1]['function_line_no'] == last_token.lineno:
# If our current function call is on the same line as the previous one,
# copy their translator comments, since they also apply to us.
cur_translator_comments = function_stack[-1]['translator_comments']

# We add all information needed later for the current function call
function_stack.append({
'function_line_no': last_token.lineno,
'function_name': last_token.value,
'message_line_no': token.lineno,
'messages': [string_value],
'translator_comments': cur_translator_comments,
})
translator_comments = []

# We act as if we are closing the function call now
token = Token('operator', ')', token.lineno)

if options.get('parse_template_string') and not funcname and token.type == 'template_string':
if (
options.get('parse_template_string')
and (not last_token or last_token.type != 'name' or last_token.value not in keywords)
and token.type == 'template_string'
):
yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno)

elif token.type == 'operator' and token.value == '(':
if funcname:
message_lineno = token.lineno
call_stack += 1
if last_token.type == 'name':
# We're entering a function call
cur_translator_comments = translator_comments
if function_stack and function_stack[-1]['function_line_no'] == token.lineno:
# If our current function call is on the same line as the previous one,
# copy their translator comments, since they also apply to us.
cur_translator_comments = function_stack[-1]['translator_comments']

# We add all information needed later for the current function call
function_stack.append({
'function_line_no': token.lineno,
'function_name': last_token.value,
'message_line_no': None,
'messages': [],
'translator_comments': cur_translator_comments,
})
translator_comments = []

elif call_stack == -1 and token.type == 'linecomment':
elif token.type == 'linecomment':
# Strip the comment token from the line
value = token.value[2:].strip()
if translator_comments and \
translator_comments[-1][0] == token.lineno - 1:
if in_translator_comments and translator_comments[-1][0] == token.lineno - 1:
# We're already inside a translator comment, continue appending
translator_comments.append((token.lineno, value))
continue

for comment_tag in comment_tags:
if value.startswith(comment_tag):
translator_comments.append((token.lineno, value.strip()))
# Comment starts with one of the comment tags,
# so let's start capturing it
in_translator_comments = True
translator_comments.append((token.lineno, value))
break

elif token.type == 'multilinecomment':
# only one multi-line comment may precede a translation
# Only one multi-line comment may precede a translation
translator_comments = []
value = token.value[2:-2].strip()
for comment_tag in comment_tags:
@@ -763,68 +818,67 @@ def extract_javascript(
lines[0] = lines[0].strip()
lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
for offset, line in enumerate(lines):
translator_comments.append((token.lineno + offset,
line))
translator_comments.append((token.lineno + offset, line))
break

elif funcname and call_stack == 0:
elif function_stack and function_stack[-1]['function_name'] in keywords:
# We're inside a translation function call
if token.type == 'operator' and token.value == ')':
if last_argument is not None:
messages.append(last_argument)
if len(messages) > 1:
messages = tuple(messages)
elif messages:
messages = messages[0]
# The call has ended, so we yield the translatable term(s)
messages = function_stack[-1]['messages']
line_no = (
function_stack[-1]['message_line_no']
or function_stack[-1]['function_line_no']
)
cur_translator_comments = function_stack[-1]['translator_comments']

if message_buffer:
messages.append(''.join(message_buffer))
message_buffer.clear()
else:
messages = None
messages.append(None)

# Comments don't apply unless they immediately precede the
# message
if translator_comments and \
translator_comments[-1][0] < message_lineno - 1:
translator_comments = []
messages = tuple(messages) if len(messages) > 1 else messages[0]
if (
cur_translator_comments
and cur_translator_comments[-1][0] < line_no - 1
):
# The translator comments are not immediately preceding the current
# term, so we skip them.
cur_translator_comments = []

if messages is not None:
yield (message_lineno, funcname, messages,
[comment[1] for comment in translator_comments])
yield (
line_no,
function_stack[-1]['function_name'],
messages,
[comment[1] for comment in cur_translator_comments],
)

funcname = message_lineno = last_argument = None
concatenate_next = False
translator_comments = []
messages = []
call_stack = -1
function_stack.pop()

elif token.type in ('string', 'template_string'):
new_value = unquote_string(token.value)
if concatenate_next:
last_argument = (last_argument or '') + new_value
concatenate_next = False
# We've encountered a string inside a translation function call
string_value = unquote_string(token.value)
if not function_stack[-1]['message_line_no']:
function_stack[-1]['message_line_no'] = token.lineno
if string_value is not None:
message_buffer.append(string_value)

elif token.type == 'operator' and token.value == ',':
# End of a function call argument
if message_buffer:
function_stack[-1]['messages'].append(''.join(message_buffer))
message_buffer.clear()
else:
last_argument = new_value

elif token.type == 'operator':
if token.value == ',':
if last_argument is not None:
messages.append(last_argument)
last_argument = None
else:
messages.append(None)
concatenate_next = False
elif token.value == '+':
concatenate_next = True

elif call_stack > 0 and token.type == 'operator' \
and token.value == ')':
call_stack -= 1

elif funcname and call_stack == -1:
funcname = None

elif call_stack == -1 and token.type == 'name' and \
token.value in keywords and \
(last_token is None or last_token.type != 'name' or
last_token.value != 'function'):
funcname = token.value
function_stack[-1]['messages'].append(None)

elif function_stack and token.type == 'operator' and token.value == ')':
function_stack.pop()

if in_translator_comments and translator_comments[-1][0] < token.lineno:
# We have a newline in between the comments, so they don't belong
# together anymore
in_translator_comments = False

last_token = token

Expand Down
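To make the bookkeeping above easier to follow: every '(' that directly follows a name pushes a small record onto function_stack, and the matching ')' pops it again, yielding the collected messages when the name is one of the keywords. A hypothetical record for gettext("Hello %s", ...) opened on line 1 (field names taken from the diff above, values illustrative):

call_record = {
    'function_line_no': 1,       # line where the call was opened
    'function_name': 'gettext',  # the name token preceding the '('
    'message_line_no': 1,        # line of the first string argument (None until one is seen)
    'messages': ['Hello %s'],    # completed string arguments; None marks non-string arguments
    'translator_comments': [],   # (lineno, comment) pairs that apply to this call
}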
25 changes: 25 additions & 0 deletions tests/messages/test_js_extract.py
@@ -191,3 +191,28 @@ def test_inside_nested_template_string():
)

assert messages == [(1, 'Greetings!', [], None), (1, 'This is a lovely evening.', [], None), (1, 'The day is really nice!', [], None)]

def test_nested_gettext_calls():
buf = BytesIO(b"""\
gettext("Hello %s", gettext("User"));
gettext("Hello %(user)s", { user: gettext("User") });
gettext("Hello %s", dummy.dummyFunction(gettext("User")));
gettext(
"Hello %(user)s",
{ user: dummy.dummyFunction(gettext("User")) },
);
""")
messages = list(
extract.extract('javascript', buf, {"gettext": None}, [], {}),
)

assert messages == [
(1, 'User', [], None),
(1, 'Hello %s', [], None),
(2, 'User', [], None),
(2, 'Hello %(user)s', [], None),
(3, 'User', [], None),
(3, 'Hello %s', [], None),
(6, 'User', [], None),
(5, 'Hello %(user)s', [], None),
]
