Skip to content

Commit

Permalink
extract: Determine python-format flag explicitly
Browse files Browse the repository at this point in the history
During extraction, Message instances can be created with the
"python-format" flag, indicating that the message string contains Python
percent-formatting placeholders. To avoid setting the flag erroneously
because the string source is not Python code or otherwise is not
expected to contain such placeholders, the extractor interface must be
extended to allow extractor functions to indicate which flags are
valid.

Fixes python-babel#35
  • Loading branch information
Jonathan Ross Rogers committed Oct 21, 2015
1 parent 5116c16 commit a40b955
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 55 deletions.
12 changes: 8 additions & 4 deletions babel/messages/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,7 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
self.string = string #: The message translation
self.locations = list(distinct(locations))
self.flags = set(flags)
if id and self.python_format:
self.flags.add('python-format')
else:
self.flags.discard('python-format')

self.auto_comments = list(distinct(auto_comments))
self.user_comments = list(distinct(user_comments))
if isinstance(previous_id, string_types):
Expand All @@ -112,6 +109,13 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
self.lineno = lineno
self.context = context

def determine_python_format(self):
    """Synchronise the ``python-format`` flag with the message content.

    The flag is added to ``self.flags`` when the message has a truthy
    ``id`` and ``self.python_format`` is truthy.  In every other case the
    flag is removed if it was present.  ``self.flags`` is modified in
    place and nothing is returned.
    """
    if self.id and self.python_format:
        self.flags.add('python-format')
    else:
        # ``discard`` rather than ``remove``: the flag may never have been
        # set, and that must not raise.
        self.flags.discard('python-format')

# Debug representation: the concrete class name, the message id and the
# message's current flags rendered as a list.
def __repr__(self):
return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
list(self.flags))
Expand Down
32 changes: 20 additions & 12 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING,
"""Extract messages from any source files found in the given directory.
This function generates tuples of the form ``(filename, lineno, message,
comments, context)``.
comments, context, flags)``.
Which extraction method is used per file is determined by the `method_map`
parameter, which maps extended glob patterns to extraction method names.
Expand Down Expand Up @@ -154,23 +154,23 @@ def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING,
options = odict
if callback:
callback(filename, method, options)
for lineno, message, comments, context in \
for lineno, message, comments, context, flags in \
extract_from_file(method, filepath,
keywords=keywords,
comment_tags=comment_tags,
options=options,
strip_comment_tags=
strip_comment_tags):
yield filename, lineno, message, comments, context
yield filename, lineno, message, comments, context, flags
break


def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
comment_tags=(), options=None, strip_comment_tags=False):
"""Extract messages from a specific file.
This function returns a list of tuples of the form ``(lineno, funcname,
message)``.
This function returns a list of tuples of the form
``(lineno, messages, comments, context, flags)``.
:param filename: the path to the file to extract messages from
:param method: a string specifying the extraction method (e.g. "python")
Expand All @@ -197,7 +197,8 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
"""Extract messages from the given file-like object using the specified
extraction method.
This function returns tuples of the form ``(lineno, message, comments)``.
This generator function yields tuples of the form
``(lineno, messages, comments, context, flags)``.
The implementation dispatches the actual extraction to plugins, based on the
value of the ``method`` parameter.
Expand All @@ -210,7 +211,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
>>> from StringIO import StringIO
>>> for message in extract('python', StringIO(source)):
... print message
(3, u'Hello, world!', [], None)
(3, u'Hello, world!', [], None, ())
:param method: a string specifying the extraction method (e.g. "python");
if this is a simple name, the extraction function will be
Expand Down Expand Up @@ -261,10 +262,17 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
if func is None:
raise ValueError('Unknown extraction method %r' % method)

results = func(fileobj, keywords.keys(), comment_tags,
options=options or {})
for result in func(fileobj, keywords.keys(), comment_tags,
options=options or {}):
flags = ()
if len(result) == 4:
lineno, funcname, messages, comments = result
elif len(result) == 5:
lineno, funcname, messages, comments, flags = result
else:
raise ValueError(
'Extraction function must yield tuples with 4 or 5 values')

for lineno, funcname, messages, comments in results:
if funcname:
spec = keywords[funcname] or (1,)
else:
Expand Down Expand Up @@ -315,7 +323,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),

if strip_comment_tags:
_strip_comment_tags(comments, comment_tags)
yield lineno, messages, comments, context
yield lineno, messages, comments, context, flags


def extract_nothing(fileobj, keywords, comment_tags, options):
Expand Down Expand Up @@ -408,7 +416,7 @@ def extract_python(fileobj, keywords, comment_tags, options):
translator_comments = []

yield (message_lineno, funcname, messages,
[comment[1] for comment in translator_comments])
[comment[1] for comment in translator_comments], ())

funcname = lineno = message_lineno = None
call_stack = -1
Expand Down
9 changes: 5 additions & 4 deletions babel/messages/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,9 +302,10 @@ def callback(filename, method, options):
callback=callback,
strip_comment_tags=
self.strip_comments)
for filename, lineno, message, comments, context in extracted:
for filename, lineno, message, comments, context, flags \
in extracted:
filepath = os.path.normpath(os.path.join(dirname, filename))
catalog.add(message, None, [(filepath, lineno)],
catalog.add(message, None, [(filepath, lineno)], flags=flags,
auto_comments=comments, context=context)

log.info('writing PO template file to %s' % self.output_file)
Expand Down Expand Up @@ -916,9 +917,9 @@ def callback(filename, method, options):
callback=callback,
strip_comment_tags=
options.strip_comment_tags)
for filename, lineno, message, comments, context in extracted:
for filename, lineno, message, comments, context, flags in extracted:
filepath = os.path.normpath(os.path.join(dirname, filename))
catalog.add(message, None, [(filepath, lineno)],
catalog.add(message, None, [(filepath, lineno)], flags=flags,
auto_comments=comments, context=context)

catalog_charset = catalog.charset
Expand Down
4 changes: 2 additions & 2 deletions babel/messages/pofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,9 @@ def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
message catalog to the provided file-like object.
>>> catalog = Catalog()
>>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
>>> message = catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
... flags=('fuzzy',))
<Message...>
>>> message.determine_python_format()
>>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
<Message...>
>>> from io import BytesIO
Expand Down
75 changes: 42 additions & 33 deletions tests/messages/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ def test_nested_calls(self):
extract.DEFAULT_KEYWORDS.keys(),
[], {}))
self.assertEqual([
(1, '_', None, []),
(2, 'ungettext', (None, None, None), []),
(3, 'ungettext', (u'Babel', None, None), []),
(4, 'ungettext', (None, u'Babels', None), []),
(5, 'ungettext', (u'bunny', u'bunnies', None), []),
(6, 'ungettext', (None, u'bunnies', None), []),
(7, '_', None, []),
(8, 'gettext', u'Rabbit', []),
(9, 'dgettext', (u'wiki', None), []),
(10, 'dngettext', (None, u'Page', u'Pages', None), [])],
(1, '_', None, [], ()),
(2, 'ungettext', (None, None, None), [], ()),
(3, 'ungettext', (u'Babel', None, None), [], ()),
(4, 'ungettext', (None, u'Babels', None), [], ()),
(5, 'ungettext', (u'bunny', u'bunnies', None), [], ()),
(6, 'ungettext', (None, u'bunnies', None), [], ()),
(7, '_', None, [], ()),
(8, 'gettext', u'Rabbit', [], ()),
(9, 'dgettext', (u'wiki', None), [], ()),
(10, 'dngettext', (None, u'Page', u'Pages', None), [], ())],
messages)

def test_nested_comments(self):
Expand All @@ -58,7 +58,7 @@ def test_nested_comments(self):
""")
messages = list(extract.extract_python(buf, ('ngettext',),
['TRANSLATORS:'], {}))
self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [])],
self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [], ())],
messages)

def test_comments_with_calls_that_spawn_multiple_lines(self):
Expand All @@ -83,21 +83,21 @@ def test_comments_with_calls_that_spawn_multiple_lines(self):

{'strip_comment_tags':False}))
self.assertEqual((6, '_', 'Locale deleted.',
[u'NOTE: This Comment SHOULD Be Extracted']),
[u'NOTE: This Comment SHOULD Be Extracted'], ()),
messages[1])
self.assertEqual((10, 'ngettext', (u'Foo deleted.', u'Foos deleted.',
None),
[u'NOTE: This Comment SHOULD Be Extracted']),
[u'NOTE: This Comment SHOULD Be Extracted'], ()),
messages[2])
self.assertEqual((3, 'ngettext',
(u'Catalog deleted.',
u'Catalogs deleted.', None),
[u'NOTE: This Comment SHOULD Be Extracted']),
[u'NOTE: This Comment SHOULD Be Extracted'], ()),
messages[0])
self.assertEqual((15, 'ngettext', (u'Bar deleted.', u'Bars deleted.',
None),
[u'NOTE: This Comment SHOULD Be Extracted',
u'NOTE: And This One Too']),
u'NOTE: And This One Too'], ()),
messages[3])

def test_declarations(self):
Expand All @@ -114,9 +114,9 @@ class Meta:
messages = list(extract.extract_python(buf,
extract.DEFAULT_KEYWORDS.keys(),
[], {}))
self.assertEqual([(3, '_', u'Page arg 1', []),
(3, '_', u'Page arg 2', []),
(8, '_', u'log entry', [])],
self.assertEqual([(3, '_', u'Page arg 1', [], ()),
(3, '_', u'Page arg 2', [], ()),
(8, '_', u'log entry', [], ())],
messages)

def test_multiline(self):
Expand All @@ -128,8 +128,8 @@ def test_multiline(self):
count)
""")
messages = list(extract.extract_python(buf, ('ngettext',), [], {}))
self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), []),
(3, 'ngettext', (u'elvis', u'elvises', None), [])],
self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [], ()),
(3, 'ngettext', (u'elvis', u'elvises', None), [], ())],
messages)

def test_triple_quoted_strings(self):
Expand All @@ -141,9 +141,9 @@ def test_triple_quoted_strings(self):
messages = list(extract.extract_python(buf,
extract.DEFAULT_KEYWORDS.keys(),
[], {}))
self.assertEqual([(1, '_', (u'pylons'), []),
(2, 'ngettext', (u'elvis', u'elvises', None), []),
(3, 'ngettext', (u'elvis', u'elvises', None), [])],
self.assertEqual([(1, '_', (u'pylons'), [], ()),
(2, 'ngettext', (u'elvis', u'elvises', None), [], ()),
(3, 'ngettext', (u'elvis', u'elvises', None), [], ())],
messages)

def test_multiline_strings(self):
Expand All @@ -159,7 +159,7 @@ def test_multiline_strings(self):
[(1, '_',
u'This module provides internationalization and localization\n'
'support for your Python programs by providing an interface to '
'the GNU\ngettext message catalog library.', [])],
'the GNU\ngettext message catalog library.', [], ())],
messages)

def test_concatenated_strings(self):
Expand Down Expand Up @@ -400,9 +400,9 @@ def test_simple_extract(self):
list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS,
[], {}))

self.assertEqual([(1, 'simple', [], None),
(2, 'simple', [], None),
(3, ('s', 'p'), [], None)], messages)
self.assertEqual([(1, 'simple', [], None, ()),
(2, 'simple', [], None, ()),
(3, ('s', 'p'), [], None, ())], messages)

def test_various_calls(self):
buf = BytesIO(b"""\
Expand All @@ -420,9 +420,9 @@ def test_various_calls(self):
messages = \
list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [],
{}))
self.assertEqual([(5, (u'bunny', u'bunnies'), [], None),
(8, u'Rabbit', [], None),
(10, (u'Page', u'Pages'), [], None)], messages)
self.assertEqual([(5, (u'bunny', u'bunnies'), [], None, ()),
(8, u'Rabbit', [], None, ()),
(10, (u'Page', u'Pages'), [], None, ())], messages)

def test_message_with_line_comment(self):
buf = BytesIO(u"""\
Expand Down Expand Up @@ -479,6 +479,10 @@ def test_misplaced_comments(self):
self.assertEqual([], messages[2][3])


def extract_bad(fileobj, keywords, comment_tags, options):
    """Deliberately malformed extractor used as a test fixture.

    Accepts the standard extractor arguments but ignores all of them and
    yields a single one-element tuple, which is neither the 4-value nor
    the 5-value shape an extraction function is expected to produce.
    """
    yield (None,)


class ExtractTestCase(unittest.TestCase):

def test_invalid_filter(self):
Expand All @@ -497,14 +501,19 @@ def test_invalid_filter(self):
messages = \
list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [],
{}))
self.assertEqual([(5, (u'bunny', u'bunnies'), [], None),
(8, u'Rabbit', [], None),
(10, (u'Page', u'Pages'), [], None)], messages)
self.assertEqual([(5, (u'bunny', u'bunnies'), [], None, ()),
(8, u'Rabbit', [], None, ()),
(10, (u'Page', u'Pages'), [], None, ())], messages)

def test_invalid_extract_method(self):
    """An unknown extraction method name must raise ``ValueError``."""
    empty_source = BytesIO(b'')
    # The extraction call is made up front; only its consumption by
    # ``list`` is expected to raise.
    pending = extract.extract('spam', empty_source)
    with self.assertRaises(ValueError):
        list(pending)

def test_bad_extract_function(self):
    """An extractor yielding wrongly sized tuples must raise ``ValueError``."""
    method = 'tests.messages.test_extract:extract_bad'
    # The extraction call is made up front; only its consumption by
    # ``list`` is expected to raise.
    pending = extract.extract(method, '')
    with self.assertRaises(ValueError):
        list(pending)

def test_different_signatures(self):
buf = BytesIO(b"""
foo = _('foo', 'bar')
Expand Down

0 comments on commit a40b955

Please sign in to comment.