From a9781b11f1e549844345fcc1b25e6f4db0fa4315 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Rogers Date: Wed, 10 Feb 2016 21:26:18 -0500 Subject: [PATCH] extract: Determine python-format flag explicitly During extraction, Message instances can be created with the "python-format" flag, indicating that the message string contains Python percent-formatting placeholders. To avoid setting the flag erroneously because the string source is not Python code or otherwise is not expected to contain such placeholders, the extractor interface must be extended to allow extractor functions to indicate which flags are valid. Fixes https://github.com/python-babel/babel/issues/35 --- babel/messages/catalog.py | 12 ++++-- babel/messages/extract.py | 32 +++++++++------ babel/messages/frontend.py | 9 ++-- babel/messages/pofile.py | 4 +- tests/messages/test_extract.py | 75 +++++++++++++++++++--------------- 5 files changed, 77 insertions(+), 55 deletions(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index f72a34fca..876609f47 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -99,10 +99,7 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), self.string = string self.locations = list(distinct(locations)) self.flags = set(flags) - if id and self.python_format: - self.flags.add('python-format') - else: - self.flags.discard('python-format') + self.auto_comments = list(distinct(auto_comments)) self.user_comments = list(distinct(user_comments)) if isinstance(previous_id, string_types): @@ -112,6 +109,13 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), self.lineno = lineno self.context = context + def determine_python_format(self): + """Sets python-format flag if message contains a format string""" + if self.id and self.python_format: + self.flags.add('python-format') + else: + self.flags.discard('python-format') + def __repr__(self): return '<%s %r (flags: %r)>' % (type(self).__name__, self.id, list(self.flags)) diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 8fe3f606c..2d9942179 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -66,7 +66,7 @@ def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING, """Extract messages from any source files found in the given directory. This function generates tuples of the form ``(filename, lineno, message, - comments, context)``. + comments, context, flags)``. Which extraction method is used per file is determined by the `method_map` parameter, which maps extended glob patterns to extraction method names. @@ -155,14 +155,14 @@ def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING, options = odict if callback: callback(filename, method, options) - for lineno, message, comments, context in \ + for lineno, message, comments, context, flags in \ extract_from_file(method, filepath, keywords=keywords, comment_tags=comment_tags, options=options, strip_comment_tags= strip_comment_tags): - yield filename, lineno, message, comments, context + yield filename, lineno, message, comments, context, flags break @@ -170,8 +170,8 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, comment_tags=(), options=None, strip_comment_tags=False): """Extract messages from a specific file. - This function returns a list of tuples of the form ``(lineno, funcname, - message)``. + This function returns a list of tuples of the form + ``(lineno, messages, comments, context, flags)``. :param filename: the path to the file to extract messages from :param method: a string specifying the extraction method (.e.g. "python") @@ -198,7 +198,8 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), """Extract messages from the given file-like object using the specified extraction method. - This function returns tuples of the form ``(lineno, message, comments)``. + This generator function yields tuples of the form + ``(lineno, messages, comments, context, flags)``. The implementation dispatches the actual extraction to plugins, based on the value of the ``method`` parameter. @@ -211,7 +212,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), >>> from babel._compat import BytesIO >>> for message in extract('python', BytesIO(source)): ... print(message) - (3, u'Hello, world!', [], None) + (3, u'Hello, world!', [], None, ()) :param method: an extraction method (a callable), or a string specifying the extraction method (.e.g. "python"); @@ -266,10 +267,17 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), if func is None: raise ValueError('Unknown extraction method %r' % method) - results = func(fileobj, keywords.keys(), comment_tags, - options=options or {}) + for result in func(fileobj, keywords.keys(), comment_tags, + options=options or {}): + flags = () + if len(result) == 4: + lineno, funcname, messages, comments = result + elif len(result) == 5: + lineno, funcname, messages, comments, flags = result + else: + raise ValueError( + 'Extraction function must yield tuples with 4 or 5 values') - for lineno, funcname, messages, comments in results: if funcname: spec = keywords[funcname] or (1,) else: @@ -320,7 +328,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), if strip_comment_tags: _strip_comment_tags(comments, comment_tags) - yield lineno, messages, comments, context + yield lineno, messages, comments, context, flags def extract_nothing(fileobj, keywords, comment_tags, options): @@ -413,7 +421,7 @@ def extract_python(fileobj, keywords, comment_tags, options): translator_comments = [] yield (message_lineno, funcname, messages, - [comment[1] for comment in translator_comments]) + [comment[1] for comment in translator_comments], ()) funcname = lineno = message_lineno = None call_stack = -1 diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 56f1b7677..42a84f880 100755 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -302,9 +302,10 @@ def callback(filename, method, options): callback=callback, strip_comment_tags= self.strip_comments) - for filename, lineno, message, comments, context in extracted: + for filename, lineno, message, comments, context, flags \ + in extracted: filepath = os.path.normpath(os.path.join(dirname, filename)) - catalog.add(message, None, [(filepath, lineno)], + catalog.add(message, None, [(filepath, lineno)], flags=flags, auto_comments=comments, context=context) log.info('writing PO template file to %s' % self.output_file) @@ -916,9 +917,9 @@ def callback(filename, method, options): callback=callback, strip_comment_tags= options.strip_comment_tags) - for filename, lineno, message, comments, context in extracted: + for filename, lineno, message, comments, context, flags in extracted: filepath = os.path.normpath(os.path.join(dirname, filename)) - catalog.add(message, None, [(filepath, lineno)], + catalog.add(message, None, [(filepath, lineno)], flags=flags, auto_comments=comments, context=context) catalog_charset = catalog.charset diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index 226ac1ce9..0532e3976 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -344,9 +344,9 @@ def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False, message catalog to the provided file-like object. >>> catalog = Catalog() - >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)], + >>> message = catalog.add(u'foo %(name)s', locations=[('main.py', 1)], ... flags=('fuzzy',)) - + >>> message.determine_python_format() >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)]) >>> from babel._compat import BytesIO diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py index fa03207c4..15824235b 100644 --- a/tests/messages/test_extract.py +++ b/tests/messages/test_extract.py @@ -38,16 +38,16 @@ def test_nested_calls(self): extract.DEFAULT_KEYWORDS.keys(), [], {})) self.assertEqual([ - (1, '_', None, []), - (2, 'ungettext', (None, None, None), []), - (3, 'ungettext', (u'Babel', None, None), []), - (4, 'ungettext', (None, u'Babels', None), []), - (5, 'ungettext', (u'bunny', u'bunnies', None), []), - (6, 'ungettext', (None, u'bunnies', None), []), - (7, '_', None, []), - (8, 'gettext', u'Rabbit', []), - (9, 'dgettext', (u'wiki', None), []), - (10, 'dngettext', (None, u'Page', u'Pages', None), [])], + (1, '_', None, [], ()), + (2, 'ungettext', (None, None, None), [], ()), + (3, 'ungettext', (u'Babel', None, None), [], ()), + (4, 'ungettext', (None, u'Babels', None), [], ()), + (5, 'ungettext', (u'bunny', u'bunnies', None), [], ()), + (6, 'ungettext', (None, u'bunnies', None), [], ()), + (7, '_', None, [], ()), + (8, 'gettext', u'Rabbit', [], ()), + (9, 'dgettext', (u'wiki', None), [], ()), + (10, 'dngettext', (None, u'Page', u'Pages', None), [], ())], messages) def test_nested_comments(self): @@ -58,7 +58,7 @@ def test_nested_comments(self): """) messages = list(extract.extract_python(buf, ('ngettext',), ['TRANSLATORS:'], {})) - self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [])], + self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [], ())], messages) def test_comments_with_calls_that_spawn_multiple_lines(self): @@ -83,21 +83,21 @@ def test_comments_with_calls_that_spawn_multiple_lines(self): {'strip_comment_tags':False})) self.assertEqual((6, '_', 'Locale deleted.', - [u'NOTE: This Comment SHOULD Be Extracted']), + [u'NOTE: This Comment SHOULD Be Extracted'], ()), messages[1]) self.assertEqual((10, 'ngettext', (u'Foo deleted.', u'Foos deleted.', None), - [u'NOTE: This Comment SHOULD Be Extracted']), + [u'NOTE: This Comment SHOULD Be Extracted'], ()), messages[2]) self.assertEqual((3, 'ngettext', (u'Catalog deleted.', u'Catalogs deleted.', None), - [u'NOTE: This Comment SHOULD Be Extracted']), + [u'NOTE: This Comment SHOULD Be Extracted'], ()), messages[0]) self.assertEqual((15, 'ngettext', (u'Bar deleted.', u'Bars deleted.', None), [u'NOTE: This Comment SHOULD Be Extracted', - u'NOTE: And This One Too']), + u'NOTE: And This One Too'], ()), messages[3]) def test_declarations(self): @@ -114,9 +114,9 @@ class Meta: messages = list(extract.extract_python(buf, extract.DEFAULT_KEYWORDS.keys(), [], {})) - self.assertEqual([(3, '_', u'Page arg 1', []), - (3, '_', u'Page arg 2', []), - (8, '_', u'log entry', [])], + self.assertEqual([(3, '_', u'Page arg 1', [], ()), + (3, '_', u'Page arg 2', [], ()), + (8, '_', u'log entry', [], ())], messages) def test_multiline(self): @@ -128,8 +128,8 @@ def test_multiline(self): count) """) messages = list(extract.extract_python(buf, ('ngettext',), [], {})) - self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), []), - (3, 'ngettext', (u'elvis', u'elvises', None), [])], + self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [], ()), + (3, 'ngettext', (u'elvis', u'elvises', None), [], ())], messages) def test_triple_quoted_strings(self): @@ -141,9 +141,9 @@ def test_triple_quoted_strings(self): messages = list(extract.extract_python(buf, extract.DEFAULT_KEYWORDS.keys(), [], {})) - self.assertEqual([(1, '_', (u'pylons'), []), - (2, 'ngettext', (u'elvis', u'elvises', None), []), - (3, 'ngettext', (u'elvis', u'elvises', None), [])], + self.assertEqual([(1, '_', (u'pylons'), [], ()), + (2, 'ngettext', (u'elvis', u'elvises', None), [], ()), + (3, 'ngettext', (u'elvis', u'elvises', None), [], ())], messages) def test_multiline_strings(self): @@ -159,7 +159,7 @@ def test_multiline_strings(self): [(1, '_', u'This module provides internationalization and localization\n' 'support for your Python programs by providing an interface to ' - 'the GNU\ngettext message catalog library.', [])], + 'the GNU\ngettext message catalog library.', [], ())], messages) def test_concatenated_strings(self): @@ -400,9 +400,9 @@ def test_simple_extract(self): list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {})) - self.assertEqual([(1, 'simple', [], None), - (2, 'simple', [], None), - (3, ('s', 'p'), [], None)], messages) + self.assertEqual([(1, 'simple', [], None, ()), + (2, 'simple', [], None, ()), + (3, ('s', 'p'), [], None, ())], messages) def test_various_calls(self): buf = BytesIO(b"""\ @@ -420,9 +420,9 @@ def test_various_calls(self): messages = \ list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {})) - self.assertEqual([(5, (u'bunny', u'bunnies'), [], None), - (8, u'Rabbit', [], None), - (10, (u'Page', u'Pages'), [], None)], messages) + self.assertEqual([(5, (u'bunny', u'bunnies'), [], None, ()), + (8, u'Rabbit', [], None, ()), + (10, (u'Page', u'Pages'), [], None, ())], messages) def test_message_with_line_comment(self): buf = BytesIO(u"""\ @@ -479,6 +479,10 @@ def test_misplaced_comments(self): self.assertEqual([], messages[2][3]) +def extract_bad(fileobj, keywords, comment_tags, options): + yield (None,) + + class ExtractTestCase(unittest.TestCase): def test_invalid_filter(self): @@ -497,14 +501,19 @@ def test_invalid_filter(self): messages = \ list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})) - self.assertEqual([(5, (u'bunny', u'bunnies'), [], None), - (8, u'Rabbit', [], None), - (10, (u'Page', u'Pages'), [], None)], messages) + self.assertEqual([(5, (u'bunny', u'bunnies'), [], None, ()), + (8, u'Rabbit', [], None, ()), + (10, (u'Page', u'Pages'), [], None, ())], messages) def test_invalid_extract_method(self): buf = BytesIO(b'') self.assertRaises(ValueError, list, extract.extract('spam', buf)) + def test_bad_extract_function(self): + self.assertRaises( + ValueError, list, + extract.extract('tests.messages.test_extract:extract_bad', '')) + def test_different_signatures(self): buf = BytesIO(b""" foo = _('foo', 'bar')