# Own modules
-__version__ = '0.2.1'
+__version__ = '0.2.2'
LOG = logging.getLogger(__name__)
DEFAULT_COMMENT_CHAR = '#'
pat_linejunk = r'^\s*(?:\#.*)?$'
re_linejunk = re.compile(pat_linejunk)
- re_empty = re.compile(r'^\s*$')
# -------------------------------------------------------------------------
@classmethod
return cls.re_linejunk.search(line) is not None
# -------------------------------------------------------------------------
- def __init__(self, comment_chars=DEFAULT_COMMENT_CHAR, ignore_empty=False,
+ def __init__(self, comment_chars=None, ignore_empty=False,
ignore_whitespace=False, ignore_comment=False, case_insensitive=False):
self.comment_chars = []
- self.comment_re = []
- self.token_re = []
+ self.re_comment_list = []
+ self.re_token_list = []
self.ignore_empty = ignore_empty
self.ignore_whitespace = ignore_whitespace
self.ignore_comment = ignore_comment
self.comment_chars.append(str(char))
else:
self.comment_chars.append(str(comment_chars))
+ elif comment_chars is None:
+ self.comment_chars.append(DEFAULT_COMMENT_CHAR)
super(ConfigDiffer, self).__init__(
linejunk=IS_LINE_JUNK, charjunk=IS_CHARACTER_JUNK)
- pat = r'^(\s*"(?:[^"]|\\")*")'
- self.token_re.append(re.compile(pat))
- pat = r"^(\s*'(?:[^']|\\')*')"
- self.token_re.append(re.compile(pat))
+ re_flags = re.MULTILINE
+ if six.PY3:
+ re_flags = re.MULTILINE | re.UNICODE
+
+ # a single quoted token
+ pat = r"^(\s*'(?:\\(?!')|\\'|(?:(?<!\\)[^']))*)(?#single-q-token)"
+ self.re_token_list.append(re.compile(pat, re_flags))
+
+ # a double quoted token
+ pat = r'^(\s*"(?:\\(?!")|\\"|(?:(?<!\\)[^"]))*")(?#double-q-token)'
+ self.re_token_list.append(re.compile(pat, re_flags))
+
+ # a token without quotings
+ pat = r'^(\s*(?:[^\s"' + r"'" + r']+|\\["' + r"'" + r'])+)(?#token-wo-quote)'
+ self.re_token_list.append(re.compile(pat, re_flags))
+
+ self.re_empty = re.compile(r'^(\s*)$')
if self.comment_chars:
+ i = 0
for char in self.comment_chars:
- pat = r'^\s*[^"' + r"'" + r']*?' + re.escape(char) + r'.*$'
- self.comment_re.append(re.compile(pat, re.MULTILINE))
+ pat = r'^\s*' + re.escape(char) + r'.*$(?#sole-comment)'
+ self.re_comment_list.append(re.compile(pat, re_flags))
+
+ pat = (
+ r'^(\s*"(?:[^"](?!' + re.escape(char) + r'))*)\s*' +
+ re.escape(char) + r'.*$(?#comment-{}-wo-dq)'.format(i))
+ self.re_token_list.append(re.compile(pat, re_flags))
+
+ pat = (
+ r"^(\s*'(?:[^'](?!" + re.escape(char) + r'))*)\s*' +
+ re.escape(char) + r'.*$(?#comment-{}-wo-sq)'.format(i))
+ self.re_token_list.append(re.compile(pat, re_flags))
+
+ i += 1
# -------------------------------------------------------------------------
def is_equal(self, a, b):
# -------------------------------------------------------------------------
def remove_comments(self, line):  # return line with a trailing comment stripped, scanning token by token
- if not self.comment_chars:
+ if not self.re_comment_list:  # no comment patterns compiled: nothing to strip
+ # LOG.debug('line false %r', line)
+ return line
+
+ if self.re_empty.match(line):  # empty or whitespace-only input is returned unchanged
+ # LOG.debug('line empty %r', line)
return line
+ old_line = line  # text that still has to be scanned
+ new_line = ''  # text accepted so far (tokens without comments)
+
+ while True:
+
+ # LOG.debug('loop: old_line: %r, new_line: %r', old_line, new_line)
+
+ for regex in self.re_comment_list:
+ if regex.search(old_line):  # the remaining text begins with a comment
+ new_line += regex.sub('', old_line)  # keep only what is left after deleting the comment
+ # LOG.debug(
+ # 'found comment: old_line: %r, new_line: %r, pattern: %r',
+ # old_line, new_line, regex.pattern)
+ return new_line
+
+ token_found = False
+ for regex in self.re_token_list:  # patterns are tried in list order; the first match wins
+ match = regex.search(old_line)
+ if match:
+ new_line += match.group(1)  # group 1 is the part of the match that is kept
+ old_line = regex.sub('', old_line)  # NOTE(review): no count and MULTILINE patterns -- assumes a single line, confirm
+ # LOG.debug(
+ # 'found token: old_line: %r, new_line: %r, pattern: %r',
+ # old_line, new_line, regex.pattern)
+ token_found = True
+ break
+
+ match = self.re_empty.search(old_line)
+ if match:  # only whitespace (or nothing) is left: append it and finish
+ # LOG.debug('old_line empty %r', old_line)
+ new_line += match.group(1)
+ return new_line
+
+ if token_found:  # a token was consumed, so scan the shortened remainder again
+ continue
+
+ return new_line + old_line  # nothing matched: pass the remainder through verbatim
+
+
# -------------------------------------------------------------------------
def __str__(self):
"""
from pp_lib.differ import ConfigDiffer
- d = ConfigDiffer()
+ ccs = ['#', '//']
+ d = ConfigDiffer(
+ comment_chars=ccs, ignore_empty=True,
+ ignore_whitespace=True, ignore_comment=True, case_insensitive=True)
LOG.debug("ConfigDiffer %%r: %r", d)
LOG.debug("ConfigDiffer %%s:\n%s", pp(d.__dict__))
+ self.assertEqual(d.comment_chars, ccs)
+ self.assertTrue(d.ignore_empty)
+ self.assertTrue(d.ignore_whitespace)
+ self.assertTrue(d.ignore_comment)
+ self.assertTrue(d.case_insensitive)
+
+ d = None
+
+ # -------------------------------------------------------------------------
+ def test_removing_comments(self):
+
+ LOG.info("Testing removing icomments from lines ...")
+
+ from pp_lib.differ import ConfigDiffer
+
+ ccs = ['#', '//']
+ d = ConfigDiffer(comment_chars=ccs, ignore_empty=True)
+
+ if self.verbose > 2:
+ LOG.debug("ConfigDiffer %%r: %r", d)
+ LOG.debug("ConfigDiffer %%s:\n%s", pp(d.__dict__))
+
+ lines = (
+ ('', ''),
+ (' ', ' '),
+ ('\n', '\n'),
+ (' \n', ' \n'),
+ ('bla = suelz', 'bla = suelz'),
+ ('narf "bla = suelz"', 'narf "bla = suelz"'),
+ ("narf 'bla = suelz', \'hallo\'", "narf 'bla = suelz', \'hallo\'"),
+ ('"hh \\" bb" jj', '"hh \\" bb" jj'),
+ ('"hh \\" bb" // comment', '"hh \\" bb"'),
+ ('# Comment', ''),
+ ('// Comment', ''),
+ (' # Comment', ''),
+ (' " # ", # Comment', ' " # ",'),
+ ('ggg " bla', 'ggg " bla'),
+ ('ggg " bla # uhu', 'ggg " bla'),
+ ('ggg " bla // uhu', 'ggg " bla'),
+ )
+
+ for pair in lines:
+ origin = pair[0]
+ expected = pair[1]
+
+ LOG.debug("Testing origin %r -> expected %r", origin, expected)
+ result = d.remove_comments(origin)
+ LOG.debug("Got %r.", result)
+
+ self.assertEqual(result, expected)
# =============================================================================
suite.addTest(ConfigDifferTest('test_import', verbose))
suite.addTest(ConfigDifferTest('test_init_config_differ', verbose))
- #suite.addTest(ConfigDifferTest('test_init_config_file_differ', verbose))
+ suite.addTest(ConfigDifferTest('test_removing_comments', verbose))
runner = unittest.TextTestRunner(verbosity=verbose)