From: Frank Brehm Date: Wed, 6 Nov 2024 12:50:23 +0000 (+0100) Subject: Adding function split_parts() to module pp_admintools.common X-Git-Url: https://git.uhu-banane.net/?a=commitdiff_plain;h=258dfd56811a316cd993bb5800a524cb3701a1f4;p=pixelpark%2Fpp-admin-tools.git Adding function split_parts() to module pp_admintools.common --- diff --git a/lib/pp_admintools/app/__init__.py b/lib/pp_admintools/app/__init__.py index aa899de..2009e6c 100644 --- a/lib/pp_admintools/app/__init__.py +++ b/lib/pp_admintools/app/__init__.py @@ -264,4 +264,4 @@ if __name__ == '__main__': pass -# vim: ts=4 et list +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 list diff --git a/lib/pp_admintools/common.py b/lib/pp_admintools/common.py index 5cc84c3..6ca850c 100644 --- a/lib/pp_admintools/common.py +++ b/lib/pp_admintools/common.py @@ -19,6 +19,7 @@ from fb_tools.common import timeinterval2delta # Own modules from . import DEFAULT_TZ_OFFSET +from .errors import BrokenStringSplit from .errors import DateFormatError from .errors import WrongDateIsoformatError from .xlate import XLATOR @@ -33,7 +34,7 @@ except ImportError: _ = XLATOR.gettext ngettext = XLATOR.ngettext -__version__ = '0.1.0' +__version__ = '0.2.0' LOG = logging.getLogger(__name__) @@ -51,6 +52,16 @@ RE_DEFAULT_TZ = re.compile(PATTERN_DEFAULT_TZ) RE_TZ = re.compile(r'^\s*(?P[01]\d)(?::?(?P[0-5]\d))?') +RE_SPLIT_WS_BEGIN = re.compile(r'^\s+') +RE_SPLIT_SQ = re.compile(r"\\'") +RE_SPLIT_SQ_CHUNK = re.compile(r"^'((?:\\'|[^'])*)'") +RE_SPLIT_DQ = re.compile(r'\\"') +RE_SPLIT_DQ_CHUNK = re.compile(r'^"((?:\\"|[^"])*)"') +RE_SPLIT_UQ_CHUNK = re.compile(r'^((?:[^\s\'"]+|\\\'|\\")+)') +RE_SPLIT_WS_ALL = re.compile(r'^\s*$') +RE_SPLIT_UNBALANCED = re.compile(r'^([\'"].*)\s*') + + DEFAULT_TZ = UTC # ============================================================================= @@ -143,6 +154,96 @@ def fromisoformat(datestr): return datetime.datetime(**params) +# 
# =============================================================================
def split_parts(text, keep_quotes=False, raise_on_unbalanced=True):
    """
    Split the given text in chunks by whitespaces or single or double quoted strings.

    Quoted and unquoted pieces which follow each other without whitespace
    between them are joined into one chunk, e.g. the text foo"bar baz" gives
    the single chunk 'foobar baz'. A backslash-escaped quote character inside
    a quoted string of the same quote type is replaced by the bare quote
    character.

    Note: an empty quoted string contributes nothing to its chunk, so with
    keep_quotes=False a standalone '' or "" does not produce a chunk at all.

    @raise BrokenStringSplit: on unbalanced quotes, if raise_on_unbalanced
                              is True, or if the text could not be split
                              for another reason.

    @param text: the text to split in chunks
    @type text: str
    @param keep_quotes: keep quotes of quoted chunks
    @type keep_quotes: bool
    @param raise_on_unbalanced: raise an exception on unbalanced quotes. If False,
                                the rest of the line beginning with the unbalanced
                                quote is appended as it is to the current chunk.
    @type raise_on_unbalanced: bool

    @return: list of chunks
    @rtype: list
    """
    chunks = []
    if text is None:
        return chunks

    txt = str(text)
    last_chunk = ''

    # All chunk patterns are anchored at the begin of the text, so the matched
    # part is always consumed by cutting off everything up to the match end.

    # Big loop to split the text - until it is empty
    while txt != '':

        # add chunk, if there is a chunk left and a whitespace at the begin of the line
        if last_chunk != '' and RE_SPLIT_WS_BEGIN.search(txt):
            chunks.append(last_chunk)
            last_chunk = ''

        # clean the line
        txt = txt.strip()
        if txt == '':
            break

        # search for a single quoted string at the begin of the line
        match = RE_SPLIT_SQ_CHUNK.search(txt)
        if match:
            chunk = RE_SPLIT_SQ.sub("'", match.group(1))
            if keep_quotes:
                chunk = "'" + chunk + "'"
            last_chunk += chunk
            txt = txt[match.end():]
            continue

        # search for a double quoted string at the begin of the line
        match = RE_SPLIT_DQ_CHUNK.search(txt)
        if match:
            # A compiled pattern is not callable - the substitution has to be
            # done with its sub() method.
            chunk = RE_SPLIT_DQ.sub('"', match.group(1))
            if keep_quotes:
                chunk = '"' + chunk + '"'
            last_chunk += chunk
            txt = txt[match.end():]
            continue

        # search for unquoted, whitespace delimited text at the begin of the line
        match = RE_SPLIT_UQ_CHUNK.search(txt)
        if match:
            last_chunk += match.group(1)
            txt = txt[match.end():]
            continue

        # Only whitespaces left
        if RE_SPLIT_WS_ALL.search(txt):
            break

        # Check for unbalanced quotes
        match = RE_SPLIT_UNBALANCED.search(txt)
        if match:
            if raise_on_unbalanced:
                msg = _('Unbalanced quotes in {!r}.').format(text)
                raise BrokenStringSplit(msg)
            last_chunk += match.group(1)
            # Consume exactly the appended part - otherwise the loop would
            # never terminate. Whitespace behind it stays in place, so that
            # it still separates the next chunk.
            txt = txt[match.end(1):]
            continue

        # Here we should not come to ...
        msg = _('Broken split of {text!r}: {txt!r} left.').format(text=text, txt=txt)
        raise BrokenStringSplit(msg)

    if last_chunk != '':
        chunks.append(last_chunk)

    return chunks