# Own modules
from . import DEFAULT_TZ_OFFSET
+from .errors import BrokenStringSplit
from .errors import DateFormatError
from .errors import WrongDateIsoformatError
from .xlate import XLATOR
_ = XLATOR.gettext
ngettext = XLATOR.ngettext
-__version__ = '0.1.0'
+__version__ = '0.2.0'
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

# Leading two-digit hour part (00-19) with optional minutes (00-59),
# optionally separated by a colon.
RE_TZ = re.compile(r'^\s*(?P<tz_hours>[01]\d)(?::?(?P<tz_mins>[0-5]\d))?')

# --- Patterns used for splitting a text into (optionally quoted) chunks ---

# Whitespace at the very beginning of the text.
RE_SPLIT_WS_BEGIN = re.compile(r'^\s+')
# A backslash-escaped single quote.
RE_SPLIT_SQ = re.compile(r"\\'")
# A complete single-quoted string at the beginning; group 1 is the content
# between the quotes, escaped single quotes are allowed inside.
RE_SPLIT_SQ_CHUNK = re.compile(r"^'((?:\\'|[^'])*)'")
# A backslash-escaped double quote.
RE_SPLIT_DQ = re.compile(r'\\"')
# A complete double-quoted string at the beginning; group 1 is the content
# between the quotes, escaped double quotes are allowed inside.
RE_SPLIT_DQ_CHUNK = re.compile(r'^"((?:\\"|[^"])*)"')
# Unquoted text at the beginning, up to the next whitespace or unescaped quote.
# The escaped-quote alternative has to be tried BEFORE the generic character
# class: otherwise the class consumes the backslash and the escaped quote
# can never match.
RE_SPLIT_UQ_CHUNK = re.compile(r'^((?:\\[\'"]|[^\s\'"])+)')
# Text consisting only of whitespace (or nothing at all).
RE_SPLIT_WS_ALL = re.compile(r'^\s*$')
# A quote character at the beginning followed by the rest of the line;
# used to detect a quote without a closing counterpart.
RE_SPLIT_UNBALANCED = re.compile(r'^([\'"].*)\s*')
+
+
DEFAULT_TZ = UTC
# =============================================================================
return datetime.datetime(**params)
+# ==============================================================================
def split_parts(text, keep_quotes=False, raise_on_unbalanced=True):
    """
    Split the given text in chunks by whitespaces or single or double quoted strings.

    Quoted and unquoted pieces following each other without whitespace between
    them are joined into a single chunk. Inside a quoted piece a
    backslash-escaped quote of the same kind is replaced by the plain quote
    character. An empty quoted string contributes nothing to a chunk unless
    keep_quotes is set.

    @param text: the text to split in chunks; None gives an empty list, any
                 other value is converted with str()
    @type text: str
    @param keep_quotes: keep quotes of quoted chunks
    @type keep_quotes: bool
    @param raise_on_unbalanced: raise an exception on unbalanced quotes; if
                                False, the rest of the line beginning at the
                                unbalanced quote is added to the current chunk
    @type raise_on_unbalanced: bool

    @raise BrokenStringSplit: on unbalanced quotes (if raise_on_unbalanced is
                              True), or if the remaining text matches no rule

    @return: list of chunks
    @rtype: list
    """
    chunks = []
    if text is None:
        return chunks

    txt = str(text)
    last_chunk = ''

    # Big loop to split the text - until it is empty
    while txt != '':

        # Whitespace at the begin of the remaining text terminates the chunk
        # collected so far.
        if last_chunk != '' and RE_SPLIT_WS_BEGIN.search(txt):
            chunks.append(last_chunk)
            last_chunk = ''

        # Clean the remaining text; from here on it starts with a
        # non-whitespace character.
        txt = txt.strip()
        if txt == '':
            break

        # Search for a single quoted string at the begin of the text
        match = RE_SPLIT_SQ_CHUNK.search(txt)
        if match:
            chunk = RE_SPLIT_SQ.sub("'", match.group(1))
            if keep_quotes:
                chunk = "'" + chunk + "'"
            last_chunk += chunk
            txt = txt[match.end():]
            continue

        # Search for a double quoted string at the begin of the text
        match = RE_SPLIT_DQ_CHUNK.search(txt)
        if match:
            # Pattern objects are not callable - the substitution has to be
            # done with the sub() method.
            chunk = RE_SPLIT_DQ.sub('"', match.group(1))
            if keep_quotes:
                chunk = '"' + chunk + '"'
            last_chunk += chunk
            txt = txt[match.end():]
            continue

        # Search for unquoted, whitespace delimited text at the begin of the text
        match = RE_SPLIT_UQ_CHUNK.search(txt)
        if match:
            last_chunk += match.group(1)
            txt = txt[match.end():]
            continue

        # Check for unbalanced quotes
        match = RE_SPLIT_UNBALANCED.search(txt)
        if match:
            if raise_on_unbalanced:
                msg = _('Unbalanced quotes in {!r}.').format(text)
                raise BrokenStringSplit(msg)
            last_chunk += match.group(1)
            # Consume the captured part, otherwise the loop would see the same
            # text again and never terminate. Whitespace following the capture
            # is left in place, so it still terminates the chunk.
            txt = txt[match.end(1):]
            continue

        # Safety net: here we should never come to ...
        msg = _('Broken split of {text!r}: {txt!r} left.').format(text=text, txt=txt)
        raise BrokenStringSplit(msg)

    if last_chunk != '':
        chunks.append(last_chunk)

    return chunks
+
+
# =============================================================================
if __name__ == '__main__':