From: Frank Brehm Date: Tue, 16 Apr 2024 10:28:26 +0000 (+0200) Subject: Exporting results into a CSV file in bin/get-intern-used-from-addresses X-Git-Url: https://git.uhu-banane.net/?a=commitdiff_plain;h=443f165660145fe948edde9c4d2135202c4c4ab3;p=pixelpark%2Fpp-admin-tools.git Exporting results into a CSV file in bin/get-intern-used-from-addresses --- diff --git a/.gitignore b/.gitignore index 531bb82..65df378 100644 --- a/.gitignore +++ b/.gitignore @@ -22,5 +22,5 @@ dist/ tmp/* venv/* etc/*.ini -get-from-addr-totals.yaml +get-from-addr-totals*.yaml get-from-addr-per-*.csv diff --git a/lib/pp_admintools/app/get_from_addr.py b/lib/pp_admintools/app/get_from_addr.py index 254f1e3..6c299a0 100644 --- a/lib/pp_admintools/app/get_from_addr.py +++ b/lib/pp_admintools/app/get_from_addr.py @@ -9,6 +9,8 @@ from __future__ import absolute_import # Standard modules +import copy +import csv import ipaddress import logging import re @@ -28,7 +30,7 @@ from ..errors import DpxFileError from ..handler.pflogparse import PostfixLogfileParser from ..xlate import XLATOR -__version__ = '0.8.0' +__version__ = '0.8.1' LOG = logging.getLogger(__name__) _ = XLATOR.gettext @@ -49,7 +51,6 @@ class GetFromAddressesApp(BaseDPXApplication): default_limit = 10 default_totals_yaml_file = Path('get-from-addr-totals.yaml') - default_stats_per_address_csv_file = Path('get-from-addr-per-address.csv') default_stats_per_domain_csv_file = Path('get-from-addr-per-domain.csv') re_mail_domain = re.compile(r'.*@') @@ -91,9 +92,9 @@ class GetFromAddressesApp(BaseDPXApplication): self.per_domain = {} self.per_address = {} + self.totals = {} self.totals_yaml_file = self.default_totals_yaml_file - self.stats_per_address_csv_file = self.default_stats_per_address_csv_file self.stats_per_domain_csv_file = self.default_stats_per_domain_csv_file self.oldest_entry = None @@ -142,15 +143,6 @@ class GetFromAddressesApp(BaseDPXApplication): 'Dafault: {!r}.').format(str(self.totals_yaml_file)) ) - app_group.add_argument( - '--address-csv-file', metavar=_('FILE'), dest='address_csv_file', - action=OutputFileOptionAction, - help=_( - 'The output CSV file about found FROM addresses for importing in a spread sheet ' - 'or into a database. Default: {!r}').format( - str(self.stats_per_address_csv_file)) - ) - app_group.add_argument( '--domain-csv-file', metavar=_('FILE'), dest='domain_csv_file', action=OutputFileOptionAction, @@ -186,10 +178,6 @@ class GetFromAddressesApp(BaseDPXApplication): if totals_yaml_file: self.totals_yaml_file = totals_yaml_file - address_csv_file = getattr(self.args, 'address_csv_file', None) - if address_csv_file: - self.stats_per_address_csv_file = address_csv_file - domain_csv_file = getattr(self.args, 'domain_csv_file', None) if domain_csv_file: self.stats_per_domain_csv_file = domain_csv_file @@ -197,7 +185,9 @@ class GetFromAddressesApp(BaseDPXApplication): if getattr(self.args, 'show_address_results', False): self.show_address_results = True - self.limit = getattr(self.args, 'limit', self.default_limit) + limit = getattr(self.args, 'limit', self.default_limit) + if limit: + self.limit = limit if self.args.logfiles: self._eval_given_logfiles() @@ -321,11 +311,12 @@ class GetFromAddressesApp(BaseDPXApplication): self.per_domain[loghost][domain] = 0 self.per_domain[loghost][domain] += 1 - if loghost not in self.per_address: - self.per_address[loghost] = {} - if address not in self.per_address[loghost]: - self.per_address[loghost][address] = 0 - self.per_address[loghost][address] += 1 + if self.show_address_results: + if loghost not in self.per_address: + self.per_address[loghost] = {} + if address not in self.per_address[loghost]: + self.per_address[loghost][address] = 0 + self.per_address[loghost][address] += 1 # ------------------------------------------------------------------------- def _is_local_client(self, chain): @@ -397,10 +388,12 @@ class GetFromAddressesApp(BaseDPXApplication): if self.verbose > 2: self.empty_line() LOG.debug(_('Results per domain:') + '\n' + pp(self.per_domain)) - self.empty_line() - LOG.debug(_('Results per address:') + '\n' + pp(self.per_address)) + if self.show_address_results: + self.empty_line() + LOG.debug(_('Results per address:') + '\n' + pp(self.per_address)) self._generate_totals() + self._write_domain_csv() if self.oldest_entry or self.newest_entry: self.empty_line() @@ -421,36 +414,28 @@ class GetFromAddressesApp(BaseDPXApplication): def _generate_totals(self): totals_file_abs = self.totals_yaml_file.resolve() - if self.verbose > 0: + if self.verbose > 1: LOG.debug(_('Using totals YAML file {!r}.').format(str(totals_file_abs))) - totals = self._read_totals(totals_file_abs) + self.totals = self._read_totals(totals_file_abs) - for loghost in self.per_address.keys(): - if loghost not in totals: - totals[loghost] = {} - - totals[loghost]['per_address'] = {} - for address in self.per_address[loghost].keys(): - count = self.per_address[loghost][address] - totals[loghost]['per_address'][address] = count - - for loghost in self.per_address.keys(): - if loghost not in totals: - totals[loghost] = {} + for loghost in self.per_domain.keys(): + if loghost not in self.totals: + self.totals[loghost] = {} - totals[loghost]['per_domain'] = {} + self.totals[loghost] = {} for domain in self.per_domain[loghost].keys(): count = self.per_domain[loghost][domain] - totals[loghost]['per_domain'][domain] = count + self.totals[loghost][domain] = count - if self.verbose > 0: + if self.verbose > 2: self.empty_line() - LOG.debug(_('Results total:') + '\n' + pp(totals)) + LOG.debug(_('Results total:') + '\n' + pp(self.totals)) + self.empty_line() LOG.debug(_('Writing file {!r} ...').format(str(totals_file_abs))) with totals_file_abs.open('wt', **self.open_args) as fh: - yaml.safe_dump(totals, fh, explicit_start=True) + yaml.safe_dump(self.totals, fh, explicit_start=True) # ------------------------------------------------------------------------- def _read_totals(self, totals_file_abs): @@ -463,13 +448,59 @@ class GetFromAddressesApp(BaseDPXApplication): self.exit(3) if totals_file_abs.exists(): - if self.verbose > 0: + if self.verbose > 2: LOG.debug(_('Reading file {!r} ...').format(str(totals_file_abs))) with totals_file_abs.open('rt', **self.open_args) as fh: totals = yaml.safe_load(fh) return totals + # ------------------------------------------------------------------------- + def _write_domain_csv(self): + + LOG.debug(_('Preparing CSV table for domains ...')) + loghosts = [] + domain_table = {} + + for loghost in sorted(self.totals.keys(), key=str.lower): + loghosts.append(loghost) + loghosts.append('total') + headers = ['domain'] + loghosts + + for loghost in self.totals.keys(): + + for domain in self.totals[loghost].keys(): + count = self.totals[loghost][domain] + + if domain not in domain_table: + domain_table[domain] = {} + + domain_table[domain][loghost] = count + + for lh in loghosts: + if lh not in domain_table[domain]: + domain_table[domain][lh] = 0 + + domain_table[domain]['total'] += count + + if self.verbose > 1: + self.empty_line() + LOG.debug(_('Prepared domains CSV table:') + '\n' + pp(domain_table)) + LOG.debug(_('Writing CSV file {!r} ...').format(str(self.stats_per_domain_csv_file))) + + with self.stats_per_domain_csv_file.open('w', newline='') as csvfile: + writer = csv.DictWriter( + csvfile, fieldnames=headers, quoting=csv.QUOTE_MINIMAL, + delimiter=',', quotechar='"') + writer.writeheader() + for domain in sorted( + domain_table.keys(), + key=lambda x: domain_table[x]['total'], + reverse=True): + row = copy.copy(domain_table[domain]) + row['domain'] = domain + writer.writerow(row) + # ------------------------------------------------------------------------- def _output_results_per_address(self):