Frank Brehm's Git Trees - pixelpark/pp-admin-tools.git/commitdiff
Exporting results into a CSV file in bin/get-intern-used-from-addresses
author Frank Brehm <frank.brehm@pixelpark.com>
Tue, 16 Apr 2024 10:28:26 +0000 (12:28 +0200)
committer Frank Brehm <frank.brehm@pixelpark.com>
Tue, 16 Apr 2024 10:28:26 +0000 (12:28 +0200)
.gitignore
lib/pp_admintools/app/get_from_addr.py

diff --git a/.gitignore b/.gitignore
index 531bb82f953bd7ad34708466188905540ea042e2..65df378a9409249c6074839b8c3e17004c513afe 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -22,5 +22,5 @@ dist/
 tmp/*
 venv/*
 etc/*.ini
-get-from-addr-totals.yaml
+get-from-addr-totals*.yaml
 get-from-addr-per-*.csv
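diff --git a/lib/pp_admintools/app/get_from_addr.py b/lib/pp_admintools/app/get_from_addr.py
index 254f1e3aa4df923883bf79c69e581f43f19a0c6c..6c299a0d1a1589aac8ccd5638dcd115be2624817 100644 (file)
--- a/lib/pp_admintools/app/get_from_addr.py
+++ b/lib/pp_admintools/app/get_from_addr.py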
@@ -9,6 +9,8 @@
 from __future__ import absolute_import
 
 # Standard modules
+import copy
+import csv
 import ipaddress
 import logging
 import re
@@ -28,7 +30,7 @@ from ..errors import DpxFileError
 from ..handler.pflogparse import PostfixLogfileParser
 from ..xlate import XLATOR
 
-__version__ = '0.8.0'
+__version__ = '0.8.1'
 LOG = logging.getLogger(__name__)
 
 _ = XLATOR.gettext
@@ -49,7 +51,6 @@ class GetFromAddressesApp(BaseDPXApplication):
     default_limit = 10
 
     default_totals_yaml_file = Path('get-from-addr-totals.yaml')
-    default_stats_per_address_csv_file = Path('get-from-addr-per-address.csv')
     default_stats_per_domain_csv_file = Path('get-from-addr-per-domain.csv')
 
     re_mail_domain = re.compile(r'.*@')
@@ -91,9 +92,9 @@ class GetFromAddressesApp(BaseDPXApplication):
 
         self.per_domain = {}
         self.per_address = {}
+        self.totals = {}
 
         self.totals_yaml_file = self.default_totals_yaml_file
-        self.stats_per_address_csv_file = self.default_stats_per_address_csv_file
         self.stats_per_domain_csv_file = self.default_stats_per_domain_csv_file
 
         self.oldest_entry = None
@@ -142,15 +143,6 @@ class GetFromAddressesApp(BaseDPXApplication):
                 'Dafault: {!r}.').format(str(self.totals_yaml_file))
         )
 
-        app_group.add_argument(
-            '--address-csv-file', metavar=_('FILE'), dest='address_csv_file',
-            action=OutputFileOptionAction,
-            help=_(
-                'The output CSV file about found FROM addresses for importing in a spread sheet '
-                'or into a database. Default: {!r}').format(
-                str(self.stats_per_address_csv_file))
-        )
-
         app_group.add_argument(
             '--domain-csv-file', metavar=_('FILE'), dest='domain_csv_file',
             action=OutputFileOptionAction,
@@ -186,10 +178,6 @@ class GetFromAddressesApp(BaseDPXApplication):
         if totals_yaml_file:
             self.totals_yaml_file = totals_yaml_file
 
-        address_csv_file = getattr(self.args, 'address_csv_file', None)
-        if address_csv_file:
-            self.stats_per_address_csv_file = address_csv_file
-
         domain_csv_file = getattr(self.args, 'domain_csv_file', None)
         if domain_csv_file:
             self.stats_per_domain_csv_file = domain_csv_file
@@ -197,7 +185,9 @@ class GetFromAddressesApp(BaseDPXApplication):
         if getattr(self.args, 'show_address_results', False):
             self.show_address_results = True
 
-        self.limit = getattr(self.args, 'limit', self.default_limit)
+        limit = getattr(self.args, 'limit', self.default_limit)
+        if limit:
+            self.limit = limit
 
         if self.args.logfiles:
             self._eval_given_logfiles()
@@ -321,11 +311,12 @@ class GetFromAddressesApp(BaseDPXApplication):
                 self.per_domain[loghost][domain] = 0
             self.per_domain[loghost][domain] += 1
 
-            if loghost not in self.per_address:
-                self.per_address[loghost] = {}
-            if address not in self.per_address[loghost]:
-                self.per_address[loghost][address] = 0
-            self.per_address[loghost][address] += 1
+            if self.show_address_results:
+                if loghost not in self.per_address:
+                    self.per_address[loghost] = {}
+                if address not in self.per_address[loghost]:
+                    self.per_address[loghost][address] = 0
+                self.per_address[loghost][address] += 1
 
     # -------------------------------------------------------------------------
     def _is_local_client(self, chain):
@@ -397,10 +388,12 @@ class GetFromAddressesApp(BaseDPXApplication):
         if self.verbose > 2:
             self.empty_line()
             LOG.debug(_('Results per domain:') + '\n' + pp(self.per_domain))
-            self.empty_line()
-            LOG.debug(_('Results per address:') + '\n' + pp(self.per_address))
+            if self.show_address_results:
+                self.empty_line()
+                LOG.debug(_('Results per address:') + '\n' + pp(self.per_address))
 
         self._generate_totals()
+        self._write_domain_csv()
 
         if self.oldest_entry or self.newest_entry:
             self.empty_line()
@@ -421,36 +414,28 @@ class GetFromAddressesApp(BaseDPXApplication):
     def _generate_totals(self):
 
         totals_file_abs = self.totals_yaml_file.resolve()
-        if self.verbose > 0:
+        if self.verbose > 1:
             LOG.debug(_('Using totals YAML file {!r}.').format(str(totals_file_abs)))
 
-        totals = self._read_totals(totals_file_abs)
+        self.totals = self._read_totals(totals_file_abs)
 
-        for loghost in self.per_address.keys():
-            if loghost not in totals:
-                totals[loghost] = {}
-
-            totals[loghost]['per_address'] = {}
-            for address in self.per_address[loghost].keys():
-                count = self.per_address[loghost][address]
-                totals[loghost]['per_address'][address] = count
-
-        for loghost in self.per_address.keys():
-            if loghost not in totals:
-                totals[loghost] = {}
+        for loghost in self.per_domain.keys():
+            if loghost not in self.totals:
+                self.totals[loghost] = {}
 
-            totals[loghost]['per_domain'] = {}
+            self.totals[loghost] = {}
             for domain in self.per_domain[loghost].keys():
                 count = self.per_domain[loghost][domain]
-                totals[loghost]['per_domain'][domain] = count
+                self.totals[loghost][domain] = count
 
-        if self.verbose > 0:
+        if self.verbose > 2:
             self.empty_line()
-            LOG.debug(_('Results total:') + '\n' + pp(totals))
+            LOG.debug(_('Results total:') + '\n' + pp(self.totals))
 
+        self.empty_line()
         LOG.debug(_('Writing file {!r} ...').format(str(totals_file_abs)))
         with totals_file_abs.open('wt', **self.open_args) as fh:
-            yaml.safe_dump(totals, fh, explicit_start=True)
+            yaml.safe_dump(self.totals, fh, explicit_start=True)
 
     # -------------------------------------------------------------------------
     def _read_totals(self, totals_file_abs):
@@ -463,13 +448,59 @@ class GetFromAddressesApp(BaseDPXApplication):
             self.exit(3)
 
         if totals_file_abs.exists():
-            if self.verbose > 0:
+            if self.verbose > 2:
                 LOG.debug(_('Reading file {!r} ...').format(str(totals_file_abs)))
             with totals_file_abs.open('rt', **self.open_args) as fh:
                 totals = yaml.safe_load(fh)
 
         return totals
 
+    # -------------------------------------------------------------------------
+    def _write_domain_csv(self):
+
+        LOG.debug(_('Preparing CSV table for domains ...'))
+        loghosts = []
+        domain_table = {}
+
+        for loghost in sorted(self.totals.keys(), key=str.lower):
+            loghosts.append(loghost)
+        loghosts.append('total')
+        headers = ['domain'] + loghosts
+
+        for loghost in self.totals.keys():
+
+            for domain in self.totals[loghost].keys():
+                count = self.totals[loghost][domain]
+
+                if domain not in domain_table:
+                    domain_table[domain] = {}
+
+                domain_table[domain][loghost] = count
+
+                for lh in loghosts:
+                    if lh not in domain_table[domain]:
+                        domain_table[domain][lh] = 0
+
+                domain_table[domain]['total'] += count
+
+        if self.verbose > 1:
+            self.empty_line()
+            LOG.debug(_('Prepared domains CSV table:') + '\n' + pp(domain_table))
+        LOG.debug(_('Writing CSV file {!r} ...').format(str(self.stats_per_domain_csv_file)))
+
+        with self.stats_per_domain_csv_file.open('w', newline='') as csvfile:
+            writer = csv.DictWriter(
+                csvfile, fieldnames=headers, quoting=csv.QUOTE_MINIMAL,
+                delimiter=',', quotechar='"')
+            writer.writeheader()
+            for domain in sorted(
+                    domain_table.keys(),
+                    key=lambda x: domain_table[x]['total'],
+                    reverse=True):
+                row = copy.copy(domain_table[domain])
+                row['domain'] = domain
+                writer.writerow(row)
+
     # -------------------------------------------------------------------------
     def _output_results_per_address(self):