arches_extensions.management.commands.run_db_backup

  1import os
  2from datetime import datetime
  3import subprocess
  4from pathlib import Path
  5
  6from django.conf import settings
  7from django.core.management.base import BaseCommand
  8
  9from arches_extensions.utils import ArchesHelpTextFormatter
 10
 11class Command(BaseCommand):
 12    """Run pg_dump to SQL file. Creates a local file and then uploads to S3.
 13
 14Follows this backup pattern:
 15
 161. Rotated daily backups for the last 10 days, stored locally and also synced to S3 bucket
 172. Backups on the 1st and 15th of each month forever, only uploaded to S3 bucket (not stored locally)
 18
 19Local storage directory structure:
 20
 21```
 22.db_backups/
 23    daily/
 24        <YYYYMMDD>__<db name>.sql <- repeated for the last 10 days
 25```
 26
 27S3 bucket directory structure:
 28
 29```
 30bucket_name/
 31    daily/ <- synced from local daily directory (see above)
 32    <YYYY>/ <- all 1st and 15th of the month backups for one year
 33        <YYYY>0101__<db name>.sql <- Jan 1st backup
 34        <YYYY>0115__<db name>.sql <- Jan 15th backup
 35        <YYYY>0201__<db name>.sql <- etc.
 36```
 37
 38This command assumes you have the [AWS CLI tool](https://aws.amazon.com/cli/) installed and configured.
 39
 40Args:
 41    
 42- `bucket-name`: Name of the S3 bucket the backups will be synced to.
 43- `--aws-profile`: Optionally pass a specific aws profile to be used for the `aws s3 cp ...` command
 44- `--skip-sync`: Don't sync the daily backups to S3 (useful during testing)
 45- `--skip-rotate`: "Don't rotate i.e. trim off dailies older than 10 days"
 46    """
 47
 48    def __init__(self, *args, **kwargs):
 49        self.help = self.__doc__
 50
 51    def add_arguments(self, parser):
 52        parser.formatter_class = ArchesHelpTextFormatter
 53        parser.add_argument(
 54            "bucket-name",
 55            help="Name of the S3 bucket the backups will be synced to.",
 56        )
 57        parser.add_argument(
 58            "--aws-profile",
 59            help="Optionally pass a specific aws profile to be used for the ``aws s3 cp`` command"
 60        )
 61        parser.add_argument(
 62            "--skip-sync",
 63            action="store_true",
 64            help="Don't sync the daily backups to S3 (useful during testing)"
 65        )
 66        parser.add_argument(
 67            "--skip-rotate",
 68            action="store_true",
 69            help="Don't rotate i.e. trim off dailies older than 10 days"
 70        )
 71
 72    def handle(self, *args, **options):
 73
 74        def apply_profile(cmd: list):
 75            if options['aws_profile']:
 76                cmd += ["--profile", options['aws_profile']]
 77            return cmd
 78
 79        bucket = options['bucket-name']
 80        now = datetime.now()
 81
 82        backup_dir = Path(Path(settings.APP_ROOT).parent, ".db_backups", "daily")
 83        backup_dir.mkdir(exist_ok=True, parents=True)
 84
 85        db_name = settings.DATABASES['default']['NAME']
 86        db_user = settings.DATABASES['default']['USER']
 87        db_host = settings.DATABASES['default']['HOST']
 88        db_pass = settings.DATABASES['default']['PASSWORD']
 89        db_port = settings.DATABASES['default']['PORT']
 90
 91        fname = now.strftime(f"%Y%m%d__{db_name}.sql")
 92        fpath = Path(backup_dir, fname)
 93
 94        cmd = [
 95            "pg_dump",
 96            "-U", db_user,
 97            "-h", db_host,
 98            "-p", str(db_port),
 99            "-f", str(fpath.resolve()),
100            db_name,
101        ]
102
103        use_env = os.environ.copy()
104        use_env['PGPASSWORD'] = db_pass
105        p = subprocess.Popen(cmd, env=use_env)
106        exit_code = p.wait()
107
108        day, year = now.day, now.year
109
110        ## on the 1st and 15th of the month upload the dump to yearly archive
111        if day in [1, 15]:
112
113            cmd2 = [
114                "aws", "s3", "cp", fpath, f"s3://{bucket}/{year}/"
115            ]
116            cmd2 = apply_profile(cmd2)
117            subprocess.run(cmd2)
118
119        if not options["skip_rotate"]:
120            ## cleanup any local files that are over 10 days old.
121            ## sort all of the existing local daily files
122            local_dailies = sorted(list(backup_dir.glob("*")))
123            limit = 10
124            ## slice off any local dailies beyond the limit
125            for path in local_dailies[:-limit]:
126                os.remove(path)
127
128        if not options["skip_sync"]:
129            cmd3 = [
130                "aws", "s3", "sync", str(backup_dir.resolve()), f"s3://{bucket}/daily/", "--delete"
131            ]
132            cmd3 = apply_profile(cmd3)
133            subprocess.run(cmd3)
class Command(django.core.management.base.BaseCommand):
 12class Command(BaseCommand):
 13    """Run pg_dump to SQL file. Creates a local file and then uploads to S3.
 14
 15Follows this backup pattern:
 16
 171. Rotated daily backups for the last 10 days, stored locally and also synced to S3 bucket
 182. Backups on the 1st and 15th of each month forever, only uploaded to S3 bucket (not stored locally)
 19
 20Local storage directory structure:
 21
 22```
 23.db_backups/
 24    daily/
 25        <YYYYMMDD>__<db name>.sql <- repeated for the last 10 days
 26```
 27
 28S3 bucket directory structure:
 29
 30```
 31bucket_name/
 32    daily/ <- synced from local daily directory (see above)
 33    <YYYY>/ <- all 1st and 15th of the month backups for one year
 34        <YYYY>0101__<db name>.sql <- Jan 1st backup
 35        <YYYY>0115__<db name>.sql <- Jan 15th backup
 36        <YYYY>0201__<db name>.sql <- etc.
 37```
 38
 39This command assumes you have the [AWS CLI tool](https://aws.amazon.com/cli/) installed and configured.
 40
 41Args:
 42    
 43- `bucket-name`: Name of the S3 bucket the backups will be synced to.
 44- `--aws-profile`: Optionally pass a specific aws profile to be used for the `aws s3 cp ...` command
 45- `--skip-sync`: Don't sync the daily backups to S3 (useful during testing)
 46- `--skip-rotate`: Don't rotate i.e. trim off dailies older than 10 days
 47    """
 48
 49    def __init__(self, *args, **kwargs):
 50        self.help = self.__doc__
 51
 52    def add_arguments(self, parser):
 53        parser.formatter_class = ArchesHelpTextFormatter
 54        parser.add_argument(
 55            "bucket-name",
 56            help="Name of the S3 bucket the backups will be synced to.",
 57        )
 58        parser.add_argument(
 59            "--aws-profile",
 60            help="Optionally pass a specific aws profile to be used for the ``aws s3 cp`` command"
 61        )
 62        parser.add_argument(
 63            "--skip-sync",
 64            action="store_true",
 65            help="Don't sync the daily backups to S3 (useful during testing)"
 66        )
 67        parser.add_argument(
 68            "--skip-rotate",
 69            action="store_true",
 70            help="Don't rotate i.e. trim off dailies older than 10 days"
 71        )
 72
 73    def handle(self, *args, **options):
 74
 75        def apply_profile(cmd: list):
 76            if options['aws_profile']:
 77                cmd += ["--profile", options['aws_profile']]
 78            return cmd
 79
 80        bucket = options['bucket-name']
 81        now = datetime.now()
 82
 83        backup_dir = Path(Path(settings.APP_ROOT).parent, ".db_backups", "daily")
 84        backup_dir.mkdir(exist_ok=True, parents=True)
 85
 86        db_name = settings.DATABASES['default']['NAME']
 87        db_user = settings.DATABASES['default']['USER']
 88        db_host = settings.DATABASES['default']['HOST']
 89        db_pass = settings.DATABASES['default']['PASSWORD']
 90        db_port = settings.DATABASES['default']['PORT']
 91
 92        fname = now.strftime(f"%Y%m%d__{db_name}.sql")
 93        fpath = Path(backup_dir, fname)
 94
 95        cmd = [
 96            "pg_dump",
 97            "-U", db_user,
 98            "-h", db_host,
 99            "-p", str(db_port),
100            "-f", str(fpath.resolve()),
101            db_name,
102        ]
103
104        use_env = os.environ.copy()
105        use_env['PGPASSWORD'] = db_pass
106        p = subprocess.Popen(cmd, env=use_env)
107        exit_code = p.wait()
108
109        day, year = now.day, now.year
110
111        ## on the 1st and 15th of the month upload the dump to yearly archive
112        if day in [1, 15]:
113
114            cmd2 = [
115                "aws", "s3", "cp", fpath, f"s3://{bucket}/{year}/"
116            ]
117            cmd2 = apply_profile(cmd2)
118            subprocess.run(cmd2)
119
120        if not options["skip_rotate"]:
121            ## cleanup any local files that are over 10 days old.
122            ## sort all of the existing local daily files
123            local_dailies = sorted(list(backup_dir.glob("*")))
124            limit = 10
125            ## slice off any local dailies beyond the limit
126            for path in local_dailies[:-limit]:
127                os.remove(path)
128
129        if not options["skip_sync"]:
130            cmd3 = [
131                "aws", "s3", "sync", str(backup_dir.resolve()), f"s3://{bucket}/daily/", "--delete"
132            ]
133            cmd3 = apply_profile(cmd3)
134            subprocess.run(cmd3)

Run pg_dump to SQL file. Creates a local file and then uploads to S3.

Follows this backup pattern:

  1. Rotated daily backups for the last 10 days, stored locally and also synced to S3 bucket
  2. Backups on the 1st and 15th of each month forever, only uploaded to S3 bucket (not stored locally)

Local storage directory structure:

.db_backups/
    daily/
        <YYYYMMDD>__<db name>.sql <- repeated for the last 10 days

S3 bucket directory structure:

bucket_name/
    daily/ <- synced from local daily directory (see above)
    <YYYY>/ <- all 1st and 15th of the month backups for one year
        <YYYY>0101__<db name>.sql <- Jan 1st backup
        <YYYY>0115__<db name>.sql <- Jan 15th backup
        <YYYY>0201__<db name>.sql <- etc.

This command assumes you have the AWS CLI tool installed and configured.

Args:

  • bucket-name: Name of the S3 bucket the backups will be synced to.
  • --aws-profile: Optionally pass a specific aws profile to be used for the aws s3 cp ... command
  • --skip-sync: Don't sync the daily backups to S3 (useful during testing)
  • --skip-rotate: Don't rotate i.e. trim off dailies older than 10 days
Command(*args, **kwargs)
49    def __init__(self, *args, **kwargs):
50        self.help = self.__doc__
help = ''
def add_arguments(self, parser):
52    def add_arguments(self, parser):
53        parser.formatter_class = ArchesHelpTextFormatter
54        parser.add_argument(
55            "bucket-name",
56            help="Name of the S3 bucket the backups will be synced to.",
57        )
58        parser.add_argument(
59            "--aws-profile",
60            help="Optionally pass a specific aws profile to be used for the ``aws s3 cp`` command"
61        )
62        parser.add_argument(
63            "--skip-sync",
64            action="store_true",
65            help="Don't sync the daily backups to S3 (useful during testing)"
66        )
67        parser.add_argument(
68            "--skip-rotate",
69            action="store_true",
70            help="Don't rotate i.e. trim off dailies older than 10 days"
71        )

Entry point for subclassed commands to add custom arguments.

def handle(self, *args, **options):
 73    def handle(self, *args, **options):
 74
 75        def apply_profile(cmd: list):
 76            if options['aws_profile']:
 77                cmd += ["--profile", options['aws_profile']]
 78            return cmd
 79
 80        bucket = options['bucket-name']
 81        now = datetime.now()
 82
 83        backup_dir = Path(Path(settings.APP_ROOT).parent, ".db_backups", "daily")
 84        backup_dir.mkdir(exist_ok=True, parents=True)
 85
 86        db_name = settings.DATABASES['default']['NAME']
 87        db_user = settings.DATABASES['default']['USER']
 88        db_host = settings.DATABASES['default']['HOST']
 89        db_pass = settings.DATABASES['default']['PASSWORD']
 90        db_port = settings.DATABASES['default']['PORT']
 91
 92        fname = now.strftime(f"%Y%m%d__{db_name}.sql")
 93        fpath = Path(backup_dir, fname)
 94
 95        cmd = [
 96            "pg_dump",
 97            "-U", db_user,
 98            "-h", db_host,
 99            "-p", str(db_port),
100            "-f", str(fpath.resolve()),
101            db_name,
102        ]
103
104        use_env = os.environ.copy()
105        use_env['PGPASSWORD'] = db_pass
106        p = subprocess.Popen(cmd, env=use_env)
107        exit_code = p.wait()
108
109        day, year = now.day, now.year
110
111        ## on the 1st and 15th of the month upload the dump to yearly archive
112        if day in [1, 15]:
113
114            cmd2 = [
115                "aws", "s3", "cp", fpath, f"s3://{bucket}/{year}/"
116            ]
117            cmd2 = apply_profile(cmd2)
118            subprocess.run(cmd2)
119
120        if not options["skip_rotate"]:
121            ## cleanup any local files that are over 10 days old.
122            ## sort all of the existing local daily files
123            local_dailies = sorted(list(backup_dir.glob("*")))
124            limit = 10
125            ## slice off any local dailies beyond the limit
126            for path in local_dailies[:-limit]:
127                os.remove(path)
128
129        if not options["skip_sync"]:
130            cmd3 = [
131                "aws", "s3", "sync", str(backup_dir.resolve()), f"s3://{bucket}/daily/", "--delete"
132            ]
133            cmd3 = apply_profile(cmd3)
134            subprocess.run(cmd3)

The actual logic of the command. Subclasses must implement this method.