arches_extensions.management.commands.run_db_backup
import os
import subprocess
from datetime import datetime
from pathlib import Path

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

from arches_extensions.utils import ArchesHelpTextFormatter


class Command(BaseCommand):
    """Run pg_dump to SQL file. Creates a local file and then uploads to S3.

Follows this backup pattern:

1. Rotated daily backups for the last 10 days, stored locally and also synced to S3 bucket
2. Backups on the 1st and 15th of each month forever, only uploaded to S3 bucket (not stored locally)

Local storage directory structure:

```
.db_backups/
    daily/
        <YYYYMMDD>__<db name>.sql  <- repeated for the last 10 days
```

S3 bucket directory structure:

```
bucket_name/
    daily/   <- synced from local daily directory (see above)
    <YYYY>/  <- all 1st and 15th of the month backups for one year
        <YYYY>0101__<db name>.sql  <- Jan 1st backup
        <YYYY>0115__<db name>.sql  <- Jan 15th backup
        <YYYY>0201__<db name>.sql  <- etc.
```

This command assumes you have the [AWS CLI tool](https://aws.amazon.com/cli/) installed and configured.

Args:

- `bucket-name`: Name of the S3 bucket the backups will be synced to.
- `--aws-profile`: Optionally pass a specific aws profile to be used for the `aws s3 cp ...` command
- `--skip-sync`: Don't sync the daily backups to S3 (useful during testing)
- `--skip-rotate`: Don't rotate i.e. trim off dailies older than 10 days
    """

    def __init__(self, *args, **kwargs):
        # BUG FIX: the original never called super().__init__(), which skips
        # BaseCommand's setup of stdout/stderr/style wrappers.
        super().__init__(*args, **kwargs)
        # Reuse the class docstring as the command's --help text.
        self.help = self.__doc__

    def add_arguments(self, parser):
        """Register command-line arguments for this command."""
        parser.formatter_class = ArchesHelpTextFormatter
        parser.add_argument(
            "bucket-name",
            help="Name of the S3 bucket the backups will be synced to.",
        )
        parser.add_argument(
            "--aws-profile",
            help="Optionally pass a specific aws profile to be used for the ``aws s3 cp`` command"
        )
        parser.add_argument(
            "--skip-sync",
            action="store_true",
            help="Don't sync the daily backups to S3 (useful during testing)"
        )
        parser.add_argument(
            "--skip-rotate",
            action="store_true",
            help="Don't rotate i.e. trim off dailies older than 10 days"
        )

    def handle(self, *args, **options):
        """Dump the default DB locally, archive on the 1st/15th, rotate, and sync to S3.

        Raises:
            CommandError: if pg_dump exits non-zero (the dump is not uploaded).
        """

        def apply_profile(cmd: list) -> list:
            # Append --profile only when the caller supplied one.
            if options['aws_profile']:
                cmd += ["--profile", options['aws_profile']]
            return cmd

        # Positional arg was declared as "bucket-name", so the options key
        # keeps the dash (argparse only rewrites dashes for optionals).
        bucket = options['bucket-name']
        now = datetime.now()

        backup_dir = Path(Path(settings.APP_ROOT).parent, ".db_backups", "daily")
        backup_dir.mkdir(exist_ok=True, parents=True)

        db = settings.DATABASES['default']
        db_name = db['NAME']

        fname = now.strftime(f"%Y%m%d__{db_name}.sql")
        fpath = Path(backup_dir, fname)

        cmd = [
            "pg_dump",
            "-U", db['USER'],
            "-h", db['HOST'],
            "-p", str(db['PORT']),
            "-f", str(fpath.resolve()),
            db_name,
        ]

        # Pass the password via the environment (PGPASSWORD) rather than the
        # command line so it never appears in `ps` output.
        use_env = os.environ.copy()
        use_env['PGPASSWORD'] = db['PASSWORD']
        exit_code = subprocess.run(cmd, env=use_env).returncode
        # BUG FIX: the original ignored the exit code, so a failed/partial
        # dump could be uploaded and could rotate out a good backup.
        if exit_code != 0:
            raise CommandError(f"pg_dump exited with code {exit_code}")

        day, year = now.day, now.year

        ## on the 1st and 15th of the month upload the dump to yearly archive
        if day in [1, 15]:
            cmd2 = [
                "aws", "s3", "cp", str(fpath), f"s3://{bucket}/{year}/"
            ]
            cmd2 = apply_profile(cmd2)
            subprocess.run(cmd2)

        if not options["skip_rotate"]:
            ## keep only the newest `limit` local daily files; names sort
            ## chronologically because they start with YYYYMMDD.
            local_dailies = sorted(backup_dir.glob("*"))
            limit = 10
            for path in local_dailies[:-limit]:
                os.remove(path)

        if not options["skip_sync"]:
            cmd3 = [
                "aws", "s3", "sync", str(backup_dir.resolve()), f"s3://{bucket}/daily/", "--delete"
            ]
            cmd3 = apply_profile(cmd3)
            subprocess.run(cmd3)
class Command(BaseCommand):
    """Run pg_dump to SQL file. Creates a local file and then uploads to S3.

Follows this backup pattern:

1. Rotated daily backups for the last 10 days, stored locally and also synced to S3 bucket
2. Backups on the 1st and 15th of each month forever, only uploaded to S3 bucket (not stored locally)

Local storage directory structure:

```
.db_backups/
    daily/
        <YYYYMMDD>__<db name>.sql  <- repeated for the last 10 days
```

S3 bucket directory structure:

```
bucket_name/
    daily/   <- synced from local daily directory (see above)
    <YYYY>/  <- all 1st and 15th of the month backups for one year
        <YYYY>0101__<db name>.sql  <- Jan 1st backup
        <YYYY>0115__<db name>.sql  <- Jan 15th backup
        <YYYY>0201__<db name>.sql  <- etc.
```

This command assumes you have the [AWS CLI tool](https://aws.amazon.com/cli/) installed and configured.

Args:

- `bucket-name`: Name of the S3 bucket the backups will be synced to.
- `--aws-profile`: Optionally pass a specific aws profile to be used for the `aws s3 cp ...` command
- `--skip-sync`: Don't sync the daily backups to S3 (useful during testing)
- `--skip-rotate`: Don't rotate i.e. trim off dailies older than 10 days
    """

    def __init__(self, *args, **kwargs):
        # BUG FIX: the original never called super().__init__(), which skips
        # BaseCommand's setup of stdout/stderr/style wrappers.
        super().__init__(*args, **kwargs)
        # Reuse the class docstring as the command's --help text.
        self.help = self.__doc__

    def add_arguments(self, parser):
        """Register command-line arguments for this command."""
        parser.formatter_class = ArchesHelpTextFormatter
        parser.add_argument(
            "bucket-name",
            help="Name of the S3 bucket the backups will be synced to.",
        )
        parser.add_argument(
            "--aws-profile",
            help="Optionally pass a specific aws profile to be used for the ``aws s3 cp`` command"
        )
        parser.add_argument(
            "--skip-sync",
            action="store_true",
            help="Don't sync the daily backups to S3 (useful during testing)"
        )
        parser.add_argument(
            "--skip-rotate",
            action="store_true",
            help="Don't rotate i.e. trim off dailies older than 10 days"
        )

    def handle(self, *args, **options):
        """Dump the default DB locally, archive on the 1st/15th, rotate, and sync to S3.

        Raises:
            CommandError: if pg_dump exits non-zero (the dump is not uploaded).
        """
        # Local import: this block cannot safely extend the module import line,
        # and CommandError is the conventional failure path for a command.
        from django.core.management.base import CommandError

        def apply_profile(cmd: list) -> list:
            # Append --profile only when the caller supplied one.
            if options['aws_profile']:
                cmd += ["--profile", options['aws_profile']]
            return cmd

        bucket = options['bucket-name']
        now = datetime.now()

        backup_dir = Path(Path(settings.APP_ROOT).parent, ".db_backups", "daily")
        backup_dir.mkdir(exist_ok=True, parents=True)

        db = settings.DATABASES['default']
        db_name = db['NAME']

        fname = now.strftime(f"%Y%m%d__{db_name}.sql")
        fpath = Path(backup_dir, fname)

        cmd = [
            "pg_dump",
            "-U", db['USER'],
            "-h", db['HOST'],
            "-p", str(db['PORT']),
            "-f", str(fpath.resolve()),
            db_name,
        ]

        # Pass the password via the environment (PGPASSWORD) rather than the
        # command line so it never appears in `ps` output.
        use_env = os.environ.copy()
        use_env['PGPASSWORD'] = db['PASSWORD']
        exit_code = subprocess.run(cmd, env=use_env).returncode
        # BUG FIX: the original ignored the exit code, so a failed/partial
        # dump could be uploaded and could rotate out a good backup.
        if exit_code != 0:
            raise CommandError(f"pg_dump exited with code {exit_code}")

        day, year = now.day, now.year

        ## on the 1st and 15th of the month upload the dump to yearly archive
        if day in [1, 15]:
            cmd2 = [
                "aws", "s3", "cp", str(fpath), f"s3://{bucket}/{year}/"
            ]
            cmd2 = apply_profile(cmd2)
            subprocess.run(cmd2)

        if not options["skip_rotate"]:
            ## keep only the newest `limit` local daily files; names sort
            ## chronologically because they start with YYYYMMDD.
            local_dailies = sorted(backup_dir.glob("*"))
            limit = 10
            for path in local_dailies[:-limit]:
                os.remove(path)

        if not options["skip_sync"]:
            cmd3 = [
                "aws", "s3", "sync", str(backup_dir.resolve()), f"s3://{bucket}/daily/", "--delete"
            ]
            cmd3 = apply_profile(cmd3)
            subprocess.run(cmd3)
Run pg_dump to SQL file. Creates a local file and then uploads to S3.
Follows this backup pattern:
- Rotated daily backups for the last 10 days, stored locally and also synced to S3 bucket
- Backups on the 1st and 15th of each month forever, only uploaded to S3 bucket (not stored locally)
Local storage directory structure:
.db_backups/
daily/
<YYYYMMDD>__<db name>.sql <- repeated for the last 10 days
S3 bucket directory structure:
bucket_name/
daily/ <- synced from local daily directory (see above)
<YYYY>/ <- all 1st and 15th of the month backups for one year
<YYYY>0101__<db name>.sql <- Jan 1st backup
<YYYY>0115__<db name>.sql <- Jan 15th backup
<YYYY>0201__<db name>.sql <- etc.
This command assumes you have the AWS CLI tool installed and configured.
Args:
- `bucket-name`: Name of the S3 bucket the backups will be synced to.
- `--aws-profile`: Optionally pass a specific aws profile to be used for the `aws s3 cp ...` command
- `--skip-sync`: Don't sync the daily backups to S3 (useful during testing)
- `--skip-rotate`: Don't rotate, i.e. trim off dailies older than 10 days
def add_arguments(self, parser):
    """Attach this command's CLI options to the given argparse parser."""
    parser.formatter_class = ArchesHelpTextFormatter
    # Required positional: destination bucket.
    parser.add_argument(
        "bucket-name",
        help="Name of the S3 bucket the backups will be synced to.",
    )
    # Optional AWS profile forwarded to the aws CLI invocations.
    parser.add_argument(
        "--aws-profile",
        help="Optionally pass a specific aws profile to be used for the ``aws s3 cp`` command",
    )
    # Boolean flags, mainly useful while testing.
    for flag, help_text in (
        ("--skip-sync", "Don't sync the daily backups to S3 (useful during testing)"),
        ("--skip-rotate", "Don't rotate i.e. trim off dailies older than 10 days"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
Entry point for subclassed commands to add custom arguments.
def handle(self, *args, **options):
    """Dump the default DB locally, archive on the 1st/15th, rotate, and sync to S3.

    Raises:
        CommandError: if pg_dump exits non-zero (the dump is not uploaded).
    """
    # Local import: CommandError is the conventional failure path for a
    # management command and is not imported at module level.
    from django.core.management.base import CommandError

    def apply_profile(cmd: list) -> list:
        # Append --profile only when the caller supplied one.
        if options['aws_profile']:
            cmd += ["--profile", options['aws_profile']]
        return cmd

    # Positional arg was declared as "bucket-name", so the options key keeps
    # the dash (argparse only rewrites dashes for optionals).
    bucket = options['bucket-name']
    now = datetime.now()

    backup_dir = Path(Path(settings.APP_ROOT).parent, ".db_backups", "daily")
    backup_dir.mkdir(exist_ok=True, parents=True)

    db = settings.DATABASES['default']
    db_name = db['NAME']

    fname = now.strftime(f"%Y%m%d__{db_name}.sql")
    fpath = Path(backup_dir, fname)

    cmd = [
        "pg_dump",
        "-U", db['USER'],
        "-h", db['HOST'],
        "-p", str(db['PORT']),
        "-f", str(fpath.resolve()),
        db_name,
    ]

    # Pass the password via the environment (PGPASSWORD) rather than the
    # command line so it never appears in `ps` output.
    use_env = os.environ.copy()
    use_env['PGPASSWORD'] = db['PASSWORD']
    exit_code = subprocess.run(cmd, env=use_env).returncode
    # BUG FIX: the original stored the exit code but never checked it, so a
    # failed/partial dump could be uploaded and could rotate out a good backup.
    if exit_code != 0:
        raise CommandError(f"pg_dump exited with code {exit_code}")

    day, year = now.day, now.year

    ## on the 1st and 15th of the month upload the dump to yearly archive
    if day in [1, 15]:
        cmd2 = [
            # str() for consistency with the other subprocess argv entries.
            "aws", "s3", "cp", str(fpath), f"s3://{bucket}/{year}/"
        ]
        cmd2 = apply_profile(cmd2)
        subprocess.run(cmd2)

    if not options["skip_rotate"]:
        ## keep only the newest `limit` local daily files; names sort
        ## chronologically because they start with YYYYMMDD.
        local_dailies = sorted(backup_dir.glob("*"))
        limit = 10
        for path in local_dailies[:-limit]:
            os.remove(path)

    if not options["skip_sync"]:
        cmd3 = [
            "aws", "s3", "sync", str(backup_dir.resolve()), f"s3://{bucket}/daily/", "--delete"
        ]
        cmd3 = apply_profile(cmd3)
        subprocess.run(cmd3)
The actual logic of the command. Subclasses must implement this method.