"""
By Brian Tomasik (https://briantomasik.com/).
First published: 2019-11-10.
Last update of any kind: 2019-11-10T14-59.

To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.

This script was built with Python 3.6. Hopefully it should work for later
versions of Python 3 also.

# Description

This script archives, extracts, or tests several different archive formats
that I care about. It's essentially a shortcut and cheatsheet so that I don't
have to look up the specific command for the specific file type. You give it
the file or folder, and it asks questions to figure out the rest.

This program never deletes the original input file or directory. One reason is
that it's easy enough for you to do that yourself after running this if you
want to. A second reason is that if this script never deletes things, then any
possible bugs this script may contain are unlikely to be particularly
disastrous.

# Usage examples

To encrypt or create an archive for file 'foo.pdf':
    python3 Swiss-archive-knife.py foo.pdf
Possible resulting file: 'foo.pdf.7z'

To encrypt or create an archive for directory 'bar/':
    python3 Swiss-archive-knife.py bar
Possible resulting file: 'bar.zip'

To decrypt existing archive 'baz.tar.gz.gpg':
    python3 Swiss-archive-knife.py baz.tar.gz.gpg
Resulting file: 'baz.tar.gz'

To extract existing archive 'qux.tar.bz2':
    python3 Swiss-archive-knife.py qux.tar.bz2
Resulting directory: 'qux/'

To test archive 'corge.7z':
    python3 Swiss-archive-knife.py corge.7z

# Why extract to a newly created subfolder

When extracting anything other than `.gpg` archives, this program creates a
new subfolder to extract the archive in. The new subfolder is datestamped with
the current time up to a precision of 1 second, so it should be unique and not
exist yet.

Often you _can_ get away with extracting an archive directly in the folder
where it lives, but it's overall safest to always move to a brand new
subdirectory for extracting. A main reason is the possibility of so-called
tarbombs. [This page](http://www.linfo.org/tarbomb.html) advises: "a tarball
exploding into an existing directory can be a major annoyance, or even
dangerous. [...] The easiest way to prevent unwanted tarball explosions is to
get into the habit of first creating a new, protective directory and then
moving the tarball into it before untarring."

The other reason is that even if the extracted archive isn't a tarbomb, i.e.,
even if it just creates a single output folder, there's some chance that a
folder with that same name already exists there. For example, suppose you have
a directory that contains both a folder `foo/` and an archive `my-foo.zip`.
Suppose that `my-foo.zip` was originally created from a folder named `foo/`,
and the archive was renamed from `foo.zip` to `my-foo.zip`. When `my-foo.zip`
extracts, it tries to put the extracted files in a folder with the name it had
when it was created, namely `foo/`. But you already have a different `foo/`
directory. What will happen is that the unzipping process will try to put
files into the existing `foo/` directory, which may create a mess by mixing
files originally in `foo/` with new files that come from `my-foo.zip`. If some
of the extracted files have the same names as files already in `foo/`, you'll
be asked whether to leave the original file or replace it with the extracted
one. (At least, `7z` and `unzip` have this behavior. `tar` by default just
silently overwrites any existing files that have the same name, though this
can be turned off with the `-k` option.) All of this potential mess can be
avoided by expanding archives into a brand new folder that doesn't contain
anything else.

In this script, `.gpg` files do expand directly into the folder where they're
located. The reason for this is that the output location of a decrypted `.gpg`
file is specified explicitly by the user. Since I know the exact name of the
destination file, I can check ahead of time to make sure that that file
doesn't already exist.
"""

#import pdb
import argparse
import os
import shutil
from datetime import datetime
import subprocess

FORMATS_I_CAN_EXTRACT = ['.gpg', '.7z', '.zip', '.tar', '.tar.gz', '.tgz', '.tar.bz2']
FORMATS_I_CAN_TEST = ['.7z', '.zip']
ABORTING_MESSAGE = "This script is aborting without having done anything."

# Maps each menu abbreviation accepted by do_archiving() to a tuple of
#   (extension appended to the input name,
#    command words that come before the output path,
#    command words that come between the output path and the input path).
_ARCHIVE_RECIPES = {
    'g': (".gpg", ["gpg", "-v", "-o"], ["-c"]),
    '7ze': (".7z", ["7z", "a", "-p", "-mhe=on", "-ms=off"], []),
    '7zu': (".7z", ["7z", "a", "-ms=off"], []),
    'z': (".zip", ["zip", "-rq"], []),
    't': (".tar", ["tar", "-cf"], []),
    'tg': (".tar.gz", ["tar", "-czf"], []),
    'tb': (".tar.bz2", ["tar", "-cjf"], []),
}

_TAR_EXTENSIONS = (".tar", ".tar.gz", ".tgz", ".tar.bz2")


def check_that_output_location_doesnt_already_exist(proposed_output_location):
    """Raise AssertionError if `proposed_output_location` already exists.

    The exception is raised explicitly rather than via an `assert` statement
    so that this overwrite guard stays active when Python runs with `-O`.
    """
    if os.path.exists(proposed_output_location):
        raise AssertionError(
            "The output location '{}' already exists! {}".format(
                proposed_output_location, ABORTING_MESSAGE))


def do_archiving(input_file_or_folder, is_dir):
    """Ask the user for an archive format and create that archive.

    The output is named after the input plus the format's extension, and is
    only created if nothing exists at that location yet. Any answer that isn't
    a known abbreviation (or 'g' for a directory) aborts with a message.

    Args:
        input_file_or_folder: Name of the file or directory to archive.
        is_dir: True if the input is a directory; hides and disables '.gpg'.

    Raises:
        AssertionError: If the output location already exists.
        subprocess.CalledProcessError: If the archiving command fails.
    """
    menu_text = (
        "Choose an archive format by entering the corresponding abbreviation, "
        "or enter anything else to abort.\n\n"
        "# Encrypted formats\n"
        "'g' for .gpg\n"
        "'7ze' for .7z encrypted\n\n"
        "# Unencrypted formats\n"
        "'7zu' for .7z unencrypted\n"
        "'z' for .zip\n"
        "'t' for .tar\n"
        "'tg' for .tar.gz\n"
        "'tb' for .tar.bz2\n\n"
    )
    if is_dir:
        menu_text = menu_text.replace("\n'g' for .gpg", "")  # GPG only encrypts files, not dirs.
    answer = input(menu_text)

    recipe = _ARCHIVE_RECIPES.get(answer)
    if recipe is None or (answer == 'g' and is_dir):
        print(ABORTING_MESSAGE)
        return

    extension, words_before_output, words_before_input = recipe
    output_location = input_file_or_folder + extension
    check_that_output_location_doesnt_already_exist(output_location)
    command = (words_before_output + [output_location]
               + words_before_input + [input_file_or_folder])
    subprocess.run(command, check=True)


def create_and_move_to_local_subfolder(input_file):
    """Create a fresh timestamped subfolder, move `input_file` into it, and chdir there.

    Raises:
        AssertionError: If a subfolder with the generated name already exists.
    """
    new_subfolder_name = "extracted_" + datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
    if os.path.exists(new_subfolder_name):
        raise AssertionError(
            "The attempted new subfolder '{}' already exists! {}".format(
                new_subfolder_name, ABORTING_MESSAGE))
    os.mkdir(new_subfolder_name)
    print("Created a new subdirectory '{}' to extract the archive in.".format(new_subfolder_name))
    shutil.move(input_file, new_subfolder_name)
    os.chdir(new_subfolder_name)


def is_some_kind_of_tar_archive(input_file):
    """Return True if `input_file` ends with one of the supported tar extensions."""
    return input_file.endswith(_TAR_EXTENSIONS)


def do_extracting(input_file):
    """Decrypt a `.gpg` file in place, or extract any other archive in a new subfolder.

    For non-`.gpg` archives the archive is temporarily moved into a newly
    created subfolder, extracted there, and then moved back to its original
    location -- even when the extraction command fails.

    Raises:
        AssertionError: If the decrypted output or the new subfolder already exists.
        subprocess.CalledProcessError: If the external command fails.
    """
    if input_file.endswith(".gpg"):
        output_location, outermost_extension = os.path.splitext(input_file)
        if outermost_extension != ".gpg":
            # Happens for a file literally named '.gpg', where splitext()
            # finds no extension at all.
            raise AssertionError(
                "Couldn't strip the '.gpg' extension from '{}'. {}".format(
                    input_file, ABORTING_MESSAGE))
        check_that_output_location_doesnt_already_exist(output_location)
        subprocess.run(["gpg", "-v", "-o", output_location, "-d", input_file], check=True)
        return

    create_and_move_to_local_subfolder(input_file)
    try:
        if input_file.endswith(".7z"):
            subprocess.run(["7z", "x", input_file], check=True)
        elif input_file.endswith(".zip"):
            subprocess.run(["unzip", "-q", input_file], check=True)
        elif is_some_kind_of_tar_archive(input_file):
            subprocess.run(["tar", "-kxf", input_file], check=True)
        else:
            print("ERROR: The script should never get here because the file format is unknown.")
    finally:
        # Put the original file back where it was, whether or not the
        # extraction succeeded, so a failed run doesn't leave the archive
        # stranded inside the new subfolder.
        shutil.move(input_file, "..")
def do_testing(input_file):
    """Run the integrity test of the tool matching `input_file`'s extension.

    Only `.7z` and `.zip` are handled; anything else prints an error message.

    Raises:
        subprocess.CalledProcessError: If the test command exits with an error.
    """
    if input_file.endswith(".7z"):
        subprocess.run(["7z", "t", input_file], check=True)
    elif input_file.endswith(".zip"):
        subprocess.run(["unzip", "-tq", input_file], check=True)
    else:
        print("ERROR: The script should never get here because the file format is unknown.")


def file_is_testable(filename):
    """Return True if `filename` ends with one of FORMATS_I_CAN_TEST."""
    return filename.endswith(tuple(FORMATS_I_CAN_TEST))


def file_is_extractable(filename):
    """Return True if `filename` ends with one of FORMATS_I_CAN_EXTRACT."""
    return filename.endswith(tuple(FORMATS_I_CAN_EXTRACT))


def check_that_input_file_or_folder_is_in_current_dir(normpath_input_file_or_folder):
    """Raise AssertionError unless the path has no directory component.

    The exception is raised explicitly rather than via an `assert` statement
    so that the check stays active when Python runs with `-O`.
    """
    if os.path.dirname(normpath_input_file_or_folder) != "":
        raise AssertionError(
            "Sorry, this script only handles files or folders located in the current directory. "
            "For example, `foo/` is ok and `bar.zip` is ok but `foo/bar.zip` isn't. "
            "This restriction was imposed just to reduce the number of cases I had to think "
            "about when writing and testing the script.")


def parse_input():
    """Parse the command line and return the argparse namespace."""
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file_or_folder", help="The name of the file or folder to operate on.")
    return parser.parse_args()


def main():
    """Validate the input path, then ask the user what to do with it.

    Directories can only be archived. Files can always be archived, and are
    additionally offered extraction and/or testing when their extension is
    supported.

    Raises:
        AssertionError: If the input doesn't exist, isn't in the current
            directory, or is neither a directory nor a regular file.
    """
    args = parse_input()
    normpath_input_file_or_folder = os.path.normpath(args.input_file_or_folder)
    # Explicit raises (instead of `assert` statements) keep these checks
    # active under `python -O`.
    if not os.path.exists(normpath_input_file_or_folder):
        raise AssertionError("'{}' doesn't exist.".format(normpath_input_file_or_folder))
    check_that_input_file_or_folder_is_in_current_dir(normpath_input_file_or_folder)

    if os.path.isdir(normpath_input_file_or_folder):
        do_archiving(normpath_input_file_or_folder, True)
        return

    if not os.path.isfile(normpath_input_file_or_folder):
        raise AssertionError("'{}' isn't a directory or file.".format(normpath_input_file_or_folder))

    extractable = file_is_extractable(normpath_input_file_or_folder)
    testable = file_is_testable(normpath_input_file_or_folder)
    menu_text = "Choose an option for your file '{}' by entering the corresponding abbreviation, or enter anything else to abort.\n'a' for archive\n".format(normpath_input_file_or_folder)
    if extractable:
        menu_text += "'x' for extract\n"
    if testable:
        menu_text += "'t' for test\n"
    answer = input(menu_text)

    if answer == "a":
        do_archiving(normpath_input_file_or_folder, False)
    elif answer == "x" and extractable:
        do_extracting(normpath_input_file_or_folder)
    elif answer == "t" and testable:
        do_testing(normpath_input_file_or_folder)
    else:
        print(ABORTING_MESSAGE)


if __name__ == "__main__":
    main()