"""
Split the files under one directory into groups that each fit on one optical disc, and write one Bash copying script per group.

By Brian Tomasik (https://briantomasik.com/). First published: 2020-11-27. Last update of any kind: 2020-11-27T05-31.

To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.

This script was built with Python 3.6. Hopefully it should work for later versions of Python 3 also.

For explanation of this script, see https://briantomasik.com/organizing-computer-files/#optical .
"""

#import pdb
import os
import shlex
from datetime import datetime

home_dir = os.path.expanduser("~")
DATE_FORMAT = '%Y-%m-%d'
SIZE_OF_TARGET_DISC_IN_BILLION_BYTES = 50
MULTIPLIER_ON_TARGET_SIZE_TO_GIVE_SOME_SPACE = 0.9 # Give some extra wiggle room to account for the fact that these space calculations aren't perfectly accurate.
SPLIT_THE_FILES_FROM_THIS_DIR = os.path.join(home_dir, "files", "debs")
PUT_DESTINATION_FOLDER_UNDER_HERE = os.path.join(home_dir, "files", "temp")
OMIT_PATHS_WITH_THESE_SUBSTRINGS = ["/done-bug-videos/", "/old-iTunes-and-iPod-files/"] # You can add your own substrings.


def create_parts_dir(date_today):
    """Create the directory that will hold the generated scripts and return its path.

    The directory is named "parts_<date_today>" and is created directly under
    PUT_DESTINATION_FOLDER_UNDER_HERE. The parent directory must already exist,
    because os.mkdir() doesn't create intermediate directories.

    Raises AssertionError if the directory already exists.
    """
    put_scripts_here = os.path.join(PUT_DESTINATION_FOLDER_UNDER_HERE, "parts_" + date_today)
    assert not os.path.exists(put_scripts_here), "Parts dir already exists."
    os.mkdir(put_scripts_here)
    return put_scripts_here


def list_all_files_sorted(dir_to_walk):
    """Return a sorted list of the paths of all files found by walking dir_to_walk."""
    print("Getting a sorted list of all files under '{}'...".format(dir_to_walk))
    all_files = []
    for root, _dirs, files in os.walk(dir_to_walk):
        all_files.extend(os.path.join(root, cur_file) for cur_file in files)
    return sorted(all_files)


def omit_files_by_path_substrings(files_list):
    """Return the paths in files_list that contain none of OMIT_PATHS_WITH_THESE_SUBSTRINGS.

    The order of the remaining paths is preserved.
    """
    return [
        cur_file for cur_file in files_list
        if not any(substring in cur_file for substring in OMIT_PATHS_WITH_THESE_SUBSTRINGS)
    ]


def get_file_size_in_bytes(file_path):
    """Return the size of the file at file_path, as reported by os.stat()."""
    # Note that this returns the logical size of the file rather than the actual size on disk. Hopefully the distinction won't matter too much, and the MULTIPLIER_ON_TARGET_SIZE_TO_GIVE_SOME_SPACE should help avoid any problems that would result from slightly underestimating file sizes.
    return os.stat(file_path).st_size


def split_files_into_parts(files_to_do, size_limit_in_bytes=None):
    """Group files, in the order given, into consecutive parts that each fit within the size limit.

    files_to_do: non-empty list of paths to existing files.
    size_limit_in_bytes: maximum total size of one part. If None (the default), it's
        computed from SIZE_OF_TARGET_DISC_IN_BILLION_BYTES and
        MULTIPLIER_ON_TARGET_SIZE_TO_GIVE_SOME_SPACE.

    Returns a list of parts, where each part is a non-empty list of file paths.

    Raises AssertionError if any single file is not smaller than the limit, or if
    files_to_do is empty.
    """
    print("Splitting the files into parts...")
    if size_limit_in_bytes is None:
        size_limit_in_bytes = SIZE_OF_TARGET_DISC_IN_BILLION_BYTES * 10**9 * MULTIPLIER_ON_TARGET_SIZE_TO_GIVE_SOME_SPACE
    parts = []
    cur_part = []
    total_size_of_cur_part = 0
    for cur_file in files_to_do:
        cur_file_size = get_file_size_in_bytes(cur_file)
        assert cur_file_size < size_limit_in_bytes, "File '{}' is too big for the size limit.".format(cur_file)
        if total_size_of_cur_part + cur_file_size > size_limit_in_bytes:
            # The current file would overflow the current part, so close that part and start a new one.
            assert len(cur_part) > 0, "cur_part is empty."
            parts.append(cur_part)
            cur_part = []
            total_size_of_cur_part = 0
        cur_part.append(cur_file)
        total_size_of_cur_part += cur_file_size
    assert len(cur_part) > 0, "I think we should always have at least one leftover file by this point."
    parts.append(cur_part) # Save out the last files that didn't get saved during the above 'for' loop.
    return parts


def write_copying_scripts(parts, put_scripts_here, date_today):
    """Write one Bash script per part into the directory put_scripts_here.

    parts: list of lists of file paths, as returned by split_files_into_parts().
    put_scripts_here: existing directory in which to create the .sh files.
    date_today: date string that becomes part of each script and directory name.

    Each script makes a directory named after the part, copies the part's files into it
    with 'cp --parents', runs the checksum commands inside it, and prints its size. The
    directory name in the script is relative, so the directory is created wherever the
    script is run from.

    Raises AssertionError if there are 100 or more parts or if a script already exists.
    """
    print("Creating the Bash scripts for copying each part...")
    num_parts = len(parts)
    assert num_parts < 100, "This script assumes less than 100 parts."
    for part_number, cur_part in enumerate(parts, start=1):
        name_of_cur_part_dir = "part-{:02d}-of-{:02d}_created{}".format(part_number, num_parts, date_today)
        output_path = os.path.join(put_scripts_here, name_of_cur_part_dir + ".sh")
        assert not os.path.exists(output_path), "Output path already exists."
        with open(output_path, "w") as output_file:
            print("mkdir -v {}\n".format(name_of_cur_part_dir), file=output_file)
            for cur_file in cur_part:
                # shlex.quote() keeps paths containing spaces, quotes, '$', backticks, etc. from breaking the command or being expanded by the shell.
                print('cp -v --parents {} {}'.format(shlex.quote(cur_file), name_of_cur_part_dir), file=output_file)
            print("\necho ''", file=output_file) # Add a newline to the output.
            print("cd {}".format(name_of_cur_part_dir), file=output_file)
            # 'myshaofeverything' and 'myshacheck' aren't defined in this file; presumably they're the author's own shell commands that must be available when the generated script is run.
            print("myshaofeverything", file=output_file)
            print("myshacheck sha512-of-everything.txt", file=output_file)
            print("cd ..", file=output_file)
            # Double quotes are needed here because the message contains an apostrophe, which would end a single-quoted Bash string early.
            print('echo "Here\'s the size of this directory:"', file=output_file)
            print("du -sh {}".format(name_of_cur_part_dir), file=output_file)


def main():
    """Create the parts directory, list and filter the files, split them into parts, and write the scripts."""
    date_today = datetime.now().strftime(DATE_FORMAT)
    put_scripts_here = create_parts_dir(date_today)
    all_files_sorted = list_all_files_sorted(SPLIT_THE_FILES_FROM_THIS_DIR)
    assert len(all_files_sorted) > 0, "There are no files."
    files_to_do = omit_files_by_path_substrings(all_files_sorted)
    assert len(files_to_do) > 0, "All files were omitted."
    parts = split_files_into_parts(files_to_do)
    write_copying_scripts(parts, put_scripts_here, date_today)
    print("Done.")


if __name__ == "__main__":
    main()