Hanze_Surfnet_Data_POC/Enquete/enquete/storage/storage.py
2022-02-17 14:35:54 +01:00

249 lines
12 KiB
Python

import shlex
import subprocess
import storage.exceptions as StorageException
import importlib
from pathlib import Path
import re
import glob
import os
import shutil
from datetime import datetime
import tempfile
import logging
logger = logging.getLogger(__name__)
class Storage():
    """Factory that returns a storage engine instance for a requested type.

    Engines are discovered at call time: every ``*.py`` file in the ``engines``
    directory next to this module whose source contains exactly one class
    deriving from ``BaseStorage`` is imported from the ``storage.engines``
    package and registered under the lower-cased value of its ``TYPE``
    attribute.
    """

    # Matches ``class <name>(BaseStorage):`` and captures the class name.
    CLASS_REGEX = re.compile(r'class\s+(?P<class_name>[^\s\(]+)\s*\(\s*BaseStorage\s*\)\s*:')

    # This acts like a factory function. It will return a storage object from the requested engine
    def __new__(cls, storage_type, url=None, username=None, password=None, source=None, destination=None, encryption_key=None, sender_name=None, sender_email=None):
        """Create and return an instance of the engine registered as ``storage_type``.

        The returned object is an instance of the engine class, not of
        ``Storage``. All remaining arguments are forwarded positionally to the
        engine constructor.

        Raises:
            StorageException.UnknownStorageEngine: no engine is registered
                under ``storage_type`` (compared case-insensitively).
        """
        storage_type = storage_type.lower()

        # Discover the engines exactly once per call. The debug line below reuses
        # this result instead of re-scanning and re-importing all engine modules.
        engines = cls.__load_storage_engines()
        engine_names = sorted(engines)
        logger.debug(f'Available storage engines({len(engine_names)}): {engine_names}')

        if storage_type not in engines:
            raise StorageException.UnknownStorageEngine(storage_type)

        return engines[storage_type](url, username, password, source, destination, encryption_key, sender_name, sender_email)

    @staticmethod
    def __load_storage_engines():
        """Return a dict mapping lower-cased engine ``TYPE`` to the engine class."""
        loaded_engines = {}
        engines_dir = Path(__file__).parent.joinpath('engines')

        for engine in engines_dir.glob('*.py'):
            if not engine.is_file():
                continue

            # Python source files are UTF-8 by default, so do not depend on the locale.
            class_names = Storage.CLASS_REGEX.findall(engine.read_text(encoding='utf-8'))
            if len(class_names) != 1:
                # Only files that define exactly one BaseStorage subclass are treated as engines.
                continue

            storage_engine_module = importlib.import_module(f'.{engine.stem}', package='storage.engines')
            storage_engine_class = getattr(storage_engine_module, class_names[0])
            loaded_engines[storage_engine_class.TYPE.lower()] = storage_engine_class

        return loaded_engines

    @staticmethod
    def available_engines():
        """Return the alphabetically sorted list of registered engine type names."""
        return sorted(Storage.__load_storage_engines())
class BaseStorage():
    """Common behaviour shared by the storage engines.

    An engine subclasses this class, sets ``TYPE`` and overrides the
    ``_*_action`` hooks at the bottom of the class. The public methods here add
    logging, timing, directory creation and optional EncFS encryption around
    those hooks.
    """

    # Name of the EncFS configuration file kept next to the encrypted data.
    ENCFS_XML = '.encfs6.xml'
    # Absolute paths of the external tools used for encryption.
    ENCRYPT_CMD = '/usr/bin/encfs'
    FUSER_MOUNT = '/bin/fusermount'
    # Engine identifier; overridden by every engine.
    TYPE = ''

    def __init__(self, url=None, username=None, password=None, source=None, destination=None, encryption_key=None, sender_name=None, sender_email=None):
        """Store the connection settings and split ``destination`` into directory and file name.

        Raises:
            StorageException.FileDoesNotExist: ``source`` is given but does not
                exist on the local disk.
        """
        if source is not None and not os.path.exists(source):
            logger.error(f'Source file is not available on disk! It has vanished from: {source}')
            raise StorageException.FileDoesNotExist(source)

        # NOTE: a missing destination is accepted here; it is not validated in the constructor.
        self.source = source
        self.destination_dir = None if destination is None else os.path.dirname(destination)
        self.destination_file = None if destination is None else os.path.basename(destination)
        self.encryption_key = encryption_key
        self.encrypted = False

        self.url = url
        self.username = username
        self.password = password
        self.sender_name = sender_name
        self.sender_email = sender_email

    def encrypt_source(self):
        """Encrypt ``self.source`` with EncFS and point the instance at the encrypted file.

        The source file is moved into a temporary EncFS mount, the mount is
        released, and the resulting encrypted file is moved next to the original
        source location. ``self.source`` and ``self.destination_file`` are
        updated to the encrypted file name. An existing EncFS configuration on
        the destination is reused; otherwise the newly generated one is
        uploaded to the destination directory.

        Returns:
            True when the file is (or already was) encrypted.

        Raises:
            StorageException.MissingEncryptionKey: no encryption key was set.
            RuntimeError: mounting or un-mounting EncFS failed, or no encrypted
                file was produced.
        """
        if self.encryption_key is None:
            logger.error(f'Cannot encrypt source file {self.source} due to missing encryption key!')
            raise StorageException.MissingEncryptionKey()

        if self.encrypted:
            logger.warning('File is already encrypted')
            return True

        start_time = datetime.now()
        logger.info(f'Encrypting new uploaded file: {self.source}')

        # Resolve the remote config path first so nothing is created on disk if this fails.
        existing_encfs_file = os.path.join(self.destination_dir, BaseStorage.ENCFS_XML)

        encrypted_dir = tempfile.mkdtemp()
        logger.debug(f'Created encrypted source folder: {encrypted_dir}')
        decoded_dir = tempfile.mkdtemp()
        logger.debug(f'Created decoded folder: {decoded_dir}')

        new_encryption_setup = True
        logger.debug(f'Check for existing encryption key file \'{existing_encfs_file}\' on the destination location.')
        if self.file_exists(existing_encfs_file):
            logger.debug(f'Copying existing \'{BaseStorage.ENCFS_XML}\' file...')
            self.download_file(existing_encfs_file, os.path.join(encrypted_dir, BaseStorage.ENCFS_XML))
            logger.info(f'Using existing \'{existing_encfs_file}\' from destination location.')
            new_encryption_setup = False

        # Mounting part between source and encrypted folder.
        # The command is passed as an argument list so directory names with spaces are safe.
        cmd = [BaseStorage.ENCRYPT_CMD, '--standard', '-S', encrypted_dir, decoded_dir]
        logger.debug(f'Creating an encrypted EncFS mount point with command: {" ".join(cmd)}')
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)

        # Send encryption password. communicate() also waits for the process to finish.
        logger.debug('Mounting in action. Sending encryption key...')
        (output, error) = process.communicate(input=self.encryption_key.encode())
        if process.returncode != 0:
            output = b'\n'.join(part for part in (output, error) if part).decode(errors='replace').strip()
            logger.error(f'Error creating an encrypted mount with EncFS. Error: \'{output}\'')
            # Nothing was mounted, so both temporary folders can be dropped.
            shutil.rmtree(decoded_dir, ignore_errors=True)
            shutil.rmtree(encrypted_dir, ignore_errors=True)
            raise RuntimeError(f'Mounting error EncFS: {output}')

        logger.debug(f'Mountpoint is ready at path: {decoded_dir}')

        if new_encryption_setup:
            logger.info(f'We have a new \'{BaseStorage.ENCFS_XML}\' file that needs to be moved to the same destination: {self.destination_dir}')
            self.upload_file(os.path.join(encrypted_dir, BaseStorage.ENCFS_XML), existing_encfs_file, True)

        # Here we ignore the subdirectories on the destination. This will be fixed during the upload.
        # A leading '/' is stripped: os.path.join() would otherwise discard the mount point and
        # the file would be written outside the encrypted mount.
        destination_file = os.path.join(decoded_dir, self.destination_dir.lstrip('/'), self.destination_file)
        logger.debug(f'Moving source file \'{self.source}\' to \'{destination_file}\' for encryption.')
        # exist_ok: without sub directories the parent is the (already existing) mount point itself.
        os.makedirs(os.path.dirname(destination_file), exist_ok=True)
        shutil.move(self.source, destination_file)

        # Here we umount the decoded directory, so we only have the encrypted data left
        logger.debug(f'Encrypting is done, un-mounting decoded folder: {decoded_dir}')
        cmd = [BaseStorage.FUSER_MOUNT, '-u', decoded_dir]
        logger.debug(f'Umounting cmd: {" ".join(cmd)}')
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains the pipes; a bare wait() can block when a pipe fills up.
        (output, error) = process.communicate()
        if process.returncode != 0:
            output = b'\n'.join(part for part in (output, error) if part).decode(errors='replace').strip()
            logger.error(f'Error un-mounting mount point: {decoded_dir}')
            raise RuntimeError(f'Un-mounting error EncFS: {output}')

        logger.debug(f'Cleanup temporary decoded dir: {decoded_dir}')
        shutil.rmtree(decoded_dir)

        # Find the newly created encrypted file and move it back to the original source file
        # We use the glob function so we can also support subdirectories in the encrypted storage
        logger.debug(f'Finding newly created encrypted file in the encrypted source folder: {encrypted_dir}')
        pattern = os.path.join(glob.escape(encrypted_dir), '**')
        # Keep regular files only: the listing also contains the folder itself and sub folders.
        encrypted_listing = [path for path in glob.glob(pattern, recursive=True) if os.path.isfile(path)]
        if not encrypted_listing:
            logger.error(f'No encrypted file was created in: {encrypted_dir}')
            raise RuntimeError(f'No encrypted file found in EncFS folder: {encrypted_dir}')

        encrypted_file = encrypted_listing[-1]
        logger.debug(f'Found encrypted file: {encrypted_file}')

        # Source file is been changed to the new encrypted file name. So use that for the file upload process
        self.source = os.path.join(os.path.dirname(self.source), os.path.basename(encrypted_file))
        self.destination_file = os.path.basename(self.source)
        logger.debug(f'Moving encrypted file {encrypted_file} back to original file: {self.source}')
        logger.debug(f'Updated the destination file name based on the encrypted name: {self.destination_file}')
        shutil.move(encrypted_file, self.source)

        # The encrypted folder is no longer referenced anywhere, so do not leave it behind.
        shutil.rmtree(encrypted_dir, ignore_errors=True)

        logger.info(f'Encrypted to \'{self.source}\' in {datetime.now() - start_time} (h:mm:ss.ms)')

        self.encrypted = True
        return True

    def file_exists(self, path):
        """Return the engine's answer to whether ``path`` exists as a file on the storage."""
        logger.debug(f'Check if file exists at path: \'{path}\' with engine: \'{self.TYPE}\'')
        file_exists = self._file_exists_action(path)
        exists = 'exist' if file_exists else 'does not exist'
        logger.debug(f'File \'{path}\' {exists} on storage \'{self.TYPE}\'')
        return file_exists

    def upload_file(self, source=None, destination=None, move=False):
        """Upload ``source`` to ``destination``, creating missing directories first.

        Args:
            source: local file; defaults to ``self.source``.
            destination: storage path; defaults to the destination given to the
                constructor.
            move: remove the local file after a successful upload. Encrypted
                sources are always removed after a successful upload.

        Returns:
            The result of the engine upload, or False when source or
            destination is not set.
        """
        source = self.source if source is None else source
        if destination is None and self.destination_dir is not None and self.destination_file is not None:
            destination = os.path.join(self.destination_dir, self.destination_file)

        if source is None or destination is None:
            logger.error(f'Error uploading file. Either source: \'{source}\' or destination: \'{destination}\' is not set!')
            # Nothing sensible can be uploaded, so stop here instead of calling the engine.
            return False

        start_time = datetime.now()
        logger.debug(f'Start uploading file: \'{source}\' to: \'{destination}\' with engine: \'{self.TYPE}\'')

        if not self.directory_exists(os.path.dirname(destination)):
            self.create_directories(os.path.dirname(destination))

        upload_ok = self._upload_file_action(source, destination)
        if upload_ok:
            logger.info(f'Uploaded \'{source}\' to: \'{destination}\' with engine: \'{self.TYPE}\' in {datetime.now() - start_time} (h:mm:ss.ms)')
            if move or self.encrypted:
                os.unlink(source)
                logger.debug('Removed source file from disk!')
        else:
            logger.error(f'Error uploading \'{source}\' to: \'{destination}\' with engine: \'{self.TYPE}\' in {datetime.now() - start_time} (h:mm:ss.ms)')

        return upload_ok

    def directory_exists(self, path):
        """Return the engine's answer to whether ``path`` exists as a directory on the storage."""
        return self._directory_exists_action(path)

    def download_file(self, source=None, destination=None, move=False):
        """Download ``source`` from the storage to ``destination``.

        Args:
            source: storage path; defaults to ``self.source``.
            destination: target path; defaults to the destination given to the
                constructor.
            move: accepted for symmetry with ``upload_file``; not used here.

        Returns:
            The result of the engine download, or False when source or
            destination is not set.
        """
        source = self.source if source is None else source
        if destination is None and self.destination_dir is not None and self.destination_file is not None:
            destination = os.path.join(self.destination_dir, os.path.basename(self.destination_file))

        if source is None or destination is None:
            logger.error(f'Error downloading file. Either source: {source} or destination: {destination} is not set!')
            # Nothing sensible can be downloaded, so stop here instead of calling the engine.
            return False

        start_time = datetime.now()
        logger.debug(f'Downloading file: {source} to: {destination}')

        download_ok = self._download_file_action(source, destination)
        if download_ok:
            logger.info(f'Downloaded \'{source}\' to: \'{destination}\' in {datetime.now() - start_time} (h:mm:ss.ms)')
        else:
            logger.error(f'Downloading failed for \'{source}\' to: \'{destination}\' in {datetime.now() - start_time} (h:mm:ss.ms)')

        return download_ok

    def create_directories(self, path):
        """Create every missing directory along ``path`` on the storage, top down.

        Leading and trailing slashes are ignored and empty segments (an empty
        path, or doubled slashes) are skipped.

        Returns:
            True once all segments have been processed.
        """
        folders = []
        for folder in path.strip('/').split('/'):
            if not folder:
                # An empty segment does not name a directory.
                continue

            # Store travelled path. We need this to make the directories on the remote servers
            folders.append(folder)
            full_path = '/'.join(folders)
            if not self.directory_exists(full_path):
                logger.debug(f'Creating folder {folder} with full path: {full_path}')
                self._make_folder_action(full_path)
            else:
                logger.debug(f'Folder \'{folder}\' already exists.')

        return True

    # Engine hooks. Every engine has to override these.

    def _file_exists_action(self, path):
        """Engine hook for ``file_exists``; the base implementation always raises."""
        raise StorageException.StorageActionNotImplemented('BaseStorage', 'file_exists')

    def _directory_exists_action(self, path):
        """Engine hook for ``directory_exists``; the base implementation always raises."""
        raise StorageException.StorageActionNotImplemented('BaseStorage', 'directory_exists')

    def _upload_file_action(self, source, destination):
        """Engine hook for ``upload_file``; the base implementation always raises."""
        raise StorageException.StorageActionNotImplemented('BaseStorage', '_upload_file')

    def _download_file_action(self, source, destination):
        """Engine hook for ``download_file``; the base implementation always raises."""
        raise StorageException.StorageActionNotImplemented('BaseStorage', '_download_file')

    def _make_folder_action(self, path):
        """Engine hook used by ``create_directories``; the base implementation always raises."""
        raise StorageException.StorageActionNotImplemented('BaseStorage', '_make_folder_action')