Initial commit

This commit is contained in:
2022-02-17 14:35:54 +01:00
commit abbaaab98a
43 changed files with 2079 additions and 0 deletions

View File

@ -0,0 +1 @@
A small questionnaire tool that stores the data through a WebDAV connection.

View File

@ -0,0 +1,8 @@
import os
import logging
import logging.config

# Bootstrap logging: prefer a developer-specific override file and fall back
# to the default project configuration when no override is present.
for _logging_config in ('logging.custom.ini', 'logging.ini'):
    if os.path.isfile(_logging_config):
        logging.config.fileConfig(_logging_config)
        break

View File

@ -0,0 +1,28 @@
import shutil
import os
from storage.storage import BaseStorage
import logging
logger = logging.getLogger(__name__)
class LocalStorage(BaseStorage):
    """Storage engine that keeps files on the local filesystem.

    'Uploading' and 'downloading' are both plain file copies, since the
    source and the destination live on the same machine.
    """

    TYPE = 'fs'

    def file_exists(self, filepath):
        """Return True when *filepath* exists and is a regular file."""
        return os.path.isfile(filepath) and os.path.exists(filepath)

    def directory_exists(self, filepath):
        """Return True when *filepath* exists and is a directory."""
        return os.path.isdir(filepath) and os.path.exists(filepath)

    def _make_folder_action(self, path):
        """Create *path*, including any missing parent directories."""
        os.makedirs(path)
        return True

    def _upload_file_action(self, source, destination):
        """Copy *source* to *destination* on the local disk."""
        shutil.copy(source, destination)
        return True

    def _download_file_action(self, source, destination):
        """Copy *source* to *destination* on the local disk."""
        shutil.copy(source, destination)
        return True

View File

@ -0,0 +1,89 @@
from giteapy.rest import ApiException
import giteapy
import base64
from storage.storage import BaseStorage
import logging
logger = logging.getLogger(__name__)
# Gitea Support - https://pypi.org/project/giteapy/
class GiteaStorage(BaseStorage):
    """Storage engine for a Gitea server (https://pypi.org/project/giteapy/).

    The repository name is appended to the ``url`` parameter using a '#' as
    separator. The repository needs to be created first.
    Ex: https://git.web.rug.nl/api/v1#RepositoryName
    """

    TYPE = 'gitea'

    def __init__(self, url=None, username=None, password=None, source=None, destination=None, encryption_key=None, sender_name=None, sender_email=None):
        # The repository is added to the url parameter. Use a '#' as separator.
        (url, self.repository) = url.split('#')
        destination = destination.strip('/')
        super().__init__(url, username, password, source, destination, encryption_key, sender_name, sender_email)
        # Create a committer object when the data is uploaded through one of the invited accounts.
        self.committer = None
        if sender_name is not None or sender_email is not None:
            self.committer = giteapy.Identity(name=sender_name, email=sender_email)

    def __connect(self):
        """Create the Gitea API client on first use; reuse it afterwards."""
        try:
            # An AttributeError means no client has been created yet.
            assert(self.client)
        except AttributeError:
            # Configuration for the GITEA connection
            configuration = giteapy.Configuration()
            configuration.host = self.url
            configuration.api_key['access_token'] = self.password
            # Create the client
            self.client = giteapy.RepositoryApi(giteapy.ApiClient(configuration))
            logger.info(f'Created Gitea connection to url: {self.url}')

    def file_exists(self, filepath):
        """Return True when *filepath* exists in the repository."""
        self.__connect()
        try:
            self.client.repo_get_contents(self.username, self.repository, filepath)
            return True
        except ApiException:
            return False

    def directory_exists(self, filepath):
        """Directories only exist implicitly on Gitea; delegate to file_exists."""
        self.__connect()
        return self.file_exists(filepath)

    def _make_folder_action(self, path):
        # Empty directories cannot be created on Gitea, so this action always succeeds.
        return True

    def _upload_file_action(self, source, destination):
        """Upload *source* to *destination* in the repository.

        Returns True on success, False when reading or uploading failed.
        """
        self.__connect()
        try:
            with open(source, 'rb') as datafile:
                # NOTE(review): the whole file is held in memory for base64
                # encoding, which is a problem for big files.
                body = giteapy.CreateFileOptions(content=base64.b64encode(datafile.read()).decode(),
                                                 message=f'Upload from VRE DataDropOff\n Added file: {destination}',
                                                 committer=self.committer)
        except Exception:
            return False
        try:
            # Create a file in a repository
            self.client.repo_create_file(self.username, self.repository, destination, body)
            return True
        except ApiException as ex:
            logger.exception(f'Exception when calling RepositoryApi->repo_create_file: {ex}')
            # Bug fix: report the failed upload instead of claiming success.
            return False

    def _download_file_action(self, source, destination):
        """Download *source* from the repository into local file *destination*.

        Returns True on success, False when the remote fetch failed.
        """
        self.__connect()
        try:
            data = self.client.repo_get_contents(self.username, self.repository, source)
        except ApiException as ex:
            logger.exception(f'Exception when calling RepositoryApi->repo_get_contents: {ex}')
            # Bug fix: do not create an empty destination file and do not
            # report success when the download failed.
            return False
        with open(destination, 'wb') as destination_file:
            destination_file.write(base64.b64decode(data.content))
        return True

View File

@ -0,0 +1,66 @@
from github.GithubException import UnknownObjectException
from github import Github, InputGitAuthor, GithubObject
from storage.storage import BaseStorage
import os
import logging
logger = logging.getLogger(__name__)
# Github Support - https://pypi.org/project/PyGithub/
class GithubStorage(BaseStorage):
    """Storage engine for GitHub.com repositories (PyGithub).

    The repository name is appended to the ``url`` parameter with a '#' as
    separator. The repository needs to be created first.
    Ex: https://api.github.com/#RepositoryName
    """

    TYPE = 'github'

    def __init__(self, url=None, username=None, password=None, source=None, destination=None, encryption_key=None, sender_name=None, sender_email=None):
        """Split the repository name off the url and prepare the committer."""
        (url, self.repository) = url.split('#')
        destination = destination.strip('/')
        super().__init__(url, username, password, source, destination, encryption_key, sender_name, sender_email)
        # Use a dedicated committer identity when the data is uploaded through
        # one of the invited accounts; otherwise GitHub picks the default.
        self.committer = GithubObject.NotSet
        if sender_name is not None or sender_email is not None:
            self.committer = InputGitAuthor(name=sender_name, email=sender_email)

    def __connect(self):
        """Lazily create the repository handle; reuse it on later calls."""
        try:
            # An AttributeError means there is no connection yet.
            assert(self.repo)
        except AttributeError:
            api = Github(self.password)
            self.repo = api.get_user().get_repo(self.repository)
            logger.info('Created Github.com connection')

    def file_exists(self, filepath):
        """Return True when *filepath* is present in the repository."""
        self.__connect()
        try:
            self.repo.get_contents(filepath)
        except UnknownObjectException:
            return False
        return True

    def directory_exists(self, filepath):
        # GitHub has no standalone directories, so report them as existing.
        return True

    def _make_folder_action(self, path):
        # On GitHub you cannot create empty directories, so this always succeeds.
        return True

    def _upload_file_action(self, source, destination):
        """Push *source* to the repository; the library converts to Base64."""
        self.__connect()
        with open(source, 'rb') as datafile:
            self.repo.create_file(destination.strip('/'), f'Upload from VRE DataDropOff\n Added file: {destination}', datafile.read(), committer=self.committer)
        return True

    def _download_file_action(self, source, destination):
        """Fetch *source* from the repository and write it to *destination*."""
        self.__connect()
        download = self.repo.get_contents(source)
        with open(destination, 'wb') as destination_file:
            destination_file.write(download.decoded_content)
        return True

View File

@ -0,0 +1,139 @@
import atexit
from irods.session import iRODSSession
import irods
import storage.exceptions as StorageException
from storage.storage import BaseStorage
import logging
logger = logging.getLogger(__name__)
# iRods support - https://pypi.org/project/python-irodsclient/
class iRODSStorage(BaseStorage):
    """Storage engine for iRODS (https://pypi.org/project/python-irodsclient/).

    The iRODS zone is added to the ``url`` parameter using a '#' as separator.
    This needs to be an existing iRODS zone.
    Ex: rdms-prod-icat.data.rug.nl#rug
    """

    TYPE = 'irods'

    def __init__(self, url=None, username=None, password=None, source=None, destination=None, encryption_key=None, sender_name=None, sender_email=None):
        (url, self.irods_zone) = url.split('#')
        if destination:
            destination = destination.strip('/')
        super().__init__(url, username, password, source, destination, encryption_key, sender_name, sender_email)
        # We need to clean up the iRODS session. Using atexit is the easiest way.
        atexit.register(self.__close)

    def __connect(self):
        """Create the iRODS session on first use, trying all auth schemes.

        Raises StorageException.InvalidAuthentication when no scheme works.
        """
        try:
            assert(self.client)
        except AttributeError:
            self.client = None
            # iRODSSession construction is lazy: the real login only happens on
            # the first server call. Read server_version to force the
            # authentication *before* accepting the session as valid.
            # Bug fix: previously the session was assigned to self.client
            # before authentication, so after all schemes failed self.client
            # held an invalid session and the error below was unreachable.
            for scheme, label in ((None, 'native'), ('pam', 'PAM'), ('gis', 'GIS')):
                session_args = dict(host=self.url, port=1247, user=self.username,
                                    password=self.password, zone=self.irods_zone)
                if scheme is not None:
                    session_args['irods_authentication_scheme'] = scheme
                try:
                    session = iRODSSession(**session_args)
                    logger.debug(f'iRODS {session.server_version} connection through *{label}* authentication')
                    self.client = session
                    break
                except irods.exception.CAT_INVALID_AUTHENTICATION:
                    # This scheme did not work; try the next one.
                    continue
            if self.client is None:
                logger.error('Unable to login to the iRODS instance. Please check username and password combination!')
                raise StorageException.InvalidAuthentication(self.username)
            logger.info('Created iRODS connection')

    def __close(self):
        """atexit hook: close the iRODS session and clean up."""
        logger.debug('Closing iRODS storage connection and clean up')
        # Bug fix: the session is created lazily, so guard against the case
        # where __connect was never called (self.client does not exist yet).
        client = getattr(self, 'client', None)
        if client is not None:
            client.cleanup()

    def _file_exists_action(self, path):
        """Return True when *path* exists as a data object on the server."""
        self.__connect()
        try:
            self.client.data_objects.get(f'/{self.irods_zone}/home/{self.username}/{path}')
        except irods.exception.DataObjectDoesNotExist:
            logger.debug(f'File \'{path}\' does NOT exists on the iRODS server')
            return False
        except irods.exception.CollectionDoesNotExist:
            logger.debug(f'Parent folder of file \'{path}\' does NOT exists on the iRODS server')
            return False
        return True

    def _directory_exists_action(self, path):
        """Return True when *path* exists as a collection on the server."""
        self.__connect()
        try:
            self.client.collections.get(f'/{self.irods_zone}/home/{self.username}/{path}')
            logger.debug(f'Folder \'{path}\' exists on the iRODS server')
        except irods.exception.CollectionDoesNotExist:
            logger.debug(f'Folder \'{path}\' does NOT exists on the iRODS server')
            return False
        return True

    def _make_folder_action(self, path):
        """Create collection *path*; fails when the parent does not exist."""
        self.__connect()
        try:
            self.client.collections.create(f'/{self.irods_zone}/home/{self.username}/{path}')
        except irods.exception.CollectionDoesNotExist:
            logger.debug(f'Parent folder of file \'{path}\' does NOT exists on the iRODS server')
            return False
        return True

    def _upload_file_action(self, source, destination):
        """Stream local file *source* to the iRODS server in 4K chunks.

        Returns False when the remote file already exists.
        """
        self.__connect()
        # The upload path consists of a zone, username and path
        destination = f'/{self.irods_zone}/home/{self.username}/{destination}'
        logger.debug(f'Uploading to file: \'{destination}\'')
        try:
            obj = self.client.data_objects.create(destination)
            logger.debug(f'Created file: \'{destination}\'')
            # Open 'both' files and copy 4K data each time.
            with obj.open('w') as irods_file, open(source, 'rb') as source_file_binary:
                while True:
                    buf = source_file_binary.read(4096)
                    if buf:
                        irods_file.write(buf)
                    else:
                        break
            obj.metadata.add('source', f'Upload from VRE DataDropOff\n Added file: {destination} uploaded by: {self.sender_name}({self.sender_email})')
        except irods.exception.OVERWRITE_WITHOUT_FORCE_FLAG:
            logger.warning('The uploaded file already exists. So we did NOT upload the new file!')
            return False
        return True

    def _download_file_action(self, source, destination):
        """Stream remote file *source* into local file *destination* in 4K chunks."""
        self.__connect()
        logger.debug(f'Downloading file: \'{source}\' to \'{destination}\'')
        try:
            obj = self.client.data_objects.get(f'/{self.irods_zone}/home/{self.username}/{source}')
            # Open 'both' files and copy 4K data each time.
            with obj.open('r') as irods_source_file, open(destination, 'wb') as local_destination_file:
                while True:
                    buf = irods_source_file.read(4096)
                    if buf:
                        local_destination_file.write(buf)
                    else:
                        break
        except irods.exception.DataObjectDoesNotExist:
            logger.error(f'File: \'{source}\' does not exists on the iRODS server')
            return False
        return True

View File

@ -0,0 +1,65 @@
from webdav3.exceptions import WebDavException, ResponseErrorCode
from webdav3.client import Client
import storage.exceptions as StorageException
from storage.utils import human_filesize
from storage.storage import BaseStorage
import logging
logger = logging.getLogger(__name__)
# WebDAV Support - https://pypi.org/project/webdavclient3/
class WebDAVStorage(BaseStorage):
    """Storage engine that stores data on a WebDAV server (webdavclient3)."""

    TYPE = 'webdav'

    def __connect(self):
        """Connect to the external storage, reusing an existing connection.

        Safe to call multiple times; only the first call creates the client.
        """
        try:
            # An AttributeError here means the 'client' attribute was never
            # set, so a new connection has to be made.
            assert(self.client)
        except AttributeError:
            options = {
                'webdav_hostname': self.url,
                'webdav_login': self.username,
                'webdav_password': self.password,
            }
            self.client = Client(options)
            try:
                # Here we abuse the .free check to see if the login credentials do work
                available = self.client.free()
                logger.info(f'Created WebDAV connection to url: \'{self.url}\', with space left: {human_filesize(available)}')
            except ResponseErrorCode as ex:
                # Login went wrong: drop the client so the next call retries.
                del(self.client)
                # If there was an authentication error, raise exception and quit.
                if 401 == ex.code:
                    raise StorageException.InvalidAuthentication(self.username)
                # TODO: More errors.....

    def _file_exists_action(self, path):
        """Return True when *path* exists on the WebDAV server."""
        self.__connect()
        return self.client.check(path)

    def _directory_exists_action(self, path):
        """WebDAV uses the same existence check for files and directories."""
        self.__connect()
        return self.client.check(path)

    def _make_folder_action(self, path):
        """Create the remote directory *path*."""
        self.__connect()
        self.client.mkdir(path)
        return True

    def _upload_file_action(self, source, destination):
        """Upload local file *source* to remote path *destination*."""
        self.__connect()
        self.client.upload(local_path=source, remote_path=destination)
        return True

    def _download_file_action(self, source, destination):
        """Download remote file *source* to local path *destination*."""
        self.__connect()
        self.client.download(source, destination)
        return True

View File

@ -0,0 +1,63 @@
class BaseStorageError(Exception):
    """Base class for all storage related errors."""
    pass


class StorageActionNotImplemented(BaseStorageError):
    """Raised when a storage engine does not implement a required action.

    Bug fix: this class now derives from BaseStorageError like every other
    storage exception, so callers can catch the whole family at once.
    """

    def __init__(self, storage, action, message='is not implemented'):
        self.storage = storage
        self.action = action
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f'{self.action} on class {self.storage} {self.message}'


class FileDoesNotExist(BaseStorageError):
    """Raised when the source file has vanished from disk."""

    def __init__(self, source, message='File does not exists on disk'):
        self.source = source
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f'{self.source} -> {self.message}'


class InvalidLocation(BaseStorageError):
    """Raised when a storage location does not exist or is not valid."""

    def __init__(self, location, message='Location does not exists or is not valid'):
        self.location = location
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f'{self.location} -> {self.message}'


class InvalidAuthentication(BaseStorageError):
    """Raised when logging in to the remote storage fails."""

    def __init__(self, user, message='Authentication failed'):
        self.user = user
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f'{self.user} -> {self.message}'


class UnknownStorageEngine(BaseStorageError):
    """Raised when the requested storage engine type is not available."""

    def __init__(self, engine, message='Storage engine is unknown, not available'):
        self.engine = engine
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f'{self.engine} -> {self.message}'


class MissingEncryptionKey(BaseStorageError):
    """Raised when encryption is requested but no key was configured."""

    def __init__(self, message='The encryption keys are missing'):
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f'{self.message}'

View File

@ -0,0 +1,248 @@
import shlex
import subprocess
import storage.exceptions as StorageException
import importlib
from pathlib import Path
import re
import glob
import os
import shutil
from datetime import datetime
import tempfile
import logging
logger = logging.getLogger(__name__)
class Storage():
    """Factory that returns a storage engine object for the requested type."""

    # Matches 'class <Name>(BaseStorage):' inside an engine module.
    CLASS_REGEX = re.compile(r'class\s+(?P<class_name>[^\s\(]+)\s*\(\s*BaseStorage\s*\)\s*:')

    def __new__(cls, storage_type, url=None, username=None, password=None, source=None, destination=None, encryption_key=None, sender_name=None, sender_email=None):
        """Instantiate and return the engine registered for *storage_type*.

        Raises StorageException.UnknownStorageEngine for unknown types.
        """
        storage_type = storage_type.lower()
        engines = Storage.__load_storage_engines()
        logger.debug(f'Available storage engines({len(Storage.available_engines())}): {Storage.available_engines()}')
        if storage_type not in engines:
            raise StorageException.UnknownStorageEngine(storage_type)
        return engines[storage_type](url, username, password, source, destination, encryption_key, sender_name, sender_email)

    @staticmethod
    def __load_storage_engines():
        """Scan the 'engines' package for modules defining a BaseStorage subclass."""
        found = {}
        engine_dir = Path(__file__).parent.joinpath('engines')
        for module_path in engine_dir.glob('*.py'):
            if not module_path.is_file():
                continue
            matches = Storage.CLASS_REGEX.findall(module_path.read_text())
            # Only register modules that define exactly one engine class.
            if len(matches) == 1:
                module = importlib.import_module('.{}' . format(module_path.stem), package='storage.engines')
                engine_class = getattr(module, matches[0])
                found[engine_class.TYPE.lower()] = engine_class
        return found

    @staticmethod
    def available_engines():
        """Return the available engine type names, sorted alphabetically."""
        return sorted(Storage.__load_storage_engines().keys())
class BaseStorage():
    """Common behaviour for all storage engines: uploads, downloads,
    directory creation and optional EncFS encryption of the source file.

    Concrete engines override the ``_*_action`` methods and ``TYPE``.
    """

    # Name of the EncFS configuration file stored next to the encrypted data.
    ENCFS_XML = '.encfs6.xml'
    # External binaries used for the EncFS encryption mount.
    ENCRYPT_CMD = '/usr/bin/encfs'
    FUSER_MOUNT = '/bin/fusermount'
    # Engine type name; overridden by every concrete storage engine.
    TYPE = ''

    def __init__(self, url=None, username=None, password=None, source=None, destination=None, encryption_key=None, sender_name=None, sender_email=None):
        """Store connection and transfer settings.

        Raises StorageException.FileDoesNotExist when *source* is given but
        is no longer present on disk.
        """
        if source is not None and not os.path.exists(source):
            logger.error(f'Source file is not available on disk! It has vanished from: {source}')
            raise StorageException.FileDoesNotExist(source)
        self.source = source
        # Destination is split into directory and file name parts.
        self.destination_dir = None if destination is None else os.path.dirname(destination)
        self.destination_file = None if destination is None else os.path.basename(destination)
        self.encryption_key = encryption_key
        # Set to True once encrypt_source() has run successfully.
        self.encrypted = False
        self.url = url
        self.username = username
        self.password = password
        self.sender_name = sender_name
        self.sender_email = sender_email

    def encrypt_source(self):
        """Encrypt the source file in place using an EncFS mount.

        An existing '.encfs6.xml' on the destination is reused so multiple
        uploads share one encryption setup; a newly created one is uploaded.
        Afterwards self.source and self.destination_file point to the
        encrypted file. Returns True on success.

        Raises:
            StorageException.MissingEncryptionKey: no key was configured.
            RuntimeError: mounting or un-mounting the EncFS volume failed.
        """
        if self.encryption_key is None:
            logger.error(f'Cannot encrypt source file {self.source} due to missing encryption key!')
            raise StorageException.MissingEncryptionKey()
        if self.encrypted:
            logger.warning('File is already encrypted')
            return True
        start_time = datetime.now()
        logger.info(f'Encrypting new uploaded file: {self.source}')
        encrypted_dir = tempfile.mkdtemp()
        logger.debug(f'Created encrypted source folder: {encrypted_dir}')
        decoded_dir = tempfile.mkdtemp()
        logger.debug(f'Created decoded folder: {decoded_dir}')
        new_encryption_setup = True
        existing_encfs_file = os.path.join(self.destination_dir, BaseStorage.ENCFS_XML)
        logger.debug(f'Check for existing encryption key file \'{existing_encfs_file}\' on the destination location.')
        if self.file_exists(existing_encfs_file):
            logger.debug(f'Copying existing \'{BaseStorage.ENCFS_XML}\' file...')
            self.download_file(existing_encfs_file, os.path.join(encrypted_dir, BaseStorage.ENCFS_XML))
            logger.info(f'Using existing \'{existing_encfs_file}\' from destination location.')
            new_encryption_setup = False
        # Mounting part between source and encrypted folder
        # TODO: Check what happens when there are spaces in the dir names... need some quotes I guess
        cmd = f'{BaseStorage.ENCRYPT_CMD} --standard -S {encrypted_dir} {decoded_dir}'
        logger.debug(f'Creating an encrypted EncFS mount point with command: {cmd}')
        process = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
        # Send the encryption password over stdin.
        logger.debug('Mounting in action. Sending encryption key...')
        (output, error) = process.communicate(input=self.encryption_key.encode())
        if process.wait(timeout=30) != 0:
            output = output.decode().strip()
            logger.error(f'Error creating an encrypted mount with EncFS. Error: \'{output}\'')
            raise RuntimeError(f'Mounting error EncFS: {output}')
        logger.debug(f'Mountpoint is ready at path: {decoded_dir}')
        if new_encryption_setup:
            logger.info(f'We have a new \'{BaseStorage.ENCFS_XML}\' file that needs to be moved to the same destination: {self.destination_dir}')
            self.upload_file(os.path.join(encrypted_dir, BaseStorage.ENCFS_XML), existing_encfs_file, True)
        # Here we ignore the subdirectories on the destination. This will be fixed during the upload
        destination_file = os.path.join(decoded_dir, self.destination_dir, self.destination_file)
        logger.debug(f'Moving source file \'{self.source}\' to \'{destination_file}\' for encryption.')
        os.makedirs(os.path.dirname(destination_file))
        shutil.move(self.source, destination_file)
        # Un-mount the decoded directory, so only the encrypted data is left.
        logger.debug(f'Encrypting is done, un-mounting decoded folder: {decoded_dir}')
        cmd = f'{BaseStorage.FUSER_MOUNT} -u {decoded_dir}'
        logger.debug(f'Umounting cmd: {cmd}')
        process = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
        if process.wait() != 0:
            # TODO: Better error handling... Add raise exception
            logger.error(f'Error un-mounting mount point: {decoded_dir}')
            raise RuntimeError(f'Un-mounting error EncFS: {process}')
        logger.debug(f'Cleanup temporary decoded dir: {decoded_dir}')
        shutil.rmtree(decoded_dir)
        # Find the newly created encrypted file and move it back to the original source file
        # We use the glob function so we can also support subdirectories in the encrypted storage
        logger.debug(f'Finding newly created encrypted file in the encrypted source folder: {encrypted_dir}')
        encrypted_listing = glob.glob(f'{encrypted_dir}/**', recursive=True)
        logger.debug(f'Found encrypted file: {encrypted_listing[-1]}')
        # The source file name is changed to the encrypted file name. Use that for the upload process.
        self.source = os.path.join(os.path.dirname(self.source), os.path.basename(encrypted_listing[-1]))
        self.destination_file = os.path.basename(self.source)
        logger.debug(f'Moving encrypted file {encrypted_listing[-1]} back to original file: {self.source}')
        logger.debug(f'Updated the destination file name based on the encrypted name: {self.destination_file}')
        shutil.move(encrypted_listing[-1], self.source)
        logger.info(f'Encrypted to \'{self.source}\' in {datetime.now() - start_time} (h:mm:ss.ms)')
        self.encrypted = True
        return True

    def file_exists(self, path):
        """Return True when *path* exists on the storage (engine specific)."""
        logger.debug(f'Check if file exists at path: \'{path}\' with engine: \'{self.TYPE}\'')
        file_exists = self._file_exists_action(path)
        exists = 'exist' if file_exists else 'does not exist'
        logger.debug(f'File \'{path}\' {exists} on storage \'{self.TYPE}\'')
        return file_exists

    def upload_file(self, source=None, destination=None, move=False):
        """Upload *source* to *destination* (both default to constructor values).

        When *move* is True, or the source was encrypted, the local source
        file is removed after a successful upload. Missing remote directories
        are created first. Returns True on success, False otherwise.
        """
        source = self.source if source is None else source
        if destination is None and self.destination_dir is not None and self.destination_file is not None:
            destination = os.path.join(self.destination_dir, self.destination_file)
        if source is None or destination is None:
            logger.error(f'Error uploading file. Either source: \'{source}\' or destination: \'{destination}\' is not set!')
            # Bug fix: bail out instead of continuing with invalid paths.
            return False
        start_time = datetime.now()
        logger.debug(f'Start uploading file: \'{source}\' to: \'{destination}\' with engine: \'{self.TYPE}\'')
        # Make sure the remote directory structure exists before uploading.
        if not self.directory_exists(os.path.dirname(destination)):
            self.create_directories(os.path.dirname(destination))
        upload_ok = self._upload_file_action(source, destination)
        if upload_ok:
            logger.info(f'Uploaded \'{source}\' to: \'{destination}\' with engine: \'{self.TYPE}\' in {datetime.now() - start_time} (h:mm:ss.ms)')
            if move or self.encrypted:
                os.unlink(source)
                logger.debug('Removed source file from disk!')
        else:
            logger.error(f'Error uploading \'{source}\' to: \'{destination}\' with engine: \'{self.TYPE}\' in {datetime.now() - start_time} (h:mm:ss.ms)')
        return upload_ok

    def directory_exists(self, path):
        """Return True when directory *path* exists on the storage."""
        return self._directory_exists_action(path)

    def download_file(self, source=None, destination=None, move=False):
        """Download *source* to *destination* (both default to constructor values).

        Returns True on success, False otherwise.
        """
        source = self.source if source is None else source
        if destination is None and self.destination_dir is not None and self.destination_file is not None:
            destination = os.path.join(self.destination_dir, os.path.basename(self.destination_file))
        if source is None or destination is None:
            logger.error(f'Error downloading file. Either source: {source} or destination: {destination} is not set!')
            # Bug fix: bail out instead of continuing with invalid paths.
            return False
        start_time = datetime.now()
        # Bug fix: this message was missing its f-string prefix and logged the
        # placeholders literally.
        logger.debug(f'Downloading file: {source} to: {destination}')
        download_ok = self._download_file_action(source, destination)
        if download_ok:
            logger.info(f'Downloaded \'{source}\' to: \'{destination}\' in {datetime.now() - start_time} (h:mm:ss.ms)')
        else:
            logger.error(f'Downloading failed for \'{source}\' to: \'{destination}\' in {datetime.now() - start_time} (h:mm:ss.ms)')
        return download_ok

    def create_directories(self, path):
        """Create every missing directory level of *path* on the storage."""
        folders = []
        for folder in path.strip('/').split('/'):
            # Store travelled path. We need this to make the directories on the remote servers
            folders.append(folder)
            if not self.directory_exists('/'.join(folders)):
                logger.debug(f'Creating folder {folder} with full path: {"/".join(folders)}')
                self._make_folder_action('/'.join(folders))
            else:
                logger.debug(f'Folder \'{folder}\' already exists.')
        return True

    # The _*_action methods below form the engine interface and must be
    # overridden by every concrete storage engine.
    def _file_exists_action(self, path):
        raise StorageException.StorageActionNotImplemented('BaseStorage', 'file_exists')

    def _directory_exists_action(self, path):
        raise StorageException.StorageActionNotImplemented('BaseStorage', 'directory_exists')

    def _upload_file_action(self, source, destination):
        raise StorageException.StorageActionNotImplemented('BaseStorage', '_upload_file')

    def _download_file_action(self, source, destination):
        raise StorageException.StorageActionNotImplemented('BaseStorage', '_download_file')

    def _make_folder_action(self, path):
        raise StorageException.StorageActionNotImplemented('BaseStorage', '_make_folder_action')

View File

@ -0,0 +1,8 @@
def human_filesize(nbytes):
    """Render a byte count as a human readable string (e.g. '1.5 KiB').

    The value is rounded to two decimals; trailing zeros and a trailing
    dot are stripped. Units go up to PiB.
    """
    units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
    index = 0
    while nbytes >= 1024 and index < len(units) - 1:
        nbytes /= 1024.
        index += 1
    amount = ('%.2f' % nbytes).rstrip('0').rstrip('.')
    return '%s %s' % (amount, units[index])