Source code for OpenSpecimenAPIconnector.mg_util.OpenSpecimenBBMRImapping

#! /bin/python3


'''
    Road to official version:
    TODO
    - Extend TODO-List
    - Person_Extensions in openspecimen
    - Some_fields in Collections have to be loaded separatly
    - Header should be borderless, and non-bold
    - Format of cells
    - use OpenSpecimenBBMRIconnector.py pip package instead
    - Bring it in a JUPYTER NoteBook
    - Extend Dokumentation, or maybe via JUPYTER NoteBook
    -- optional: functionize it?!?

'''
from ..mg_core.mappings import bbmri_mapping 
from ..os_core.site import sites
from ..os_core.jsons import Json_factory
from ..os_core.users import users
from ..os_core.collection_protocol_registration import collection_protocol_registration
from ..os_core.collection_protocol import collection_protocol
from .. import config_manager

import json
import pandas as pd
import xlsxwriter
from openpyxl import load_workbook

[docs]class bbmri_connector: def __init__(self): pass
[docs] def execute(self, collection_protocol_ids): # shhetnames: bb_sheet = "eu_bbmri_eric_biobanks" per_sheet = "eu_bbmri_eric_persons" cp_sheet = "eu_bbmri_eric_collections" # file_output output_file = "test.xlsx" mapping = bbmri_mapping() persons_extensions = mapping.person_extension() person_map = mapping.person_map() biobank_extensions = mapping.biobank_extension() biobank_map = mapping.biobank_map() collection_map = mapping.collection_map() # Design header for writing to xlsx # here we create a format object for header. # Load headers of BBMRI_ERIC Directory template_file_name = "empty_eric_duo.xlsx" url = "https://drive.google.com/uc?export=download&id=1eICcwOXrsfYxaG_kjs_myCBCosEpHqVz" bbmri_file = pd.read_excel(template_file_name, sheet_name=None) # initialize Users,CollectionProtocols, Sites protocols = collection_protocol() user = users() site = sites() # index_ = 0 # writing all Collection protocols to the bbmri-dict for index_, cp_id in enumerate(collection_protocol_ids): cp = protocols.get_collection_protocol(cpid=str(cp_id)) # extract User OS_user_id = cp['principalInvestigator']['id'] OS_user = user.get_user(userId=OS_user_id) # extractSite OS_site_name = cp['cpSites'][0]['siteName'] OS_sites = site.get_all_sites() for i in range(len(OS_sites)): if OS_sites[i]['name'] == OS_site_name: OS_site_id = OS_sites[i]['id'] OS_site = site.get_site(siteid=OS_site_id) # check if user exists # if bbmri_file[per_sheet]['id'].values() # fill the bbmri_persons_dict if index_ == 0: for key in bbmri_file[per_sheet].keys(): # fileds which can't generated by OpenSpecimen for now if key in persons_extensions.keys(): bbmri_file[per_sheet].at[index_, key] = persons_extensions[key] # standard fields in OpenSpecimen which are named differently to BBMRI elif key in person_map.keys(): bbmri_file[per_sheet][key] = OS_user[person_map[key]] # fill the Biobank fields attrs = OS_site['extensionDetail']['attrs'] # extract BBMRI-Extension-Details # ExtensionFields are called differently in Openspecimen #TODO-for MIABIS Plugin for attr in attrs: key_string = attr["caption"] = attr["caption"].lower().replace(" ", "_") value_string = '' if isinstance(attr['value'], list): for i in range(len(attr['value'])): value_string += attr['value'][i] + ', ' value_string = value_string[0:-2] else: value_string = attr['value'] # append the OS_Biobank dict with the extensiondetails, such that there are lesser if statements OS_site[key_string] = value_string for key in bbmri_file[bb_sheet].keys(): # The Persons ID is stored in the BIobank contact, if key == 'contact' and index_ == 0: bbmri_file[per_sheet].at[index_, 'id'] = OS_site[key] # fileds which can't generated by OpenSpecimen for now if key in biobank_extensions.keys(): bbmri_file[bb_sheet].at[index_, key] = biobank_extensions[key] # standard fields in OpenSpecimen which are named differently to BBMRI elif key in biobank_map.keys(): bbmri_file[bb_sheet].at[index_, key] = OS_site[biobank_map[key]] # fields which are named same to BBMRI elif key in OS_site.keys(): bbmri_file[bb_sheet].at[index_, key] = OS_site[key] # fill the BBBMRI Biobank dict attrs = cp['extensionDetail']['attrs'] # extract BBMRI-Extension-Details # ExtensionFields are called differently in Openspecimen #TODO-for MIABIS Plugin for attr in attrs: key_string = attr['caption'].lower().replace(' ', '_') value_string = '' if isinstance(attr['value'], list): for i in range(len(attr['value'])): value_string += attr['value'][i] + ', ' value_string = value_string[0:-2] else: value_string = attr['value'] # append the OS_Biobank dict with the extensiondetails, such that there are lesser if statements cp[key_string] = value_string ##fill json-dict, with OpenSpecimen standard fields for key in bbmri_file[cp_sheet]: if key in collection_map.keys(): bbmri_file[cp_sheet].at[index_, key] = cp[collection_map[key]] elif key in cp.keys(): bbmri_file[cp_sheet].at[index_, key] = cp[key] # Write excel-file with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer: for sheet_name in bbmri_file.keys(): df = bbmri_file[sheet_name] df.to_excel(writer, sheet_name=sheet_name, index=False) return bbmri_file