# Source code for OpenSpecimenAPIconnector.os_core.csv_export

import pandas as pd
import zipfile
import json
import time
import io
from .req_util import OS_request_gen
from .. import config_manager

class CSV_exporter():
    """Handles the API calls for CSV file export

    This class provides methods to create an internal OpenSpecimen export job and to fetch its results.

    Note
    -----
    In order to use this and also the other classes, the user has to know OpenSpecimen. In the core classes one can
    just pass the parameters via JSON-formatted string. This means the user has to know the keywords.
    The API calls are documented in https://openspecimen.atlassian.net/wiki/spaces/CAT/pages/1116035/REST+APIs and
    the calls refer to this site. More details can be seen in the documentation.

    Examples
    ---------
    Create an export job from a JSON-formatted string and fetch its output as a pandas data frame::

        my_id = CSV_exporter().create_export_job(data)
        my_pd_data_frame = CSV_exporter().get_job_output(my_id)
    """

    def __init__(self):
        """Constructor of the Class CSV_exporter

        Handles the basic API-calls for export-job fetching and creation. The constructor takes no
        arguments; the connection settings are read from ``config_manager``.

        Attributes
        ----------
        base_url : string
            Value of ``config_manager.get_url()``. URL to openspecimen, has the format:
            http(s)://<host>:<port>/openspecimen/rest/ng
        auth : tuple
            Value of ``config_manager.get_auth()``. Consists of two strings (loginname, password)
        OS_request_gen : OS_request_gen
            Request helper built from ``auth``; every HTTP call of this class goes through it.
        """
        self.base_url = config_manager.get_url()
        self.auth = config_manager.get_auth()
        self.OS_request_gen = OS_request_gen(self.auth)

    def create_export_job(self, data, poll_interval=5):
        """Create an export job for the entity (Collection Protocol, Institutes etc.)
        encoded within the data JSON-formatted string and wait until it has left the
        "IN_PROGRESS" state.

        The job is created with a single POST. While the reported status is "IN_PROGRESS" the
        job itself is polled via GET; the creation request is never repeated, so waiting does
        not spawn additional export jobs.

        Parameters
        ----------
        data : JSON-formatted-string
            Containing the information needed by the API.
            See OpenSpecimenAPIconnector.os_util.Export_OP().export_file to find the JSON
            blueprint methods in OpenSpecimenAPIconnector.os_core.Json_Factory().
        poll_interval : int or float, optional
            Seconds to sleep between two status requests. Defaults to 5.

        Returns
        -------
        job_id
            The value of the "id" field of the creation response, i.e. the id assigned to the
            job by the OpenSpecimen API.

        Raises
        ------
        AssertionError
            If the creation request or a status request does not answer with HTTP status 200.
            The message carries the response text.
        """
        job_url = self.base_url + "/export-jobs/"

        r = self.OS_request_gen.post_request(job_url, data)
        # Raised explicitly instead of using ``assert`` so the check survives ``python -O``;
        # the exception type is kept for callers that already catch AssertionError.
        if r.status_code != 200:
            raise AssertionError("Error creating Job:\n {}".format(r.text))
        req_json = json.loads(r.text)
        job_id = req_json["id"]

        # NOTE(review): relies on GET <base_url>/export-jobs/<id> returning the job detail
        # (including "status") -- confirm against the OpenSpecimen REST API documentation.
        status_url = "{}/export-jobs/{}".format(self.base_url, job_id)
        while req_json["status"] == "IN_PROGRESS":
            print("Waiting for job to finish")
            time.sleep(poll_interval)
            r = self.OS_request_gen.get_request(status_url, stream=False)
            if r.status_code != 200:
                raise AssertionError(
                    "Error fetching status of Job {}:\n {}".format(job_id, r.text)
                )
            req_json = json.loads(r.text)

        return job_id

    def get_job_output(self, job_id):
        """Fetch the output of an export job as a pandas data frame

        Downloads the job's output archive, opens the member "output.csv" inside it and parses
        that member with pandas.

        Parameters
        ----------
        job_id :
            String representing the ID for identifying the output file

        Returns
        -------
        job_data: pandas.DataFrame()
            A pandas data frame containing the CSV files information. You can easily recover the original file
            by using the pandas.to_csv() method.

        Raises
        ------
        AssertionError
            If the request does not answer with HTTP status 200 or delivers no binary content.
            The message carries the response text.
        zipfile.BadZipFile
            If the delivered content is not a zip archive.
        KeyError
            If the archive has no member named "output.csv".
        """
        job_out_url = self.base_url + "/export-jobs/{}/output".format(job_id)

        r = self.OS_request_gen.get_request(job_out_url, stream=True)
        # An error reply still has a ``bytes`` body, so the status code has to be checked as
        # well; otherwise the server's message is lost behind a BadZipFile.
        if r.status_code != 200 or not isinstance(r.content, bytes):
            raise AssertionError(
                "Error getting job Output either check job status or see attached error:\n {}".format(r.text)
            )

        with zipfile.ZipFile(io.BytesIO(r.content), "r") as archive:
            # Close the member handle as soon as pandas has consumed it.
            with archive.open("output.csv", "r") as csv_file:
                job_data = pd.read_csv(csv_file)

        return job_data