import pandas as pd
import zipfile
import json
import time
import io
from .req_util import OS_request_gen
from .. import config_manager
[docs]class CSV_exporter():
"""Handles the API calls for CSV file export
This class provides methods to create and fetch the resutls of an internal OpenSpecimen export job.
Note
-----
In order to use this and also the other classes, the user has to know OpenSpecimen. In the core classes one can
just pass the parameters via JSON-formatted string. This means the user has to know the keywords.
The API calls are documented in https://openspecimen.atlassian.net/wiki/spaces/CAT/pages/1116035/REST+APIs and
the calls refer to this site. More details can be seen in the documentation.
Examples
---------
Just call create export job with the correct data json String like
my_id = CSV_exporter().create_export_job() or supply a job id to call
my_pd_data_frame = CSV_exporter().get_job_output() to retrieve a pandas data frame
"""
def __init__(self):
"""Constructor of the Class CSV_exporter
Handles the basic API-calls for export-job fetching and creation. Either creates or fetches the outcome of an
specific export job
Parameters
----------
base_url : string
URL to openspecimen, has the format: http(s)://<host>:<port>/openspecimen/rest/ng
auth : tuple
Consits of two strings (loginname , password)
"""
self.base_url = config_manager.get_url()
self.auth = config_manager.get_auth()
self.OS_request_gen = OS_request_gen(self.auth)
[docs] def create_export_job(self, data):
"""Create export job method with the entity (Collection Protocoll, Institutes etc.)
encoded within the data JSON-formated-string.
Parameters
----------
data : JSON-formatted-string
Containing the information needed by the API.
See OpenSpecimenAPIconnector.os_util.Export_OP().export_file to find the JSON
blueprint methods in OpenSpecimenAPIconnector.os_core.Json_Factory().
Returns
-------
job_id: String
A string representing the id (integer number) assigned by the OpenSpecimen API
"""
job_endpoint = "/export-jobs/"
job_url = self.base_url + job_endpoint
done = False
while not done:
r = self.OS_request_gen.post_request(job_url, data)
assert r.status_code == 200, "Error creating Job:\n {}".format(r.text)
req_json = json.loads(r.text)
if req_json["status"] == "IN_PROGRESS":
print("Waiting for job to finish")
time.sleep(5)
else:
done = True
job_id = req_json["id"]
return job_id
[docs] def get_job_output(self, job_id):
"""Fetch the output
Parameters
----------
job_id :
String representing the ID for identifying the output file
Returns
-------
job_data: pandas.DataFrame()
A pandas data frame containing the CSV files information. You can easily recover the original file
by using the pandas.to_csv() method.
"""
job_out_endpoint = "/export-jobs/{}/output".format(job_id)
job_out_url = self.base_url + job_out_endpoint
r = self.OS_request_gen.get_request(job_out_url, stream=True)
assert isinstance(r.content, bytes), "Error getting job Output either check job status or see attached error:\n {}".format(r.text)
file = io.BytesIO(r.content)
with zipfile.ZipFile(file, "r") as archive:
job_data = archive.open("output.csv", "r")
job_data = pd.read_csv(job_data)
return job_data