Source code for provenaclient.modules.prov

'''
Created Date: Monday June 17th 2024 +1000
Author: Peter Baker
-----
Last Modified: Friday November 29th 2024 4:21:39 pm +1000
Modified By: Parth Kulkarni
-----
Description: Provenance API L3 module. Includes the ProvAPI sub module. Contains IO helper functions for writing/reading files.
-----
HISTORY:
Date      	By	Comments
----------	---	---------------------------------------------------------

29-11-2024 | Parth Kulkarni | Added generate-report functionality. 

'''

from provenaclient.auth.manager import AuthManager
from provenaclient.utils.config import Config
from provenaclient.clients import ProvClient
from provenaclient.utils.exceptions import *
from provenaclient.modules.module_helpers import *
from provenaclient.utils.helpers import read_file_helper, write_file_helper, get_and_validate_file_path
from typing import List
from provenaclient.models.general import CustomLineageResponse, HealthCheckResponse
from ProvenaInterfaces.ProvenanceAPI import LineageResponse, ModelRunRecord, ConvertModelRunsResponse, RegisterModelRunResponse, RegisterBatchModelRunRequest, RegisterBatchModelRunResponse, PostUpdateModelRunResponse, GenerateReportRequest
from ProvenaInterfaces.RegistryAPI import ItemModelRun
from ProvenaInterfaces.SharedTypes import StatusResponse

# L3 interface.

PROV_API_DEFAULT_SEARCH_DEPTH = 3
DEFAULT_CONFIG_FILE_NAME = "prov-api.env"
DEFAULT_RELATIVE_FILE_PATH = "./"

[docs] class ProvAPIAdminSubModule(ModuleService): _prov_api_client: ProvClient def __init__(self, auth: AuthManager, config: Config, prov_api_client: ProvClient) -> None: """ Admin sub module of the Prov API providing functionality for the admin endpoints. Parameters ---------- auth : AuthManager An abstract interface containing the user's requested auth flow method. config : Config A config object which contains information related to the Provena instance. auth_client: AuthClient The instantiated auth client """ self._auth = auth self._config = config # Clients related to the prov_api scoped as private. self._prov_api_client = prov_api_client
[docs] async def generate_config_file(self, required_only: bool = True, file_path: Optional[str] = None, write_to_file: bool = False) -> str: """Generates a nicely formatted .env file of the current required/non supplied properties Used to quickly bootstrap a local environment or to understand currently deployed API. Parameters ---------- required_only : bool, optional By default True file_path: str, optional The path you want to save the config file at WITH the file name. If you don't specify a path this will be saved in a relative directory. write_to_file: bool, By default False A boolean flag to indicate whether you want to save the config response to a file or not. Returns ---------- str: Response containing the config text. """ file_path = get_and_validate_file_path( file_path=file_path, write_to_file=write_to_file, default_file_name=DEFAULT_CONFIG_FILE_NAME) config_text: str = await self._prov_api_client.admin.generate_config_file(required_only=required_only) if config_text is None: raise ValueError( f"No data returned for generate config file endpoint.") # Write to file if config text is not None, write to file is True and file path is not None. if write_to_file: if file_path is None: raise ValueError("File path is not set for writing the CSV.") write_file_helper(file_path=file_path, content=config_text) return config_text
[docs] async def store_record(self, registry_record: ItemModelRun, validate_record: bool = True) -> StatusResponse: """An admin only endpoint which enables the reupload/storage of an existing completed provenance record. Parameters ---------- registry_record : ItemModelRun The completed registry record for the model run. validate_record: bool Optional Should the ids in the payload be validated?, by default True Returns ------- StatusResponse A status response indicating the success of the request and any other details. """ return await self._prov_api_client.admin.store_record(registry_record=registry_record, validate_record=validate_record)
[docs] async def store_multiple_records(self, registry_record: List[ItemModelRun], validate_record: bool = True) -> StatusResponse: """An admin only endpoint which enables the reupload/storage of an existing but multiple completed provenance record. Parameters ---------- registry_record : List[ItemModelRun] List of the completed registry record for the model run validate_record validate_record: bool Optional Should the ids in the payload be validated?, by default True Returns ------- StatusResponse A status response indicating the success of the request and any other details. """ return await self._prov_api_client.admin.store_multiple_records(registry_record=registry_record, validate_record=validate_record)
[docs] async def store_all_registry_records(self, validate_record: bool = True) -> StatusResponse: """Applies the store record endpoint action across a list of ItemModelRuns ' which is found by querying the registry model run list endpoint directly. Parameters ---------- validate_record : bool Optional Should the ids in the payload be validated?, by default True Returns ------- StatusResponse A status response indicating the success of the request and any other details. """ return await self._prov_api_client.admin.store_all_registry_records(validate_record=validate_record)
[docs] class Prov(ModuleService): _prov_client: ProvClient def __init__(self, auth: AuthManager, config: Config, prov_client: ProvClient) -> None: """Initialises a new datastore object, which sits between the user and the datastore api operations. Parameters ---------- auth : AuthManager An abstract interface containing the user's requested auth flow method. config : Config A config object which contains information related to the Provena instance. datastore_client : DatastoreClient This client interacts with the Datastore API's. """ self._auth = auth self._config = config # Clients related to the prov-api scoped as private. self._prov_api_client = prov_client # Submodules self.admin = ProvAPIAdminSubModule(auth, config, prov_client)
[docs] async def get_health_check(self) -> HealthCheckResponse: """Checks the health status of the PROV-API. Returns ------- HealthCheckResponse Response containing the PROV-API health information. """ return await self._prov_api_client.get_health_check()
[docs] async def update_model_run(self, model_run_id: str, reason: str, record: ModelRunRecord) -> PostUpdateModelRunResponse: """Updates an existing model run with new information. This function triggers an asynchronous update of a model run. The update is processed as a job, and the job session ID is returned for tracking the update progress. Args: model_run_id (str): The ID of the model run to update reason (str): The reason for updating the model run record (ModelRunRecord): The new model run record details Returns: PostUpdateModelRunResponse: Response containing the job session ID tracking the update Example: ```python response = await prov_api.update_model_run( model_run_id="10378.1/1234567", reason="Updating input dataset information", record=updated_model_run_record ) # Get the session ID to track progress session_id = response.session_id ``` """ return await self._prov_api_client.post_update_model_run( model_run_id=model_run_id, reason=reason, record=record )
[docs] async def explore_upstream(self, starting_id: str, depth: int = PROV_API_DEFAULT_SEARCH_DEPTH) -> CustomLineageResponse: """Explores in the upstream direction (inputs/associations) starting at the specified node handle ID. The search depth is bounded by the depth parameter which has a default maximum of 100. Parameters ---------- starting_id : str The ID of the entity to start at. depth : int, optional The depth to traverse in the upstream direction, by default 100. Returns ------- CustomLineageResponse A typed response containing the status, node count, and networkx serialised graph response. """ upstream_response = await self._prov_api_client.explore_upstream(starting_id=starting_id, depth=depth) typed_upstream_response = CustomLineageResponse.parse_obj(upstream_response.dict()) return typed_upstream_response
[docs] async def explore_downstream(self, starting_id: str, depth: int = PROV_API_DEFAULT_SEARCH_DEPTH) -> CustomLineageResponse: """Explores in the downstream direction (inputs/associations) starting at the specified node handle ID. The search depth is bounded by the depth parameter which has a default maximum of 100. Parameters ---------- starting_id : str The ID of the entity to start at. depth : int, optional The depth to traverse in the downstream direction, by default 100 Returns ------- CustomLineageResponse A typed response containing the status, node count, and networkx serialised graph response. """ typed_downstream_response = await self._prov_api_client.explore_downstream(starting_id=starting_id, depth=depth) typed_downstream_response = CustomLineageResponse.parse_obj(typed_downstream_response.dict()) return typed_downstream_response
[docs] async def get_contributing_datasets(self, starting_id: str, depth: int = PROV_API_DEFAULT_SEARCH_DEPTH) -> CustomLineageResponse: """Fetches datasets (inputs) which involved in a model run naturally in the upstream direction. Parameters ---------- starting_id : str The ID of the entity to start at. depth : int, optional The depth to traverse in the upstream direction, by default 100 Returns ------- CustomLineageResponse A typed response containing the status, node count, and networkx serialised graph response. """ contributing_datasets = await self._prov_api_client.get_contributing_datasets(starting_id=starting_id, depth=depth) typed_contributing_datasets = CustomLineageResponse.parse_obj(contributing_datasets.dict()) return typed_contributing_datasets
[docs] async def get_effected_datasets(self, starting_id: str, depth: int = PROV_API_DEFAULT_SEARCH_DEPTH) -> CustomLineageResponse: """Fetches datasets (outputs) which are derived from the model run naturally in the downstream direction. Parameters ---------- starting_id : str The ID of the entity to start at. depth : int, optional The depth to traverse in the downstream direction, by default 100. Returns ------- CustomLineageResponse A typed response containing the status, node count, and networkx serialised graph response. """ effected_datasets_response = await self._prov_api_client.get_effected_datasets(starting_id=starting_id, depth=depth) typed_effected_datasets = CustomLineageResponse.parse_obj(effected_datasets_response.dict()) return typed_effected_datasets
[docs] async def get_contributing_agents(self, starting_id: str, depth: int = PROV_API_DEFAULT_SEARCH_DEPTH) -> CustomLineageResponse: """Fetches agents (organisations or peoples) that are involved or impacted by the model run. naturally in the upstream direction. Parameters ---------- starting_id : str The ID of the entity to start at. depth : int, optional The depth to traverse in the upstream direction, by default 100. Returns ------- CustomLineageResponse A typed response containing the status, node count, and networkx serialised graph response. """ contributing_agents_response = await self._prov_api_client.get_contributing_agents(starting_id=starting_id, depth=depth) typed_contributing_agents = CustomLineageResponse.parse_obj(contributing_agents_response.dict()) return typed_contributing_agents
[docs] async def get_effected_agents(self, starting_id: str, depth: int = PROV_API_DEFAULT_SEARCH_DEPTH) -> CustomLineageResponse: """Fetches agents (organisations or peoples) that are involved or impacted by the model run. naturally in the downstream direction. Parameters ---------- starting_id : str The ID of the entity to start at. depth : int, optional The depth to traverse in the downstream direction, by default 100. Returns ------- CustomLineageResponse A typed response containing the status, node count, and networkx serialised graph response. """ effected_agents_response = await self._prov_api_client.get_effected_agents(starting_id=starting_id, depth=depth) typed_effected_agents = CustomLineageResponse.parse_obj(effected_agents_response.dict()) return typed_effected_agents
[docs] async def register_batch_model_runs(self, batch_model_run_payload: RegisterBatchModelRunRequest) -> RegisterBatchModelRunResponse: """This function allows you to register multiple model runs in one go (batch) asynchronously. Note: You can utilise the returned session ID to poll on the JOB API to check status of the model run registration(s). Parameters ---------- batch_model_run_payload : RegisterBatchModelRunRequest A list of model runs (ModelRunRecord objects) Returns ------- RegisterBatchModelRunResponse The job session id derived from job-api for the model-run batch. """ return await self._prov_api_client.register_batch_model_runs(model_run_batch_payload=batch_model_run_payload)
[docs] async def register_model_run(self, model_run_payload: ModelRunRecord) -> RegisterModelRunResponse: """Asynchronously registers a single model run. Note: You can utilise the returned session ID to poll on the JOB API to check status of the model run registration. Parameters ---------- model_run_payload : ModelRunRecord Contains information needed for the model run such as workflow template, inputs, outputs, description etc. Returns ------- RegisterModelRunResponse The job session id derived from job-api for the model-run. """ return await self._prov_api_client.register_model_run(model_run_payload=model_run_payload)
[docs] async def generate_csv_template(self, workflow_template_id: str, file_path: Optional[str] = None, write_to_csv: bool = False) -> str: """Generates a model run csv template to be utilised for creating model runs through csv format.. Parameters ---------- workflow_template_id : str An ID of a created and existing model run workflow template. path_to_save_csv: str, optional The path you want to save the csv file at WITH csv file name. If you don't specify a path this will be saved in a relative directory. write_to_csv: bool, By default False A boolean flag to indicate whether you want to save the template to a csv file or not. Returns ---------- str: Response containing the csv template text (encoded in a csv format). """ file_path = get_and_validate_file_path( file_path=file_path, write_to_file=write_to_csv, default_file_name=workflow_template_id + ".csv") csv_text = await self._prov_api_client.generate_csv_template(workflow_template_id=workflow_template_id) if csv_text is None: raise ValueError( f"No data returned for generate CSV template workflow template ID {workflow_template_id}") # Write to file if CSV content is returned and write_to_csv is True and file path is assigned. if write_to_csv: if file_path is None: raise ValueError("File path is not set for writing the CSV.") write_file_helper(file_path=file_path, content=csv_text) return csv_text
[docs] async def convert_model_runs(self, model_run_content: str) -> ConvertModelRunsResponse: """Converts model run with model_run_content provided as a string. Parameters ---------- model_run_content : str The model run information containing the necessary parameters for model run lodge. Returns ------- ConvertModelRunsResponse Returns the model run information in an interactive python datatype. Raises ------ Exception Exception raised when converting string to bytes. """ response = await self._prov_api_client.convert_model_runs_to_csv(csv_file_contents=model_run_content) return response
[docs] async def convert_model_runs_to_csv_with_file(self, file_path: str) -> ConvertModelRunsResponse: """Reads a CSV file, and it's defined model run contents and lodges a model run. Parameters ---------- file_path : str The path of an existing created CSV file containing the necessary parameters for model run lodge. Returns ------- ConvertModelRunsResponse Returns the model run information in an interactive python datatype. """ file_content = read_file_helper(file_path=file_path) response = await self._prov_api_client.convert_model_runs_to_csv(csv_file_contents=file_content) return response
[docs] async def regenerate_csv_from_model_run_batch(self, batch_id: str, file_path: Optional[str] = None, write_to_csv: bool = False) -> str: """Regenerate/create a csv file containing model run information from a model run batch job. The batch id must exist in the system. Parameters ---------- batch_id : str Obtained from creating a batch model run. file_path: str, optional The path you want to save the csv file at WITH CSV file name. If you don't specify a path this will be saved in a relative directory. write_to_csv: bool, By default False A boolean flag to indicate whether you want to save the template to a csv file or not. Returns ---------- str: Response containing the model run information (encoded in csv format). """ file_path = get_and_validate_file_path( file_path=file_path, write_to_file=write_to_csv, default_file_name=batch_id + ".csv") csv_text: str = await self._prov_api_client.regenerate_csv_from_model_run_batch(batch_id=batch_id) if csv_text is None: raise ValueError(f"No data returned for batch ID {batch_id}") # Write to file if CSV content is returned and write_to_csv is True and file path is assigned. if write_to_csv: if file_path is None: raise ValueError("File path is not set for writing the CSV.") write_file_helper(file_path=file_path, content=csv_text) return csv_text
[docs] async def generate_report(self, report_request:GenerateReportRequest, file_path: str = DEFAULT_RELATIVE_FILE_PATH) -> None: """Generates a provenance report from a Study or Model Run Entity containing the associated inputs, model runs and outputs involved. The report is generated in `.docx` and saved at relative directory level. Parameters ---------- report_request : GenerateReportRequest The request object containing the parameters for generating the report, including the `id`, `item_subtype`, and `depth`. """ # Calls API endpoint to generate report document. generated_word_file = await self._prov_api_client.generate_report( report_request=report_request ) # Sanitize the id to avoid file system errors sanitized_filename = report_request.id.replace("/", "_") + " - Study Close Out Report.docx" # Append file path and file-name together file_path = file_path + sanitized_filename # Writes content into word docx file. write_file_helper(file_path=file_path,content = generated_word_file)