
Commit ac4fb49

Merge pull request #62 from ruivieira/fix-metrics
fix: Metrics consumers, HTTPS ports, global reconciler
2 parents: 0c0dc69 + 4b1a778

File tree

15 files changed: +1064 −113 lines

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ dependencies = [
     "pandas>=2.2.3,<3",
     "prometheus-client>=0.21.1,<0.24",
     "pydantic>=2.4.2,<3",
-    "uvicorn>=0.34.0,<0.39",
+    "hypercorn>=0.17.0,<0.19",
     "protobuf>=4.24.4,<7",
     "requests>=2.31.0,<3",
     "cryptography>=44.0.2,<47",

src/endpoints/consumer/consumer_endpoint.py

Lines changed: 36 additions & 0 deletions
@@ -14,6 +14,8 @@
 from src.service.data.storage import get_storage_interface
 from src.service.utils import list_utils
 from src.service.data.modelmesh_parser import ModelMeshPayloadParser, PartialPayload
+from src.service.data.datasources.data_source import DataSource
+from src.service.data.shared_data_source import get_shared_data_source

 # Define constants locally to avoid import issues
 INPUT_SUFFIX = "_inputs"
@@ -31,6 +33,10 @@
 unreconciled_inputs = {}
 unreconciled_outputs = {}

+def get_data_source():
+    """Get the shared data source instance."""
+    return get_shared_data_source()
+

 class PartialPayloadId(BaseModel):
     prediction_id: Optional[str] = None
@@ -236,6 +242,21 @@ async def reconcile_modelmesh_payloads(
         f"Current storage shapes for {model_id}: Inputs={shapes[0]}, Outputs={shapes[1]}, Metadata={shapes[2]}"
     )

+    # Add model to known models set so it can be discovered by the scheduler
+    data_source = get_data_source()
+    await data_source.add_model_to_known(model_id)
+    known_models = await data_source.get_known_models()
+    logger.info(f"Added model {model_id} to known models set. Current known models: {list(known_models)}")
+    logger.debug(f"DataSource instance id: {id(data_source)}")
+
+    # Mark that inference data has been recorded for this model
+    try:
+        metadata = await data_source.get_metadata(model_id)
+        metadata.set_recorded_inferences(True)
+        logger.info(f"Marked model {model_id} as having recorded inferences")
+    except Exception as e:
+        logger.warning(f"Could not update recorded_inferences flag for model {model_id}: {e}")
+
     # Clean up
     await storage_interface.delete_modelmesh_payload(request_id, True)
     await storage_interface.delete_modelmesh_payload(request_id, False)
@@ -338,6 +359,21 @@ async def reconcile(input_payload: KServeInferenceRequest, output_payload: KServ
         f"Metadata={shapes[2]}"
     )

+    # Add model to known models set so it can be discovered by the scheduler
+    data_source = get_data_source()
+    await data_source.add_model_to_known(output_payload.model_name)
+    known_models = await data_source.get_known_models()
+    logger.info(f"Added model {output_payload.model_name} to known models set. Current known models: {list(known_models)}")
+    logger.debug(f"DataSource instance id: {id(data_source)}")
+
+    # Mark that inference data has been recorded for this model
+    try:
+        metadata = await data_source.get_metadata(output_payload.model_name)
+        metadata.set_recorded_inferences(True)
+        logger.info(f"Marked model {output_payload.model_name} as having recorded inferences")
+    except Exception as e:
+        logger.warning(f"Could not update recorded_inferences flag for model {output_payload.model_name}: {e}")
+

 @router.post("/")
 async def consume_cloud_event(
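Both reconciliation paths now register models through get_shared_data_source() instead of constructing their own DataSource, so the consumer and the metrics scheduler see the same set of known models (the "global reconciler" of the commit title). A plausible minimal sketch of such a module follows, assuming a module-level singleton and a no-argument DataSource constructor; the actual src/service/data/shared_data_source.py is not shown in this diff.

# Hypothetical sketch of src/service/data/shared_data_source.py.
# Assumes DataSource() takes no arguments; the real module may differ.
from typing import Optional

from src.service.data.datasources.data_source import DataSource

_shared_data_source: Optional[DataSource] = None


def get_shared_data_source() -> DataSource:
    """Return the process-wide DataSource, creating it on first use."""
    global _shared_data_source
    if _shared_data_source is None:
        _shared_data_source = DataSource()
    return _shared_data_source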

src/endpoints/metadata.py

Lines changed: 233 additions & 11 deletions
@@ -3,9 +3,24 @@
 from typing import Dict, List
 import logging

+from src.service.data.storage import get_storage_interface
+from src.service.data.shared_data_source import get_shared_data_source
+from src.service.prometheus.shared_prometheus_scheduler import get_shared_prometheus_scheduler
+from src.service.constants import INPUT_SUFFIX, OUTPUT_SUFFIX
+
 router = APIRouter()
 logger = logging.getLogger(__name__)

+storage_interface = get_storage_interface()
+
+def get_data_source():
+    """Get the shared data source instance."""
+    return get_shared_data_source()
+
+def get_prometheus_scheduler():
+    """Get the shared prometheus scheduler instance."""
+    return get_shared_prometheus_scheduler()
+

 class NameMapping(BaseModel):
     modelId: str
@@ -18,15 +33,88 @@ class DataTagging(BaseModel):
     dataTagging: Dict[str, List[List[int]]] = {}


+class ModelIdRequest(BaseModel):
+    modelId: str
+
+
 @router.get("/info")
 async def get_service_info():
-    """Get a list of all inference ids within a particular model inference."""
+    """Get a comprehensive overview of the model inference datasets collected by TrustyAI and the metric computations that are scheduled over those datasets."""
     try:
-        # TODO: Implement
-        return {"models": [], "metrics": [], "version": "1.0.0rc0"}
+        logger.info("Retrieving service info")
+
+        # Get all known models from shared data source
+        data_source = get_data_source()
+        known_models = await data_source.get_known_models()
+        logger.info(f"DataSource instance id: {id(data_source)}")
+        logger.info(f"Found {len(known_models)} known models: {list(known_models)}")
+
+        service_metadata = {}
+
+        for model_id in known_models:
+            try:
+                # Get metadata for each model
+                model_metadata = await data_source.get_metadata(model_id)
+                num_observations = await data_source.get_num_observations(model_id)
+                has_inferences = await data_source.has_recorded_inferences(model_id)
+
+                # Get scheduled metrics for this model
+                scheduled_metadata = {}
+                try:
+                    scheduler = get_prometheus_scheduler()
+                    if scheduler:
+                        # Get all metric types and count scheduled requests per model
+                        all_requests = scheduler.get_all_requests()  # Should return dict of metric_name -> {request_id -> request}
+                        for metric_name, requests_dict in all_requests.items():
+                            count = 0
+                            for request_id, request in requests_dict.items():
+                                # Check if request is for this model (defensive access)
+                                request_model_id = getattr(request, 'model_id', getattr(request, 'modelId', None))
+                                if request_model_id == model_id:
+                                    count += 1
+                            if count > 0:
+                                scheduled_metadata[metric_name] = count
+                        logger.debug(f"Found {len(scheduled_metadata)} scheduled metric types for model {model_id}")
+                except Exception as e:
+                    logger.warning(f"Error retrieving scheduled metrics for model {model_id}: {e}")
+
+                # Transform to match expected format
+                service_metadata[model_id] = {
+                    "data": {
+                        "observations": num_observations,
+                        "hasRecordedInferences": has_inferences,
+                        "inputTensorName": model_metadata.input_tensor_name if model_metadata else "input",
+                        "outputTensorName": model_metadata.output_tensor_name if model_metadata else "output"
+                    },
+                    "metrics": {
+                        "scheduledMetadata": scheduled_metadata
+                    }
+                }
+
+                logger.debug(f"Retrieved metadata for model {model_id}: observations={num_observations}, hasInferences={has_inferences}")
+
+            except Exception as e:
+                logger.warning(f"Error retrieving metadata for model {model_id}: {e}")
+                # Still include the model in the response but with basic info
+                service_metadata[model_id] = {
+                    "data": {
+                        "observations": 0,
+                        "hasRecordedInferences": False,
+                        "inputTensorName": "input",
+                        "outputTensorName": "output"
+                    },
+                    "metrics": {"scheduledMetadata": {}},
+                    "error": str(e)
+                }
+
+        logger.info(f"Successfully retrieved service info for {len(service_metadata)} models")
+        return service_metadata
+
     except Exception as e:
         logger.error(f"Error retrieving service info: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Error retrieving service info: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Error retrieving service info: {str(e)}"
+        ) from e


 @router.get("/info/inference/ids/{model}")
@@ -41,28 +129,162 @@ async def get_inference_ids(model: str, type: str = "all"):
         raise HTTPException(status_code=500, detail=f"Error retrieving inference IDs: {str(e)}")


+@router.get("/info/names")
+async def get_column_names():
+    """Get the current name mappings for all models."""
+    try:
+        logger.info("Retrieving name mappings for all models")
+
+        # Get all known models from shared data source
+        data_source = get_data_source()
+        known_models = await data_source.get_known_models()
+        logger.info(f"Found {len(known_models)} known models: {list(known_models)}")
+
+        name_mappings = {}
+
+        for model_id in known_models:
+            try:
+                input_dataset_name = model_id + INPUT_SUFFIX
+                output_dataset_name = model_id + OUTPUT_SUFFIX
+
+                input_exists = await storage_interface.dataset_exists(input_dataset_name)
+                output_exists = await storage_interface.dataset_exists(output_dataset_name)
+
+                model_mappings = {
+                    "modelId": model_id,
+                    "inputMapping": {},
+                    "outputMapping": {}
+                }
+
+                # Get input name mappings
+                if input_exists:
+                    try:
+                        original_input_names = await storage_interface.get_original_column_names(input_dataset_name)
+                        aliased_input_names = await storage_interface.get_aliased_column_names(input_dataset_name)
+
+                        if original_input_names is not None and aliased_input_names is not None:
+                            # Create mapping from original to aliased names
+                            input_mapping = {}
+                            for orig, alias in zip(list(original_input_names), list(aliased_input_names)):
+                                if orig != alias:  # Only include if there's an actual mapping
+                                    input_mapping[orig] = alias
+                            model_mappings["inputMapping"] = input_mapping
+
+                    except Exception as e:
+                        logger.warning(f"Error getting input name mappings for {model_id}: {e}")
+
+                # Get output name mappings
+                if output_exists:
+                    try:
+                        original_output_names = await storage_interface.get_original_column_names(output_dataset_name)
+                        aliased_output_names = await storage_interface.get_aliased_column_names(output_dataset_name)
+
+                        if original_output_names is not None and aliased_output_names is not None:
+                            # Create mapping from original to aliased names
+                            output_mapping = {}
+                            for orig, alias in zip(list(original_output_names), list(aliased_output_names)):
+                                if orig != alias:  # Only include if there's an actual mapping
+                                    output_mapping[orig] = alias
+                            model_mappings["outputMapping"] = output_mapping
+
+                    except Exception as e:
+                        logger.warning(f"Error getting output name mappings for {model_id}: {e}")
+
+                name_mappings[model_id] = model_mappings
+
+            except Exception as e:
+                logger.warning(f"Error getting name mappings for model {model_id}: {e}")
+
+        logger.info(f"Successfully retrieved name mappings for {len(name_mappings)} models")
+        return name_mappings
+
+    except Exception as e:
+        logger.error(f"Error retrieving name mappings: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error retrieving name mappings: {str(e)}")
+
+
 @router.post("/info/names")
 async def apply_column_names(name_mapping: NameMapping):
     """Apply a set of human-readable column names to a particular inference."""
     try:
         logger.info(f"Applying column names for model: {name_mapping.modelId}")
-        # TODO: Implement
-        return {"status": "success", "message": "Column names applied successfully"}
+
+        model_id = name_mapping.modelId
+        input_dataset_name = model_id + INPUT_SUFFIX
+        output_dataset_name = model_id + OUTPUT_SUFFIX
+
+        # Check if the model datasets exist
+        input_exists = await storage_interface.dataset_exists(input_dataset_name)
+        output_exists = await storage_interface.dataset_exists(output_dataset_name)
+
+        if not input_exists and not output_exists:
+            error_msg = f"No metadata found for model={model_id}. This can happen if TrustyAI has not yet logged any inferences from this model."
+            logger.error(error_msg)
+            raise HTTPException(status_code=400, detail=error_msg)
+
+        # Apply input mappings if provided and dataset exists
+        if name_mapping.inputMapping and input_exists:
+            logger.info(f"Applying input mappings for model {model_id}: {name_mapping.inputMapping}")
+            await storage_interface.apply_name_mapping(input_dataset_name, name_mapping.inputMapping)
+
+        # Apply output mappings if provided and dataset exists
+        if name_mapping.outputMapping and output_exists:
+            logger.info(f"Applying output mappings for model {model_id}: {name_mapping.outputMapping}")
+            await storage_interface.apply_name_mapping(output_dataset_name, name_mapping.outputMapping)
+
+        logger.info(f"Name mappings successfully applied to model={model_id}")
+        return {"message": "Feature and output name mapping successfully applied."}
+
+    except HTTPException:
+        # Re-raise HTTP exceptions without wrapping
+        raise
     except Exception as e:
         logger.error(f"Error applying column names: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Error applying column names: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Error applying column names: {str(e)}"
+        ) from e


 @router.delete("/info/names")
-async def remove_column_names(model_id: str):
+async def remove_column_names(request: ModelIdRequest):
     """Remove any column names that have been applied to a particular inference."""
     try:
+        model_id = request.modelId
         logger.info(f"Removing column names for model: {model_id}")
-        # TODO: Implement
-        return {"status": "success", "message": "Column names removed successfully"}
+
+        input_dataset_name = model_id + INPUT_SUFFIX
+        output_dataset_name = model_id + OUTPUT_SUFFIX
+
+        # Check if the model datasets exist
+        input_exists = await storage_interface.dataset_exists(input_dataset_name)
+        output_exists = await storage_interface.dataset_exists(output_dataset_name)
+
+        if not input_exists and not output_exists:
+            error_msg = f"No metadata found for model={model_id}. This can happen if TrustyAI has not yet logged any inferences from this model."
+            logger.error(error_msg)
+            raise HTTPException(status_code=400, detail=error_msg)
+
+        # Clear name mappings from input dataset if it exists
+        if input_exists:
+            logger.info(f"Clearing input name mappings for model {model_id}")
+            await storage_interface.clear_name_mapping(input_dataset_name)
+
+        # Clear name mappings from output dataset if it exists
+        if output_exists:
+            logger.info(f"Clearing output name mappings for model {model_id}")
+            await storage_interface.clear_name_mapping(output_dataset_name)
+
+        logger.info(f"Name mappings successfully cleared from model={model_id}")
+        return {"message": "Feature and output name mapping successfully cleared."}
+
+    except HTTPException:
+        # Re-raise HTTP exceptions without wrapping
+        raise
     except Exception as e:
         logger.error(f"Error removing column names: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Error removing column names: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Error removing column names: {str(e)}"
        ) from e


 @router.get("/info/tags")
