
Predictors API

culicidaelab.predictors

This package contains the predictor classes for the culicidaelab library.

__all__ = ['MosquitoClassifier', 'MosquitoDetector', 'MosquitoSegmenter', 'ModelWeightsManager'] module-attribute
MosquitoClassifier

Classifies mosquito species from an image.

This class provides methods to load a pre-trained model, predict species from single or batches of images, evaluate model performance, and visualize the classification results.
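
A minimal usage sketch (the image path is a placeholder, and a configured Settings object is assumed, as in the visualize example further down):

>>> from culicidaelab.settings import Settings
>>> from culicidaelab.predictors import MosquitoClassifier
>>> settings = Settings()
>>> classifier = MosquitoClassifier(settings, load_model=True)
>>> prediction = classifier.predict("path/to/your/image.jpg")
>>> top = prediction.top_prediction()  # highest-confidence Classification, or None
>>> if top is not None:
...     print(top.species_name, top.confidence)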

Attributes:

Name         Type            Description
arch         str             The model architecture (e.g., 'convnext_tiny').
data_dir     Path            The directory where datasets are stored.
species_map  dict[int, str]  A mapping from class indices to species names.
num_classes  int             The total number of species classes.

Source code in culicidaelab\predictors\classifier.py
class MosquitoClassifier(
    BasePredictor[ImageInput, ClassificationPrediction, ClassificationGroundTruthType],
):
    """Classifies mosquito species from an image.

    This class provides methods to load a pre-trained model, predict species
    from single or batches of images, evaluate model performance, and visualize
    the classification results.

    Attributes:
        arch (str): The model architecture (e.g., 'convnext_tiny').
        data_dir (Path): The directory where datasets are stored.
        species_map (dict[int, str]): A mapping from class indices to species names.
        num_classes (int): The total number of species classes.
    """

    def __init__(
        self,
        settings: Settings,
        predictor_type="classifier",
        mode: Literal["torch", "serve"] | None = None,
        load_model: bool = False,
        backend: BaseInferenceBackend | None = None,
    ) -> None:
        """Initializes the MosquitoClassifier.

        Args:
            settings: The main settings object for the library.
            predictor_type: The type of predictor. Defaults to "classifier".
            mode: The mode to run the predictor in, 'torch' or 'serve'.
                If None, it's determined by the environment.
            load_model: If True, load the model upon initialization.
            backend: An optional backend instance. If not provided, one will be
                created based on the mode and settings.
        """

        backend_instance = backend or create_backend(
            predictor_type=predictor_type,
            settings=settings,
            mode=mode,
        )

        super().__init__(
            settings=settings,
            predictor_type=predictor_type,
            backend=backend_instance,
            load_model=load_model,
        )
        self.arch: str | None = self.config.model_arch

        self.data_dir: Path = self.settings.dataset_dir
        self.species_map: dict[int, str] = self.settings.species_config.species_map
        self.labels_map: dict[
            str,
            str,
        ] = self.settings.species_config.class_to_full_name_map
        self.num_classes: int = len(self.species_map)

    # --------------------------------------------------------------------------
    # Public Methods
    # --------------------------------------------------------------------------

    def get_class_index(self, species_name: str) -> int | None:
        """Retrieves the class index for a given species name.

        Args:
            species_name: The name of the species.

        Returns:
            The corresponding class index if found, otherwise None.
        """
        return self.settings.species_config.get_index_by_species(species_name)

    def get_species_names(self) -> list[str]:
        """Gets a sorted list of all species names known to the classifier.

        The list is ordered by the class index.

        Returns:
            A list of species names.
        """
        return [self.species_map[i] for i in sorted(self.species_map.keys())]

    def visualize(
        self,
        input_data: ImageInput,
        predictions: ClassificationPrediction,
        save_path: str | Path | None = None,
    ) -> np.ndarray:
        """Creates a composite image with results and the input image.

        This method generates a visualization by placing the top-k predictions
        in a separate panel to the left of the image.

        Example:
            >>> from culicidaelab.settings import Settings
            >>> from culicidaelab.predictors import MosquitoClassifier
            >>> # This example assumes you have a configured settings object
            >>> settings = Settings()
            >>> classifier = MosquitoClassifier(settings, load_model=True)
            >>> image = "path/to/your/image.jpg"
            >>> prediction = classifier.predict(image)
            >>> viz_image = classifier.visualize(image, prediction, save_path="viz.jpg")

        Args:
            input_data: The input image (NumPy array, path, or PIL Image).
            predictions: The prediction output from the `predict` method.
            save_path: If provided, the image is saved to this path.

        Returns:
            A new image array containing the text panel and original image.

        Raises:
            ValueError: If the input data is invalid or predictions are empty.
            FileNotFoundError: If the image file path doesn't exist.
        """
        image_pil = self._load_and_validate_image(input_data)
        image_np_rgb = np.array(image_pil)

        if not predictions.predictions:
            raise ValueError("Predictions list cannot be empty")

        vis_config = self.config.visualization
        font_scale = vis_config.font_scale
        top_k = self.config.params.get("top_k", 5)

        img_h, img_w, _ = image_np_rgb.shape
        text_panel_width = 250
        padding = 20
        canvas_h = img_h
        canvas_w = text_panel_width + img_w
        canvas = Image.new("RGB", (canvas_w, canvas_h), color="white")
        draw = ImageDraw.Draw(canvas)

        y_offset = 40
        line_height = int(font_scale * 20)
        for classification in predictions.predictions[:top_k]:
            species, conf = classification.species_name, classification.confidence
            display_name = self.labels_map.get(species, species)
            text = f"{display_name}: {conf:.3f}"
            # Load a font (you might want to make this configurable or load once)
            try:
                font_pil = ImageFont.truetype("arial.ttf", int(font_scale * 15))
            except OSError:
                font_pil = ImageFont.load_default()
            draw.text((padding, y_offset), text, fill=vis_config.text_color, font=font_pil)
            y_offset += line_height

        canvas.paste(image_pil, (text_panel_width, 0))

        if save_path:
            save_path = Path(save_path)
            save_path.parent.mkdir(parents=True, exist_ok=True)
            canvas.save(str(save_path))

        return np.array(canvas)

    def visualize_report(
        self,
        report_data: dict[str, Any],
        save_path: str | Path | None = None,
    ) -> None:
        """Generates a visualization of the evaluation report.

        This function creates a figure with a text summary of key performance
        metrics and a heatmap of the confusion matrix.

        Args:
            report_data: The evaluation report from the `evaluate` method.
            save_path: If provided, the figure is saved to this path.

        Raises:
            ValueError: If `report_data` is missing required keys.
        """
        required_keys = [
            "accuracy_mean",
            "confidence_mean",
            "top_5_correct_mean",
            "count",
            "confusion_matrix",
        ]
        if not all(key in report_data for key in required_keys):
            raise ValueError("report_data is missing one or more required keys.")

        conf_matrix = np.array(report_data["confusion_matrix"])
        class_labels = self.get_species_names()

        fig, (ax_text, ax_matrix) = plt.subplots(
            1,
            2,
            figsize=(15, 10),
            gridspec_kw={"width_ratios": [1, 2.5]},
        )
        fig.suptitle("Model Evaluation Report", fontsize=20, y=1.02)

        ax_text.axis("off")
        text_content = (
            f"Summary (on {report_data['count']} samples):\n\n"
            f"Mean Accuracy (Top-1): {report_data['accuracy_mean']:.3f}\n"
            f"Mean Top-5 Accuracy:   {report_data['top_5_correct_mean']:.3f}\n\n"
            f"Mean Confidence:         {report_data['confidence_mean']:.3f}\n"
        )
        if "roc_auc" in report_data:
            text_content += f"ROC-AUC Score:           {report_data['roc_auc']:.3f}\n"
        ax_text.text(
            0.0,
            0.7,
            text_content,
            ha="left",
            va="top",
            transform=ax_text.transAxes,
            fontsize=16,
            family="monospace",
        )

        im = ax_matrix.imshow(conf_matrix, cmap="BuGn", interpolation="nearest")
        tick_marks = np.arange(len(class_labels))
        ax_matrix.set_xticks(tick_marks)
        ax_matrix.set_yticks(tick_marks)
        ax_matrix.set_xticklabels(
            class_labels,
            rotation=30,
            ha="right",
            rotation_mode="anchor",
        )
        ax_matrix.set_yticklabels(class_labels, rotation=0)
        fig.colorbar(im, ax=ax_matrix, fraction=0.046, pad=0.04)

        threshold = conf_matrix.max() / 2.0
        for i in range(len(class_labels)):
            for j in range(len(class_labels)):
                text_color = "white" if conf_matrix[i, j] > threshold else "black"
                ax_matrix.text(
                    j,
                    i,
                    f"{conf_matrix[i, j]}",
                    ha="center",
                    va="center",
                    color=text_color,
                )
        ax_matrix.set_title("Confusion Matrix", fontsize=16)
        ax_matrix.set_xlabel("Predicted Label", fontsize=12)
        ax_matrix.set_ylabel("True Label", fontsize=12)

        plt.tight_layout(rect=(0, 0, 1, 0.96))
        if save_path:
            save_path = Path(save_path)
            save_path.parent.mkdir(parents=True, exist_ok=True)
            plt.savefig(save_path, dpi=300, bbox_inches="tight")
            print(f"Report visualization saved to: {save_path}")
        plt.show()

    # --------------------------------------------------------------------------
    # Private Methods
    # --------------------------------------------------------------------------
    def _convert_raw_to_prediction(self, raw_prediction: np.ndarray) -> ClassificationPrediction:
        """Converts raw model output to a structured classification prediction."""
        species_probs = []

        for idx, prob in enumerate(raw_prediction):
            species_name = self.species_map.get(idx, f"unknown_{idx}")
            species_probs.append(Classification(species_name=species_name, confidence=float(prob)))

        species_probs.sort(key=lambda x: x.confidence, reverse=True)
        return ClassificationPrediction(predictions=species_probs)

    def _evaluate_from_prediction(
        self,
        prediction: ClassificationPrediction,
        ground_truth: ClassificationGroundTruthType,
    ) -> dict[str, float]:
        """Calculates core evaluation metrics for a single prediction."""
        if not prediction.predictions:
            return {
                "accuracy": 0.0,
                "confidence": 0.0,
                "top_1_correct": 0.0,
                "top_5_correct": 0.0,
            }
        ground_truth_species = self.labels_map.get(ground_truth, ground_truth)
        top_pred = prediction.top_prediction()
        pred_species = top_pred.species_name if top_pred else ""
        confidence = top_pred.confidence if top_pred else 0.0
        top_1_correct = float(pred_species == ground_truth_species)
        top_5_species = [p.species_name for p in prediction.predictions[:5]]
        top_5_correct = float(ground_truth_species in top_5_species)
        return {
            "accuracy": top_1_correct,
            "confidence": confidence,
            "top_1_correct": top_1_correct,
            "top_5_correct": top_5_correct,
        }

    def _finalize_evaluation_report(
        self,
        aggregated_metrics: dict[str, float],
        predictions: Sequence[ClassificationPrediction],
        ground_truths: Sequence[ClassificationGroundTruthType],
    ) -> dict[str, Any]:
        """Calculates and adds confusion matrix and ROC-AUC to the final report."""
        species_to_idx = {v: k for k, v in self.species_map.items()}
        class_labels = list(range(self.num_classes))
        y_true_indices, y_pred_indices, y_scores = [], [], []

        for gt, pred_list in zip(ground_truths, predictions):
            gt_str = self.labels_map.get(gt, gt)
            if gt_str in species_to_idx and pred_list.predictions:
                true_idx = species_to_idx[gt_str]
                top_pred = pred_list.top_prediction()
                pred_str = top_pred.species_name if top_pred else ""
                pred_idx = species_to_idx.get(pred_str, -1)
                y_true_indices.append(true_idx)
                y_pred_indices.append(pred_idx)
                prob_vector = [0.0] * self.num_classes
                for classification in pred_list.predictions:
                    class_idx = species_to_idx.get(classification.species_name)
                    if class_idx is not None:
                        prob_vector[class_idx] = classification.confidence
                y_scores.append(prob_vector)

        if y_true_indices and y_pred_indices:
            valid_indices = [i for i, p_idx in enumerate(y_pred_indices) if p_idx != -1]
            if valid_indices:
                cm_y_true = [y_true_indices[i] for i in valid_indices]
                cm_y_pred = [y_pred_indices[i] for i in valid_indices]
                conf_matrix = confusion_matrix(
                    cm_y_true,
                    cm_y_pred,
                    labels=class_labels,
                )
                aggregated_metrics["confusion_matrix"] = conf_matrix.tolist()

        if y_scores and y_true_indices and len(np.unique(y_true_indices)) > 1:
            y_true_binarized = label_binarize(y_true_indices, classes=class_labels)
            try:
                roc_auc = roc_auc_score(
                    y_true_binarized,
                    np.array(y_scores),
                    multi_class="ovr",
                )
                aggregated_metrics["roc_auc"] = roc_auc  # type: ignore
            except ValueError as e:
                self._logger.warning(f"Could not compute ROC AUC score: {e}")
                aggregated_metrics["roc_auc"] = 0.0
        return aggregated_metrics
settings = settings instance-attribute
predictor_type = predictor_type instance-attribute
backend = backend instance-attribute
config: PredictorConfig property

Get the predictor configuration Pydantic model.

Returns:

Type             Description
PredictorConfig  The configuration object for this predictor.

model_loaded: bool property

Check if the model is loaded.

Returns:

Type  Description
bool  True if the model is loaded, False otherwise.

arch: str | None = self.config.model_arch instance-attribute
data_dir: Path = self.settings.dataset_dir instance-attribute
species_map: dict[int, str] = self.settings.species_config.species_map instance-attribute
labels_map: dict[str, str] = self.settings.species_config.class_to_full_name_map instance-attribute
num_classes: int = len(self.species_map) instance-attribute
__call__(input_data: InputDataType, **kwargs: Any) -> Any

Convenience method that calls predict().

This allows the predictor instance to be called as a function.

Parameters:

Name        Type           Description                                          Default
input_data  InputDataType  The input data for the prediction.                   required
**kwargs    Any            Additional arguments to pass to the predict method.  {}

Returns:

Type  Description
Any   The result of the prediction.

Source code in culicidaelab\core\base_predictor.py
def __call__(self, input_data: InputDataType, **kwargs: Any) -> Any:
    """Convenience method that calls `predict()`.

    This allows the predictor instance to be called as a function.

    Args:
        input_data (InputDataType): The input data for the prediction.
        **kwargs (Any): Additional arguments to pass to the `predict` method.

    Returns:
        Any: The result of the prediction.
    """
    if not self.backend.is_loaded:
        self.load_model()
    return self.predict(input_data, **kwargs)
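
As a short sketch, calling the instance is equivalent to calling predict, with the model loaded on demand (classifier and the placeholder path are carried over from the example above):

>>> prediction = classifier("path/to/your/image.jpg")  # same result as classifier.predict(...)
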
__enter__()

Context manager entry.

Loads the model if it is not already loaded.

Returns:

Type           Description
BasePredictor  The predictor instance.

Source code in culicidaelab\core\base_predictor.py
def __enter__(self):
    """Context manager entry.

    Loads the model if it is not already loaded.

    Returns:
        BasePredictor: The predictor instance.
    """
    if not self.backend.is_loaded:
        self.load_model()
    return self
__exit__(exc_type, exc_val, exc_tb)

Context manager exit.

This default implementation does nothing, but can be overridden to handle resource cleanup.

Source code in culicidaelab\core\base_predictor.py
def __exit__(self, exc_type, exc_val, exc_tb):
    """Context manager exit.

    This default implementation does nothing, but can be overridden to handle
    resource cleanup.
    """
    pass
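
A sketch of the context-manager protocol (construction as in the earlier example). Note that __exit__ does not unload the model; use model_context below if you want automatic unloading:

>>> with MosquitoClassifier(settings) as classifier:  # model loads on entry
...     prediction = classifier.predict("path/to/your/image.jpg")
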
model_context()

A context manager for temporary model loading.

Ensures the model is loaded upon entering the context and unloaded upon exiting if it was not loaded before. This is useful for managing memory in pipelines.

Yields:

Type           Description
BasePredictor  The predictor instance itself.

Example:

>>> with predictor.model_context():
...     predictions = predictor.predict(data)

Source code in culicidaelab\core\base_predictor.py
@contextmanager
def model_context(self):
    """A context manager for temporary model loading.

    Ensures the model is loaded upon entering the context and unloaded
    upon exiting if it was not loaded before. This is useful for managing
    memory in pipelines.

    Yields:
        BasePredictor: The predictor instance itself.

    Example:
        >>> with predictor.model_context():
        ...     predictions = predictor.predict(data)
    """
    was_loaded = self.backend.is_loaded
    try:
        if not was_loaded:
            self.load_model()
        yield self
    finally:
        if not was_loaded and self.backend.is_loaded:
            self.unload_model()
evaluate(ground_truth: GroundTruthType, prediction: PredictionType | None = None, input_data: InputDataType | None = None, **predict_kwargs: Any) -> dict[str, float]

Evaluate a prediction against a ground truth.

Either prediction or input_data must be provided. If prediction is provided, it is used directly. If prediction is None, input_data is used to generate a new prediction.

Parameters:

Name              Type             Description                                                        Default
ground_truth      GroundTruthType  The ground truth annotation.                                       required
prediction        PredictionType   A pre-computed prediction.                                         None
input_data        InputDataType    Input data to generate a prediction from, if one isn't provided.   None
**predict_kwargs  Any              Additional arguments passed to the predict method.                 {}

Returns:

Type              Description
dict[str, float]  Dictionary containing evaluation metrics for a single item.

Raises:

Type        Description
ValueError  If neither prediction nor input_data is provided.

Source code in culicidaelab\core\base_predictor.py
def evaluate(
    self,
    ground_truth: GroundTruthType,
    prediction: PredictionType | None = None,
    input_data: InputDataType | None = None,
    **predict_kwargs: Any,
) -> dict[str, float]:
    """Evaluate a prediction against a ground truth.

    Either `prediction` or `input_data` must be provided. If `prediction`
    is provided, it is used directly. If `prediction` is None, `input_data`
    is used to generate a new prediction.

    Args:
        ground_truth (GroundTruthType): The ground truth annotation.
        prediction (PredictionType, optional): A pre-computed prediction.
        input_data (InputDataType, optional): Input data to generate a
            prediction from, if one isn't provided.
        **predict_kwargs (Any): Additional arguments passed to the `predict`
            method.

    Returns:
        dict[str, float]: Dictionary containing evaluation metrics for a
        single item.

    Raises:
        ValueError: If neither `prediction` nor `input_data` is provided.
    """
    if prediction is None:
        if input_data is not None:
            prediction = self.predict(input_data, **predict_kwargs)
        else:
            raise ValueError(
                "Either 'prediction' or 'input_data' must be provided.",
            )
    return self._evaluate_from_prediction(
        prediction=prediction,
        ground_truth=ground_truth,
    )
evaluate_batch(ground_truth_batch: Sequence[GroundTruthType], predictions_batch: Sequence[PredictionType] | None = None, input_data_batch: Sequence[InputDataType] | None = None, num_workers: int = 1, show_progress: bool = False, **predict_kwargs: Any) -> dict[str, Any]

Evaluate on a batch of items using parallel processing.

Either predictions_batch or input_data_batch must be provided.

Parameters:

Name                Type                       Description                                           Default
ground_truth_batch  Sequence[GroundTruthType]  List of corresponding ground truth annotations.       required
predictions_batch   Sequence[PredictionType]   A pre-computed list of predictions.                   None
input_data_batch    Sequence[InputDataType]    List of input data to generate predictions from.      None
num_workers         int                        Number of parallel workers for calculating metrics.   1
show_progress       bool                       Whether to show a progress bar.                       False
**predict_kwargs    Any                        Additional arguments passed to predict_batch.         {}

Returns:

Type            Description
dict[str, Any]  Dictionary containing aggregated evaluation metrics.

Raises:

Type        Description
ValueError  If the number of predictions does not match the number of ground truths, or if required inputs are missing.

Source code in culicidaelab\core\base_predictor.py
def evaluate_batch(
    self,
    ground_truth_batch: Sequence[GroundTruthType],
    predictions_batch: Sequence[PredictionType] | None = None,
    input_data_batch: Sequence[InputDataType] | None = None,
    num_workers: int = 1,
    show_progress: bool = False,
    **predict_kwargs: Any,
) -> dict[str, Any]:
    """Evaluate on a batch of items using parallel processing.

    Either `predictions_batch` or `input_data_batch` must be provided.

    Args:
        ground_truth_batch (Sequence[GroundTruthType]): List of corresponding
            ground truth annotations.
        predictions_batch (Sequence[PredictionType], optional): A pre-computed
            list of predictions.
        input_data_batch (Sequence[InputDataType], optional): List of input data
            to generate predictions from.
        num_workers (int): Number of parallel workers for calculating metrics.
        show_progress (bool): Whether to show a progress bar.
        **predict_kwargs (Any): Additional arguments passed to `predict_batch`.

    Returns:
        dict[str, Any]: Dictionary containing aggregated evaluation metrics.

    Raises:
        ValueError: If the number of predictions does not match the number
            of ground truths, or if required inputs are missing.
    """
    if predictions_batch is None:
        if input_data_batch is not None:
            predictions_batch = self.predict_batch(
                input_data_batch,
                show_progress=show_progress,
                **predict_kwargs,
            )
        else:
            raise ValueError(
                "Either 'predictions_batch' or 'input_data_batch' must be provided.",
            )

    if len(predictions_batch) != len(ground_truth_batch):
        raise ValueError(
            f"Number of predictions ({len(predictions_batch)}) must match "
            f"number of ground truths ({len(ground_truth_batch)}).",
        )

    per_item_metrics = self._calculate_metrics_parallel(
        predictions_batch,
        ground_truth_batch,
        num_workers,
        show_progress,
    )
    aggregated_metrics = self._aggregate_metrics(per_item_metrics)
    final_report = self._finalize_evaluation_report(
        aggregated_metrics,
        predictions_batch,
        ground_truth_batch,
    )
    return final_report
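
A sketch of batch evaluation feeding the aggregated report into visualize_report (image_paths and labels are hypothetical lists of file paths and species labels; the aggregated report is assumed to contain the keys visualize_report checks for):

>>> report = classifier.evaluate_batch(
...     ground_truth_batch=labels,
...     input_data_batch=image_paths,
...     num_workers=2,
...     show_progress=True,
... )
>>> classifier.visualize_report(report, save_path="report.png")
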
get_model_info() -> dict[str, Any]

Gets information about the loaded model.

Returns:

Type            Description
dict[str, Any]  A dictionary containing details about the model, such as architecture, path, etc.

Source code in culicidaelab\core\base_predictor.py
def get_model_info(self) -> dict[str, Any]:
    """Gets information about the loaded model.

    Returns:
        dict[str, Any]: A dictionary containing details about the model, such
        as architecture, path, etc.
    """
    return {
        "predictor_type": self.predictor_type,
        "model_loaded": self.backend.is_loaded,
        "config": self.config.model_dump(),
    }
load_model() -> None

Delegates model loading to the configured backend.

Source code in culicidaelab\core\base_predictor.py
def load_model(self) -> None:
    """Delegates model loading to the configured backend."""
    if not self.backend.is_loaded:
        self._logger.info(
            f"Loading model for {self.predictor_type} using {self.backend.__class__.__name__}",
        )
        try:
            self.backend.load_model()
            self._logger.info(f"Successfully loaded model for {self.predictor_type}")
        except Exception as e:
            self._logger.error(f"Failed to load model for {self.predictor_type}: {e}")
            raise RuntimeError(f"Failed to load model for {self.predictor_type}: {e}") from e
predict(input_data: InputDataType, **kwargs: Any) -> PredictionType

Makes a prediction on a single input data sample.

Parameters:

Name        Type           Description                                                                 Default
input_data  InputDataType  The input data (e.g., an image as a NumPy array) to make a prediction on.  required
**kwargs    Any            Additional predictor-specific arguments.                                    {}

Returns:

Type            Description
PredictionType  The prediction result, with a format specific to the predictor type.

Raises:

Type          Description
RuntimeError  If the model is not loaded before calling this method.

Source code in culicidaelab\core\base_predictor.py
def predict(
    self,
    input_data: InputDataType,
    **kwargs: Any,
) -> PredictionType:
    """Makes a prediction on a single input data sample.

    Args:
        input_data (InputDataType): The input data (e.g., an image as a NumPy
            array) to make a prediction on.
        **kwargs (Any): Additional predictor-specific arguments.

    Returns:
        PredictionType: The prediction result, with a format specific to the
        predictor type.

    Raises:
        RuntimeError: If the model is not loaded before calling this method.
    """
    if not self.backend.is_loaded:
        try:
            self.load_model()
        except Exception as e:
            raise RuntimeError(f"Failed to load model: {e}") from e

    image = self._load_and_validate_image(input_data)

    raw_output = self.backend.predict(image, **kwargs)

    return self._convert_raw_to_prediction(raw_output)
predict_batch(input_data_batch: Sequence[InputDataType], show_progress: bool = False, **kwargs: Any) -> list[PredictionType]

Makes predictions on a batch of inputs by delegating to the backend.

Parameters:

Name              Type                     Description                                             Default
input_data_batch  Sequence[InputDataType]  A sequence of inputs.                                   required
show_progress     bool                     If True, displays a progress bar.                       False
**kwargs          Any                      Additional arguments for the backend's predict_batch.   {}

Returns:

Type                  Description
list[PredictionType]  A list of prediction results.

Source code in culicidaelab\core\base_predictor.py
def predict_batch(
    self,
    input_data_batch: Sequence[InputDataType],
    show_progress: bool = False,
    **kwargs: Any,
) -> list[PredictionType]:
    """Makes predictions on a batch of inputs by delegating to the backend.

    Args:
        input_data_batch (Sequence[InputDataType]): A sequence of inputs.
        show_progress (bool): If True, displays a progress bar.
        **kwargs (Any): Additional arguments for the backend's `predict_batch`.

    Returns:
        list[PredictionType]: A list of prediction results.
    """
    if not input_data_batch:
        return []

    if not self.backend.is_loaded:
        self.load_model()

    raw_predictions = self.backend.predict_batch(list(input_data_batch), **kwargs)
    final_predictions = [self._convert_raw_to_prediction(raw_pred) for raw_pred in raw_predictions]
    return final_predictions
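
A minimal sketch (image_paths as above); it is assumed the backend returns one raw prediction per input:

>>> predictions = classifier.predict_batch(image_paths, show_progress=True)
>>> len(predictions)  # expected to match len(image_paths)
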
unload_model() -> None

Unloads the model to free memory.

Source code in culicidaelab\core\base_predictor.py
def unload_model(self) -> None:
    """Unloads the model to free memory."""
    if self.backend.is_loaded:
        self.backend.unload_model()
        self._logger.info(f"Unloaded model for {self.predictor_type}")
__init__(settings: Settings, predictor_type='classifier', mode: Literal['torch', 'serve'] | None = None, load_model: bool = False, backend: BaseInferenceBackend | None = None) -> None

Initializes the MosquitoClassifier.

Parameters:

Name            Type                              Description                                                                                           Default
settings        Settings                          The main settings object for the library.                                                            required
predictor_type                                    The type of predictor. Defaults to "classifier".                                                     'classifier'
mode            Literal['torch', 'serve'] | None  The mode to run the predictor in, 'torch' or 'serve'. If None, it's determined by the environment.   None
load_model      bool                              If True, load the model upon initialization.                                                         False
backend         BaseInferenceBackend | None       An optional backend instance. If not provided, one will be created based on the mode and settings.   None
Source code in culicidaelab\predictors\classifier.py
def __init__(
    self,
    settings: Settings,
    predictor_type="classifier",
    mode: Literal["torch", "serve"] | None = None,
    load_model: bool = False,
    backend: BaseInferenceBackend | None = None,
) -> None:
    """Initializes the MosquitoClassifier.

    Args:
        settings: The main settings object for the library.
        predictor_type: The type of predictor. Defaults to "classifier".
        mode: The mode to run the predictor in, 'torch' or 'serve'.
            If None, it's determined by the environment.
        load_model: If True, load the model upon initialization.
        backend: An optional backend instance. If not provided, one will be
            created based on the mode and settings.
    """

    backend_instance = backend or create_backend(
        predictor_type=predictor_type,
        settings=settings,
        mode=mode,
    )

    super().__init__(
        settings=settings,
        predictor_type=predictor_type,
        backend=backend_instance,
        load_model=load_model,
    )
    self.arch: str | None = self.config.model_arch

    self.data_dir: Path = self.settings.dataset_dir
    self.species_map: dict[int, str] = self.settings.species_config.species_map
    self.labels_map: dict[
        str,
        str,
    ] = self.settings.species_config.class_to_full_name_map
    self.num_classes: int = len(self.species_map)
get_class_index(species_name: str) -> int | None

Retrieves the class index for a given species name.

Parameters:

Name          Type  Description               Default
species_name  str   The name of the species.  required

Returns:

Type        Description
int | None  The corresponding class index if found, otherwise None.

Source code in culicidaelab\predictors\classifier.py
def get_class_index(self, species_name: str) -> int | None:
    """Retrieves the class index for a given species name.

    Args:
        species_name: The name of the species.

    Returns:
        The corresponding class index if found, otherwise None.
    """
    return self.settings.species_config.get_index_by_species(species_name)
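
A round-trip sketch against species_map (the species name is hypothetical):

>>> idx = classifier.get_class_index("aedes_aegypti")
>>> if idx is not None:
...     assert classifier.species_map[idx] == "aedes_aegypti"
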
get_species_names() -> list[str]

Gets a sorted list of all species names known to the classifier.

The list is ordered by the class index.

Returns:

Type       Description
list[str]  A list of species names.

Source code in culicidaelab\predictors\classifier.py
def get_species_names(self) -> list[str]:
    """Gets a sorted list of all species names known to the classifier.

    The list is ordered by the class index.

    Returns:
        A list of species names.
    """
    return [self.species_map[i] for i in sorted(self.species_map.keys())]
visualize(input_data: ImageInput, predictions: ClassificationPrediction, save_path: str | Path | None = None) -> np.ndarray

Creates a composite image with results and the input image.

This method generates a visualization by placing the top-k predictions in a separate panel to the left of the image.

Example:

>>> from culicidaelab.settings import Settings
>>> from culicidaelab.predictors import MosquitoClassifier
>>> # This example assumes you have a configured settings object
>>> settings = Settings()
>>> classifier = MosquitoClassifier(settings, load_model=True)
>>> image = "path/to/your/image.jpg"
>>> prediction = classifier.predict(image)
>>> viz_image = classifier.visualize(image, prediction, save_path="viz.jpg")

Parameters:

Name         Type                      Description                                          Default
input_data   ImageInput                The input image (NumPy array, path, or PIL Image).   required
predictions  ClassificationPrediction  The prediction output from the predict method.       required
save_path    str | Path | None         If provided, the image is saved to this path.        None

Returns:

Type     Description
ndarray  A new image array containing the text panel and original image.

Raises:

Type               Description
ValueError         If the input data is invalid or predictions are empty.
FileNotFoundError  If the image file path doesn't exist.

Source code in culicidaelab\predictors\classifier.py
def visualize(
    self,
    input_data: ImageInput,
    predictions: ClassificationPrediction,
    save_path: str | Path | None = None,
) -> np.ndarray:
    """Creates a composite image with results and the input image.

    This method generates a visualization by placing the top-k predictions
    in a separate panel to the left of the image.

    Example:
        >>> from culicidaelab.settings import Settings
        >>> from culicidaelab.predictors import MosquitoClassifier
        >>> # This example assumes you have a configured settings object
        >>> settings = Settings()
        >>> classifier = MosquitoClassifier(settings, load_model=True)
        >>> image = "path/to/your/image.jpg"
        >>> prediction = classifier.predict(image)
        >>> viz_image = classifier.visualize(image, prediction, save_path="viz.jpg")

    Args:
        input_data: The input image (NumPy array, path, or PIL Image).
        predictions: The prediction output from the `predict` method.
        save_path: If provided, the image is saved to this path.

    Returns:
        A new image array containing the text panel and original image.

    Raises:
        ValueError: If the input data is invalid or predictions are empty.
        FileNotFoundError: If the image file path doesn't exist.
    """
    image_pil = self._load_and_validate_image(input_data)
    image_np_rgb = np.array(image_pil)

    if not predictions.predictions:
        raise ValueError("Predictions list cannot be empty")

    vis_config = self.config.visualization
    font_scale = vis_config.font_scale
    top_k = self.config.params.get("top_k", 5)

    img_h, img_w, _ = image_np_rgb.shape
    text_panel_width = 250
    padding = 20
    canvas_h = img_h
    canvas_w = text_panel_width + img_w
    canvas = Image.new("RGB", (canvas_w, canvas_h), color="white")
    draw = ImageDraw.Draw(canvas)

    y_offset = 40
    line_height = int(font_scale * 20)
    for classification in predictions.predictions[:top_k]:
        species, conf = classification.species_name, classification.confidence
        display_name = self.labels_map.get(species, species)
        text = f"{display_name}: {conf:.3f}"
        # Load a font (you might want to make this configurable or load once)
        try:
            font_pil = ImageFont.truetype("arial.ttf", int(font_scale * 15))
        except OSError:
            font_pil = ImageFont.load_default()
        draw.text((padding, y_offset), text, fill=vis_config.text_color, font=font_pil)
        y_offset += line_height

    canvas.paste(image_pil, (text_panel_width, 0))

    if save_path:
        save_path = Path(save_path)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        canvas.save(str(save_path))

    return np.array(canvas)
visualize_report(report_data: dict[str, Any], save_path: str | Path | None = None) -> None

Generates a visualization of the evaluation report.

This function creates a figure with a text summary of key performance metrics and a heatmap of the confusion matrix.

Parameters:

Name         Type               Description                                       Default
report_data  dict[str, Any]     The evaluation report from the evaluate method.   required
save_path    str | Path | None  If provided, the figure is saved to this path.    None

Raises:

Type        Description
ValueError  If report_data is missing required keys.

Source code in culicidaelab\predictors\classifier.py
def visualize_report(
    self,
    report_data: dict[str, Any],
    save_path: str | Path | None = None,
) -> None:
    """Generates a visualization of the evaluation report.

    This function creates a figure with a text summary of key performance
    metrics and a heatmap of the confusion matrix.

    Args:
        report_data: The evaluation report from the `evaluate` method.
        save_path: If provided, the figure is saved to this path.

    Raises:
        ValueError: If `report_data` is missing required keys.
    """
    required_keys = [
        "accuracy_mean",
        "confidence_mean",
        "top_5_correct_mean",
        "count",
        "confusion_matrix",
    ]
    if not all(key in report_data for key in required_keys):
        raise ValueError("report_data is missing one or more required keys.")

    conf_matrix = np.array(report_data["confusion_matrix"])
    class_labels = self.get_species_names()

    fig, (ax_text, ax_matrix) = plt.subplots(
        1,
        2,
        figsize=(15, 10),
        gridspec_kw={"width_ratios": [1, 2.5]},
    )
    fig.suptitle("Model Evaluation Report", fontsize=20, y=1.02)

    ax_text.axis("off")
    text_content = (
        f"Summary (on {report_data['count']} samples):\n\n"
        f"Mean Accuracy (Top-1): {report_data['accuracy_mean']:.3f}\n"
        f"Mean Top-5 Accuracy:   {report_data['top_5_correct_mean']:.3f}\n\n"
        f"Mean Confidence:         {report_data['confidence_mean']:.3f}\n"
    )
    if "roc_auc" in report_data:
        text_content += f"ROC-AUC Score:           {report_data['roc_auc']:.3f}\n"
    ax_text.text(
        0.0,
        0.7,
        text_content,
        ha="left",
        va="top",
        transform=ax_text.transAxes,
        fontsize=16,
        family="monospace",
    )

    im = ax_matrix.imshow(conf_matrix, cmap="BuGn", interpolation="nearest")
    tick_marks = np.arange(len(class_labels))
    ax_matrix.set_xticks(tick_marks)
    ax_matrix.set_yticks(tick_marks)
    ax_matrix.set_xticklabels(
        class_labels,
        rotation=30,
        ha="right",
        rotation_mode="anchor",
    )
    ax_matrix.set_yticklabels(class_labels, rotation=0)
    fig.colorbar(im, ax=ax_matrix, fraction=0.046, pad=0.04)

    threshold = conf_matrix.max() / 2.0
    for i in range(len(class_labels)):
        for j in range(len(class_labels)):
            text_color = "white" if conf_matrix[i, j] > threshold else "black"
            ax_matrix.text(
                j,
                i,
                f"{conf_matrix[i, j]}",
                ha="center",
                va="center",
                color=text_color,
            )
    ax_matrix.set_title("Confusion Matrix", fontsize=16)
    ax_matrix.set_xlabel("Predicted Label", fontsize=12)
    ax_matrix.set_ylabel("True Label", fontsize=12)

    plt.tight_layout(rect=(0, 0, 1, 0.96))
    if save_path:
        save_path = Path(save_path)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(save_path, dpi=300, bbox_inches="tight")
        print(f"Report visualization saved to: {save_path}")
    plt.show()
MosquitoDetector

Detects mosquitos in images using a YOLO model.

This class loads a model and provides methods for predicting bounding boxes on single or batches of images, visualizing results, and evaluating detection performance against ground truth data.
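
A minimal usage sketch (placeholder path; the per-call confidence_threshold override is documented under predict below):

>>> from culicidaelab.settings import Settings
>>> from culicidaelab.predictors import MosquitoDetector
>>> settings = Settings()
>>> detector = MosquitoDetector(settings, load_model=True)
>>> result = detector.predict("path/to/your/image.jpg", confidence_threshold=0.6)
>>> for det in result.detections:
...     print(det.box.x1, det.box.y1, det.box.x2, det.box.y2, det.confidence)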

Attributes:

Name                  Type   Description
confidence_threshold  float  The minimum confidence score for a detection to be considered valid.
iou_threshold         float  The IoU threshold for non-maximum suppression.
max_detections        int    The maximum number of detections to return per image.

Source code in culicidaelab\predictors\detector.py
class MosquitoDetector(
    BasePredictor[ImageInput, DetectionPrediction, DetectionGroundTruthType],
):
    """Detects mosquitos in images using a YOLO model.

    This class loads a model and provides methods for predicting bounding
    boxes on single or batches of images, visualizing results, and evaluating
    detection performance against ground truth data.

    Attributes:
        confidence_threshold (float): The minimum confidence score for a
            detection to be considered valid.
        iou_threshold (float): The IoU threshold for non-maximum suppression.
        max_detections (int): The maximum number of detections to return per image.
    """

    def __init__(
        self,
        settings: Settings,
        predictor_type="detector",
        mode: Literal["torch", "serve"] | None = None,
        load_model: bool = False,
        backend: BaseInferenceBackend | None = None,
    ) -> None:
        """Initializes the MosquitoDetector.

        Args:
            settings: The main settings object for the library.
            predictor_type: The type of predictor. Defaults to "detector".
            mode: The mode to run the predictor in, 'torch' or 'serve'.
                If None, it's determined by the environment.
            load_model: If True, load the model upon initialization.
            backend: An optional backend instance. If not provided, one will be
                created based on the mode and settings.
        """

        backend_instance = backend or create_backend(
            predictor_type=predictor_type,
            settings=settings,
            mode=mode,
        )

        super().__init__(
            settings=settings,
            predictor_type=predictor_type,
            backend=backend_instance,
            load_model=load_model,
        )
        self.confidence_threshold: float = self.config.confidence or 0.5
        self.iou_threshold: float = self.config.params.get("iou_threshold", 0.45)
        self.max_detections: int = self.config.params.get("max_detections", 300)

    def predict(self, input_data: ImageInput, **kwargs: Any) -> DetectionPrediction:
        """Detects mosquitos in a single image.

        Example:
            >>> from culicidaelab.settings import Settings
            >>> from culicidaelab.predictors import MosquitoDetector
            >>> # This example assumes you have a configured settings object
            >>> settings = Settings()
            >>> detector = MosquitoDetector(settings, load_model=True)
            >>> image = "path/to/your/image.jpg"
            >>> detections = detector.predict(image)
            >>> for detection in detections.detections:
            ...     print(detection.box, detection.confidence)

        Args:
            input_data: The input image as a NumPy array or other supported format.
            **kwargs: Optional keyword arguments, including:
                confidence_threshold (float): Override the default confidence
                    threshold for this prediction.

        Returns:
            A `DetectionPrediction` object containing a list of
            `Detection` instances. Returns an empty list if no mosquitos are found.

        Raises:
            RuntimeError: If the model fails to load or if prediction fails.
        """
        if not self.backend.is_loaded:
            self.load_model()

        confidence_threshold = kwargs.get(
            "confidence_threshold",
            self.confidence_threshold,
        )

        try:
            input_image = self._load_and_validate_image(input_data)
            # The backend now returns a standardized NumPy array (N, 5) -> [x1, y1, x2, y2, conf]
            results_array = self.backend.predict(
                input_data=input_image,
                conf=confidence_threshold,
                iou=self.iou_threshold,
                max_det=self.max_detections,
                verbose=False,
            )
        except Exception as e:
            logger.error(f"Prediction failed: {e}", exc_info=True)
            raise RuntimeError(f"Prediction failed: {e}") from e

        return self._convert_raw_to_prediction(results_array)

    def _convert_raw_to_prediction(self, raw_prediction: np.ndarray) -> DetectionPrediction:
        """Converts raw model output to a structured detection prediction.

        Args:
            raw_prediction: A numpy array with shape (N, 5) where each row is
                [x1, y1, x2, y2, confidence].

        Returns:
            A DetectionPrediction object containing a list of Detection objects.
        """
        detections: list[Detection] = []
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 5:
            for row in raw_prediction:
                x1, y1, x2, y2, conf = row
                detections.append(
                    Detection(box=BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2), confidence=conf),
                )
        return DetectionPrediction(detections=detections)

    def visualize(
        self,
        input_data: ImageInput,
        predictions: DetectionPrediction,
        save_path: str | Path | None = None,
    ) -> np.ndarray:
        """Draws predicted bounding boxes on an image.

        Example:
            >>> from culicidaelab.settings import Settings
            >>> from culicidaelab.predictors import MosquitoDetector
            >>> # This example assumes you have a configured settings object
            >>> settings = Settings()
            >>> detector = MosquitoDetector(settings, load_model=True)
            >>> image = "path/to/your/image.jpg"
            >>> detections = detector.predict(image)
            >>> viz_image = detector.visualize(image, detections, save_path="viz.jpg")

        Args:
            input_data: The original image.
            predictions: The `DetectionPrediction` from `predict`.
            save_path: If provided, the output image is saved to this path.

        Returns:
            A new image array with bounding boxes and confidence scores drawn on it.
        """
        vis_img = self._load_and_validate_image(input_data).copy()
        draw = ImageDraw.Draw(vis_img)
        vis_config = self.config.visualization
        font_scale = vis_config.font_scale
        thickness = vis_config.box_thickness

        for detection in predictions.detections:
            box = detection.box
            conf = detection.confidence
            draw.rectangle(
                [(int(box.x1), int(box.y1)), (int(box.x2), int(box.y2))],
                outline=vis_config.box_color,
                width=thickness,
            )
            text = f"{conf:.2f}"
            try:
                font = ImageFont.truetype("arial.ttf", int(font_scale * 20))
            except OSError:
                font = ImageFont.load_default()
            draw.text((int(box.x1), int(box.y1 - 10)), text, fill=vis_config.text_color, font=font)

        if save_path:
            save_path = Path(save_path)
            save_path.parent.mkdir(parents=True, exist_ok=True)
            vis_img.save(str(save_path))

        return np.array(vis_img)

    def _calculate_iou(self, box1_xyxy: tuple, box2_xyxy: tuple) -> float:
        """Calculates Intersection over Union (IoU) for two boxes.

        Args:
            box1_xyxy: The first box in (x1, y1, x2, y2) format.
            box2_xyxy: The second box in (x1, y1, x2, y2) format.

        Returns:
            The IoU score between 0.0 and 1.0.
        """
        b1_x1, b1_y1, b1_x2, b1_y2 = box1_xyxy
        b2_x1, b2_y1, b2_x2, b2_y2 = box2_xyxy

        inter_x1, inter_y1 = max(b1_x1, b2_x1), max(b1_y1, b2_y1)
        inter_x2, inter_y2 = min(b1_x2, b2_x2), min(b1_y2, b2_y2)
        intersection = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)

        area1 = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
        area2 = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
        union = area1 + area2 - intersection
        return float(intersection / union) if union > 0 else 0.0

    def _evaluate_from_prediction(
        self,
        prediction: DetectionPrediction,
        ground_truth: DetectionGroundTruthType,
    ) -> dict[str, float]:
        """Calculates detection metrics for a single image's predictions.

        This computes precision, recall, F1-score, Average Precision (AP),
        and mean IoU for a set of predicted boxes against ground truth boxes.

        Args:
            prediction: A `DetectionPrediction` object.
            ground_truth: A list of ground truth boxes in (x1, y1, x2, y2)
                format: `[(x1, y1, x2, y2), ...]`, matching `_calculate_iou`.

        Returns:
            A dictionary containing the calculated metrics.
        """
        if not ground_truth and not prediction.detections:
            return {
                "precision": 1.0,
                "recall": 1.0,
                "f1": 1.0,
                "ap": 1.0,
                "mean_iou": 0.0,
            }
        if not ground_truth:  # False positives exist
            return {
                "precision": 0.0,
                "recall": 0.0,
                "f1": 0.0,
                "ap": 0.0,
                "mean_iou": 0.0,
            }
        if not prediction.detections:  # False negatives exist
            return {
                "precision": 0.0,
                "recall": 0.0,
                "f1": 0.0,
                "ap": 0.0,
                "mean_iou": 0.0,
            }

        predictions_sorted = sorted(prediction.detections, key=lambda x: x.confidence, reverse=True)
        tp = np.zeros(len(predictions_sorted))
        fp = np.zeros(len(predictions_sorted))
        gt_matched = [False] * len(ground_truth)
        all_ious_for_mean = []
        iou_threshold = self.iou_threshold

        for i, pred in enumerate(predictions_sorted):
            pred_box = (pred.box.x1, pred.box.y1, pred.box.x2, pred.box.y2)
            best_iou, best_gt_idx = 0.0, -1

            for j, gt_box in enumerate(ground_truth):
                if not gt_matched[j]:
                    iou = self._calculate_iou(pred_box, gt_box)
                    if iou > best_iou:
                        best_iou = iou
                        best_gt_idx = j

            if best_gt_idx != -1:
                all_ious_for_mean.append(best_iou)

            if best_iou >= iou_threshold:
                if not gt_matched[best_gt_idx]:
                    tp[i] = 1
                    gt_matched[best_gt_idx] = True
                else:  # Matched a GT box that was already matched
                    fp[i] = 1
            else:
                fp[i] = 1

        mean_iou_val = float(np.mean(all_ious_for_mean)) if all_ious_for_mean else 0.0
        fp_cumsum, tp_cumsum = np.cumsum(fp), np.cumsum(tp)
        recall_curve = tp_cumsum / len(ground_truth)
        precision_curve = tp_cumsum / (tp_cumsum + fp_cumsum + 1e-9)

        ap = 0.0
        for t in np.linspace(0, 1, 11):  # 11-point interpolation
            precisions_at_recall_t = precision_curve[recall_curve >= t]
            ap += np.max(precisions_at_recall_t) if len(precisions_at_recall_t) > 0 else 0.0
        ap /= 11.0

        final_precision = precision_curve[-1] if len(precision_curve) > 0 else 0.0
        final_recall = recall_curve[-1] if len(recall_curve) > 0 else 0.0
        f1 = (
            2 * (final_precision * final_recall) / (final_precision + final_recall + 1e-9)
            if (final_precision + final_recall) > 0
            else 0.0
        )

        return {
            "precision": float(final_precision),
            "recall": float(final_recall),
            "f1": float(f1),
            "ap": float(ap),
            "mean_iou": mean_iou_val,
        }
settings = settings instance-attribute
predictor_type = predictor_type instance-attribute
backend = backend instance-attribute
config: PredictorConfig property

Get the predictor configuration Pydantic model.

Returns:

Type             Description
PredictorConfig  The configuration object for this predictor.

model_loaded: bool property

Check if the model is loaded.

Returns:

Type  Description
bool  True if the model is loaded, False otherwise.

confidence_threshold: float = self.config.confidence or 0.5 instance-attribute
iou_threshold: float = self.config.params.get('iou_threshold', 0.45) instance-attribute
max_detections: int = self.config.params.get('max_detections', 300) instance-attribute
__call__(input_data: InputDataType, **kwargs: Any) -> Any

Convenience method that calls predict().

This allows the predictor instance to be called as a function.

Parameters:

Name        Type           Description                                          Default
input_data  InputDataType  The input data for the prediction.                   required
**kwargs    Any            Additional arguments to pass to the predict method.  {}

Returns:

Type  Description
Any   The result of the prediction.

Source code in culicidaelab\core\base_predictor.py
def __call__(self, input_data: InputDataType, **kwargs: Any) -> Any:
    """Convenience method that calls `predict()`.

    This allows the predictor instance to be called as a function.

    Args:
        input_data (InputDataType): The input data for the prediction.
        **kwargs (Any): Additional arguments to pass to the `predict` method.

    Returns:
        Any: The result of the prediction.
    """
    if not self.backend.is_loaded:
        self.load_model()
    return self.predict(input_data, **kwargs)
__enter__()

Context manager entry.

Loads the model if it is not already loaded.

Returns:

Type           Description
BasePredictor  The predictor instance.

Source code in culicidaelab\core\base_predictor.py
def __enter__(self):
    """Context manager entry.

    Loads the model if it is not already loaded.

    Returns:
        BasePredictor: The predictor instance.
    """
    if not self.backend.is_loaded:
        self.load_model()
    return self
__exit__(exc_type, exc_val, exc_tb)

Context manager exit.

This default implementation does nothing, but can be overridden to handle resource cleanup.

Source code in culicidaelab\core\base_predictor.py
def __exit__(self, exc_type, exc_val, exc_tb):
    """Context manager exit.

    This default implementation does nothing, but can be overridden to handle
    resource cleanup.
    """
    pass
model_context()

A context manager for temporary model loading.

Ensures the model is loaded upon entering the context and unloaded upon exiting if it was not loaded before. This is useful for managing memory in pipelines.

Yields:

Type           Description
BasePredictor  The predictor instance itself.

Example:

>>> with predictor.model_context():
...     predictions = predictor.predict(data)

Source code in culicidaelab\core\base_predictor.py
@contextmanager
def model_context(self):
    """A context manager for temporary model loading.

    Ensures the model is loaded upon entering the context and unloaded
    upon exiting if it was not loaded before. This is useful for managing
    memory in pipelines.

    Yields:
        BasePredictor: The predictor instance itself.

    Example:
        >>> with predictor.model_context():
        ...     predictions = predictor.predict(data)
    """
    was_loaded = self.backend.is_loaded
    try:
        if not was_loaded:
            self.load_model()
        yield self
    finally:
        if not was_loaded and self.backend.is_loaded:
            self.unload_model()
evaluate(ground_truth: GroundTruthType, prediction: PredictionType | None = None, input_data: InputDataType | None = None, **predict_kwargs: Any) -> dict[str, float]

Evaluate a prediction against a ground truth.

Either prediction or input_data must be provided. If prediction is provided, it is used directly. If prediction is None, input_data is used to generate a new prediction.

Parameters:

Name              Type             Description                                                        Default
ground_truth      GroundTruthType  The ground truth annotation.                                       required
prediction        PredictionType   A pre-computed prediction.                                         None
input_data        InputDataType    Input data to generate a prediction from, if one isn't provided.   None
**predict_kwargs  Any              Additional arguments passed to the predict method.                 {}

Returns:

Type              Description
dict[str, float]  Dictionary containing evaluation metrics for a single item.

Raises:

Type        Description
ValueError  If neither prediction nor input_data is provided.

Source code in culicidaelab\core\base_predictor.py
def evaluate(
    self,
    ground_truth: GroundTruthType,
    prediction: PredictionType | None = None,
    input_data: InputDataType | None = None,
    **predict_kwargs: Any,
) -> dict[str, float]:
    """Evaluate a prediction against a ground truth.

    Either `prediction` or `input_data` must be provided. If `prediction`
    is provided, it is used directly. If `prediction` is None, `input_data`
    is used to generate a new prediction.

    Args:
        ground_truth (GroundTruthType): The ground truth annotation.
        prediction (PredictionType, optional): A pre-computed prediction.
        input_data (InputDataType, optional): Input data to generate a
            prediction from, if one isn't provided.
        **predict_kwargs (Any): Additional arguments passed to the `predict`
            method.

    Returns:
        dict[str, float]: Dictionary containing evaluation metrics for a
        single item.

    Raises:
        ValueError: If neither `prediction` nor `input_data` is provided.
    """
    if prediction is None:
        if input_data is not None:
            prediction = self.predict(input_data, **predict_kwargs)
        else:
            raise ValueError(
                "Either 'prediction' or 'input_data' must be provided.",
            )
    return self._evaluate_from_prediction(
        prediction=prediction,
        ground_truth=ground_truth,
    )
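A usage sketch covering both call patterns, assuming a configured `detector` plus placeholder `image` and `ground_truth` values in the format this predictor expects:

```python
# Let evaluate() generate the prediction internally...
metrics = detector.evaluate(ground_truth, input_data=image)

# ...or reuse a prediction computed earlier.
prediction = detector.predict(image)
metrics = detector.evaluate(ground_truth, prediction=prediction)
```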
evaluate_batch(ground_truth_batch: Sequence[GroundTruthType], predictions_batch: Sequence[PredictionType] | None = None, input_data_batch: Sequence[InputDataType] | None = None, num_workers: int = 1, show_progress: bool = False, **predict_kwargs: Any) -> dict[str, Any]

Evaluate on a batch of items using parallel processing.

Either predictions_batch or input_data_batch must be provided.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `ground_truth_batch` | `Sequence[GroundTruthType]` | List of corresponding ground truth annotations. | *required* |
| `predictions_batch` | `Sequence[PredictionType]` | A pre-computed list of predictions. | `None` |
| `input_data_batch` | `Sequence[InputDataType]` | List of input data to generate predictions from. | `None` |
| `num_workers` | `int` | Number of parallel workers for calculating metrics. | `1` |
| `show_progress` | `bool` | Whether to show a progress bar. | `False` |
| `**predict_kwargs` | `Any` | Additional arguments passed to `predict_batch`. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `dict[str, Any]` | Dictionary containing aggregated evaluation metrics. |

Raises:

| Type | Description |
| --- | --- |
| `ValueError` | If the number of predictions does not match the number of ground truths, or if required inputs are missing. |

Source code in culicidaelab\core\base_predictor.py
def evaluate_batch(
    self,
    ground_truth_batch: Sequence[GroundTruthType],
    predictions_batch: Sequence[PredictionType] | None = None,
    input_data_batch: Sequence[InputDataType] | None = None,
    num_workers: int = 1,
    show_progress: bool = False,
    **predict_kwargs: Any,
) -> dict[str, Any]:
    """Evaluate on a batch of items using parallel processing.

    Either `predictions_batch` or `input_data_batch` must be provided.

    Args:
        ground_truth_batch (Sequence[GroundTruthType]): List of corresponding
            ground truth annotations.
        predictions_batch (Sequence[PredictionType], optional): A pre-computed
            list of predictions.
        input_data_batch (Sequence[InputDataType], optional): List of input data
            to generate predictions from.
        num_workers (int): Number of parallel workers for calculating metrics.
        show_progress (bool): Whether to show a progress bar.
        **predict_kwargs (Any): Additional arguments passed to `predict_batch`.

    Returns:
        dict[str, Any]: Dictionary containing aggregated evaluation metrics.

    Raises:
        ValueError: If the number of predictions does not match the number
            of ground truths, or if required inputs are missing.
    """
    if predictions_batch is None:
        if input_data_batch is not None:
            predictions_batch = self.predict_batch(
                input_data_batch,
                show_progress=show_progress,
                **predict_kwargs,
            )
        else:
            raise ValueError(
                "Either 'predictions_batch' or 'input_data_batch' must be provided.",
            )

    if len(predictions_batch) != len(ground_truth_batch):
        raise ValueError(
            f"Number of predictions ({len(predictions_batch)}) must match "
            f"number of ground truths ({len(ground_truth_batch)}).",
        )

    per_item_metrics = self._calculate_metrics_parallel(
        predictions_batch,
        ground_truth_batch,
        num_workers,
        show_progress,
    )
    aggregated_metrics = self._aggregate_metrics(per_item_metrics)
    final_report = self._finalize_evaluation_report(
        aggregated_metrics,
        predictions_batch,
        ground_truth_batch,
    )
    return final_report
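For example, assuming `images` and `ground_truths` are equal-length sequences prepared by the caller:

```python
report = detector.evaluate_batch(
    ground_truth_batch=ground_truths,  # one annotation per image
    input_data_batch=images,           # predictions are generated here
    num_workers=4,                     # parallel metric calculation
    show_progress=True,
)
```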
get_model_info() -> dict[str, Any]

Gets information about the loaded model.

Returns:

| Type | Description |
| --- | --- |
| `dict[str, Any]` | A dictionary containing details about the model, such as architecture, path, etc. |

Source code in culicidaelab\core\base_predictor.py
def get_model_info(self) -> dict[str, Any]:
    """Gets information about the loaded model.

    Returns:
        dict[str, Any]: A dictionary containing details about the model, such
        as architecture, path, etc.
    """
    return {
        "predictor_type": self.predictor_type,
        "model_loaded": self.backend.is_loaded,
        "config": self.config.model_dump(),
    }
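Usage sketch, using the keys visible in the source above:

```python
info = detector.get_model_info()
print(info["predictor_type"])  # e.g. "detector"
print(info["model_loaded"])    # False until the model is first loaded
info["config"]                 # full predictor config as a plain dict
```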
load_model() -> None

Delegates model loading to the configured backend.

Source code in culicidaelab\core\base_predictor.py
def load_model(self) -> None:
    """Delegates model loading to the configured backend."""
    if not self.backend.is_loaded:
        self._logger.info(
            f"Loading model for {self.predictor_type} using {self.backend.__class__.__name__}",
        )
        try:
            self.backend.load_model()
            self._logger.info(f"Successfully loaded model for {self.predictor_type}")
        except Exception as e:
            self._logger.error(f"Failed to load model for {self.predictor_type}: {e}")
            raise RuntimeError(f"Failed to load model for {self.predictor_type}: {e}") from e
predict_batch(input_data_batch: Sequence[InputDataType], show_progress: bool = False, **kwargs: Any) -> list[PredictionType]

Makes predictions on a batch of inputs by delegating to the backend.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data_batch` | `Sequence[InputDataType]` | A sequence of inputs. | *required* |
| `show_progress` | `bool` | If True, displays a progress bar. | `False` |
| `**kwargs` | `Any` | Additional arguments for the backend's `predict_batch`. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `list[PredictionType]` | A list of prediction results. |

Source code in culicidaelab\core\base_predictor.py
def predict_batch(
    self,
    input_data_batch: Sequence[InputDataType],
    show_progress: bool = False,
    **kwargs: Any,
) -> list[PredictionType]:
    """Makes predictions on a batch of inputs by delegating to the backend.

    Args:
        input_data_batch (Sequence[InputDataType]): A sequence of inputs.
        show_progress (bool): If True, displays a progress bar.
        **kwargs (Any): Additional arguments for the backend's `predict_batch`.

    Returns:
        list[PredictionType]: A list of prediction results.
    """
    if not input_data_batch:
        return []

    if not self.backend.is_loaded:
        self.load_model()

    raw_predictions = self.backend.predict_batch(list(input_data_batch), **kwargs)
    final_predictions = [self._convert_raw_to_prediction(raw_pred) for raw_pred in raw_predictions]
    return final_predictions
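A short sketch, assuming a configured `detector` and placeholder image paths:

```python
images = ["img_0.jpg", "img_1.jpg", "img_2.jpg"]
# One backend call for the whole batch, typically faster than
# looping over predict(); results keep the input order.
predictions = detector.predict_batch(images, show_progress=True)
assert len(predictions) == len(images)
```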
unload_model() -> None

Unloads the model to free memory.

Source code in culicidaelab\core\base_predictor.py
def unload_model(self) -> None:
    """Unloads the model to free memory."""
    if self.backend.is_loaded:
        self.backend.unload_model()
        self._logger.info(f"Unloaded model for {self.predictor_type}")
__init__(settings: Settings, predictor_type='detector', mode: Literal['torch', 'serve'] | None = None, load_model: bool = False, backend: BaseInferenceBackend | None = None) -> None

Initializes the MosquitoDetector.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `settings` | `Settings` | The main settings object for the library. | *required* |
| `predictor_type` | | The type of predictor. Defaults to "detector". | `'detector'` |
| `mode` | `Literal['torch', 'serve'] \| None` | The mode to run the predictor in, 'torch' or 'serve'. If None, it's determined by the environment. | `None` |
| `load_model` | `bool` | If True, load the model upon initialization. | `False` |
| `backend` | `BaseInferenceBackend \| None` | An optional backend instance. If not provided, one will be created based on the mode and settings. | `None` |
Source code in culicidaelab\predictors\detector.py
def __init__(
    self,
    settings: Settings,
    predictor_type="detector",
    mode: Literal["torch", "serve"] | None = None,
    load_model: bool = False,
    backend: BaseInferenceBackend | None = None,
) -> None:
    """Initializes the MosquitoDetector.

    Args:
        settings: The main settings object for the library.
        predictor_type: The type of predictor. Defaults to "detector".
        mode: The mode to run the predictor in, 'torch' or 'serve'.
            If None, it's determined by the environment.
        load_model: If True, load the model upon initialization.
        backend: An optional backend instance. If not provided, one will be
            created based on the mode and settings.
    """

    backend_instance = backend or create_backend(
        predictor_type=predictor_type,
        settings=settings,
        mode=mode,
    )

    super().__init__(
        settings=settings,
        predictor_type=predictor_type,
        backend=backend_instance,
        load_model=load_model,
    )
    self.confidence_threshold: float = self.config.confidence or 0.5
    self.iou_threshold: float = self.config.params.get("iou_threshold", 0.45)
    self.max_detections: int = self.config.params.get("max_detections", 300)
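The three thresholds read in the constructor come from the predictor's configuration, with the fallbacks shown in the source above:

```python
detector = MosquitoDetector(settings)
print(detector.confidence_threshold)  # config.confidence, or 0.5
print(detector.iou_threshold)         # params["iou_threshold"], default 0.45
print(detector.max_detections)        # params["max_detections"], default 300
```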
predict(input_data: ImageInput, **kwargs: Any) -> DetectionPrediction

Detects mosquitos in a single image.

Example:

    >>> from culicidaelab.settings import Settings
    >>> from culicidaelab.predictors import MosquitoDetector
    >>> # This example assumes you have a configured settings object
    >>> settings = Settings()
    >>> detector = MosquitoDetector(settings, load_model=True)
    >>> image = "path/to/your/image.jpg"
    >>> detections = detector.predict(image)
    >>> for detection in detections.detections:
    ...     print(detection.box, detection.confidence)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data` | `ImageInput` | The input image as a NumPy array or other supported format. | *required* |
| `**kwargs` | `Any` | Optional keyword arguments, including `confidence_threshold` (`float`): override the default confidence threshold for this prediction. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `DetectionPrediction` | A `DetectionPrediction` object containing a list of `Detection` instances. Returns an empty list if no mosquitos are found. |

Raises:

| Type | Description |
| --- | --- |
| `RuntimeError` | If the model fails to load or if prediction fails. |

Source code in culicidaelab\predictors\detector.py
def predict(self, input_data: ImageInput, **kwargs: Any) -> DetectionPrediction:
    """Detects mosquitos in a single image.

    Example:
        >>> from culicidaelab.settings import Settings
        >>> from culicidaelab.predictors import MosquitoDetector
        >>> # This example assumes you have a configured settings object
        >>> settings = Settings()
        >>> detector = MosquitoDetector(settings, load_model=True)
        >>> image = "path/to/your/image.jpg"
        >>> detections = detector.predict(image)
        >>> for detection in detections.detections:
        ...     print(detection.box, detection.confidence)

    Args:
        input_data: The input image as a NumPy array or other supported format.
        **kwargs: Optional keyword arguments, including:
            confidence_threshold (float): Override the default confidence
                threshold for this prediction.

    Returns:
        A `DetectionPrediction` object containing a list of
        `Detection` instances. Returns an empty list if no mosquitos are found.

    Raises:
        RuntimeError: If the model fails to load or if prediction fails.
    """
    if not self.backend.is_loaded:
        self.load_model()

    confidence_threshold = kwargs.get(
        "confidence_threshold",
        self.confidence_threshold,
    )

    try:
        input_image = self._load_and_validate_image(input_data)
        # The backend now returns a standardized NumPy array (N, 5) -> [x1, y1, x2, y2, conf]
        results_array = self.backend.predict(
            input_data=input_image,
            conf=confidence_threshold,
            iou=self.iou_threshold,
            max_det=self.max_detections,
            verbose=False,
        )
    except Exception as e:
        logger.error(f"Prediction failed: {e}", exc_info=True)
        raise RuntimeError(f"Prediction failed: {e}") from e

    return self._convert_raw_to_prediction(results_array)
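The documented `confidence_threshold` keyword overrides the instance default for a single call; a sketch:

```python
# Stricter and looser passes over the same image; the instance-level
# detector.confidence_threshold is left unchanged.
strict = detector.predict(image, confidence_threshold=0.8)
lenient = detector.predict(image, confidence_threshold=0.25)
# The lenient pass typically keeps extra lower-confidence boxes.
print(len(strict.detections), len(lenient.detections))
```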
visualize(input_data: ImageInput, predictions: DetectionPrediction, save_path: str | Path | None = None) -> np.ndarray

Draws predicted bounding boxes on an image.

Example:

    >>> from culicidaelab.settings import Settings
    >>> from culicidaelab.predictors import MosquitoDetector
    >>> # This example assumes you have a configured settings object
    >>> settings = Settings()
    >>> detector = MosquitoDetector(settings, load_model=True)
    >>> image = "path/to/your/image.jpg"
    >>> detections = detector.predict(image)
    >>> viz_image = detector.visualize(image, detections, save_path="viz.jpg")

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data` | `ImageInput` | The original image. | *required* |
| `predictions` | `DetectionPrediction` | The `DetectionPrediction` from `predict`. | *required* |
| `save_path` | `str \| Path \| None` | If provided, the output image is saved to this path. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ndarray` | A new image array with bounding boxes and confidence scores drawn on it. |

Source code in culicidaelab\predictors\detector.py
def visualize(
    self,
    input_data: ImageInput,
    predictions: DetectionPrediction,
    save_path: str | Path | None = None,
) -> np.ndarray:
    """Draws predicted bounding boxes on an image.

    Example:
        >>> from culicidaelab.settings import Settings
        >>> from culicidaelab.predictors import MosquitoDetector
        >>> # This example assumes you have a configured settings object
        >>> settings = Settings()
        >>> detector = MosquitoDetector(settings, load_model=True)
        >>> image = "path/to/your/image.jpg"
        >>> detections = detector.predict(image)
        >>> viz_image = detector.visualize(image, detections, save_path="viz.jpg")

    Args:
        input_data: The original image.
        predictions: The `DetectionPrediction` from `predict`.
        save_path: If provided, the output image is saved to this path.

    Returns:
        A new image array with bounding boxes and confidence scores drawn on it.
    """
    vis_img = self._load_and_validate_image(input_data).copy()
    draw = ImageDraw.Draw(vis_img)
    vis_config = self.config.visualization
    font_scale = vis_config.font_scale
    thickness = vis_config.box_thickness

    for detection in predictions.detections:
        box = detection.box
        conf = detection.confidence
        draw.rectangle(
            [(int(box.x1), int(box.y1)), (int(box.x2), int(box.y2))],
            outline=vis_config.box_color,
            width=thickness,
        )
        text = f"{conf:.2f}"
        try:
            font = ImageFont.truetype("arial.ttf", int(font_scale * 20))
        except OSError:
            font = ImageFont.load_default()
        draw.text((int(box.x1), int(box.y1 - 10)), text, fill=vis_config.text_color, font=font)

    if save_path:
        save_path = Path(save_path)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        vis_img.save(str(save_path))

    return np.array(vis_img)
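Since the return value is a plain NumPy array, it can be displayed directly; a sketch assuming matplotlib is installed:

```python
import matplotlib.pyplot as plt

viz = detector.visualize(image, detections)
plt.imshow(viz)   # HxWx3 uint8 array renders as-is
plt.axis("off")
plt.show()
```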
MosquitoSegmenter

Segments mosquitos in images using a SAM model.

This class provides methods to load a SAM model, generate segmentation masks for entire images or specific regions defined by bounding boxes, and visualize the resulting masks.

Example:

    >>> from culicidaelab.core.settings import Settings
    >>> from culicidaelab.predictors import MosquitoSegmenter
    >>> import numpy as np
    >>> # This example assumes you have a configured settings object
    >>> settings = Settings()
    >>> segmenter = MosquitoSegmenter(settings, load_model=True)
    >>> image = np.random.randint(0, 256, (1024, 1024, 3), dtype=np.uint8)
    >>> # Predict without prompts (might not be effective for all backends)
    >>> prediction = segmenter.predict(image)
    >>> print(f"Generated mask with {prediction.pixel_count} pixels.")

Source code in culicidaelab\predictors\segmenter.py
class MosquitoSegmenter(
    BasePredictor[ImageInput, SegmentationPrediction, SegmentationGroundTruthType],
):
    """Segments mosquitos in images using a SAM model.

    This class provides methods to load a SAM model, generate segmentation
    masks for entire images or specific regions defined by bounding boxes,
    and visualize the resulting masks.

    Example:
        >>> from culicidaelab.core.settings import Settings
        >>> from culicidaelab.predictors import MosquitoSegmenter
        >>> import numpy as np
        >>> # This example assumes you have a configured settings object
        >>> settings = Settings()
        >>> segmenter = MosquitoSegmenter(settings, load_model=True)
        >>> image = np.random.randint(0, 256, (1024, 1024, 3), dtype=np.uint8)
        >>> # Predict without prompts (might not be effective for all backends)
        >>> prediction = segmenter.predict(image)
        >>> print(f"Generated mask with {prediction.pixel_count} pixels.")

    """

    def __init__(
        self,
        settings: Settings,
        predictor_type="segmenter",
        mode: Literal["torch", "serve"] | None = None,
        load_model: bool = False,
        backend: BaseInferenceBackend | None = None,
    ) -> None:
        """Initializes the MosquitoSegmenter.

        Args:
            settings: The main settings object for the library.
            predictor_type: The type of predictor. Defaults to "segmenter".
            mode: The mode to run the predictor in, 'torch' or 'serve'.
                If None, it's determined by the environment.
            load_model: If True, load the model upon initialization.
            backend: An optional backend instance. If not provided, one will be
                created based on the mode and settings.
        """

        backend_instance = backend or create_backend(
            predictor_type=predictor_type,
            settings=settings,
            mode=mode,
        )

        super().__init__(
            settings=settings,
            predictor_type=predictor_type,
            backend=backend_instance,
            load_model=load_model,
        )

    def _convert_raw_to_prediction(self, raw_prediction: np.ndarray) -> SegmentationPrediction:
        """Converts a raw numpy mask to a structured segmentation prediction.

        Args:
            raw_prediction: A 2D numpy array representing the segmentation mask.

        Returns:
            A SegmentationPrediction object containing the mask and pixel count.
        """
        return SegmentationPrediction(mask=raw_prediction, pixel_count=int(np.sum(raw_prediction)))

    def visualize(
        self,
        input_data: ImageInput,
        predictions: SegmentationPrediction,
        save_path: str | Path | None = None,
    ) -> np.ndarray:
        """Overlays a segmentation mask on the original image.

        Example:
            >>> from culicidaelab.settings import Settings
            >>> from culicidaelab.predictors import MosquitoSegmenter
            >>> # This example assumes you have a configured settings object
            >>> settings = Settings()
            >>> segmenter = MosquitoSegmenter(settings, load_model=True)
            >>> image = "path/to/your/image.jpg"
            >>> # Assuming you have a prediction from segmenter.predict()
            >>> prediction = segmenter.predict(image)
            >>> viz_image = segmenter.visualize(image, prediction, save_path="viz.jpg")

        Args:
            input_data: The original image.
            predictions: The `SegmentationPrediction` from `predict`.
            save_path: If provided, the output image is saved to this path.

        Returns:
            A numpy array of the image with the segmentation mask overlaid.
        """

        image_pil = self._load_and_validate_image(input_data)

        colored_mask = Image.new("RGB", image_pil.size, self.config.visualization.overlay_color)

        # Build an alpha mask: segmented pixels take the overlay color
        alpha_mask = Image.fromarray((predictions.mask * 255).astype(np.uint8))

        # Composite the images
        overlay = Image.composite(colored_mask, image_pil, alpha_mask)

        if save_path:
            save_path = Path(save_path)
            save_path.parent.mkdir(parents=True, exist_ok=True)
            overlay.save(str(save_path))

        return np.array(overlay)

    def _evaluate_from_prediction(
        self,
        prediction: SegmentationPrediction,
        ground_truth: SegmentationGroundTruthType,
    ) -> dict[str, float]:
        """Calculates segmentation metrics for a single predicted mask.

        Computes Intersection over Union (IoU), precision, recall, and F1-score.

        Args:
            prediction: The `SegmentationPrediction` object.
            ground_truth: A 2D numpy array of the ground truth mask.

        Returns:
            A dictionary containing the calculated metrics.

        Raises:
            ValueError: If prediction and ground truth masks have different shapes.
        """
        pred_mask = prediction.mask.astype(bool)
        ground_truth = ground_truth.astype(bool)

        if pred_mask.shape != ground_truth.shape:
            raise ValueError("Prediction and ground truth must have the same shape.")

        intersection = np.logical_and(pred_mask, ground_truth).sum()
        union = np.logical_or(pred_mask, ground_truth).sum()
        prediction_sum = pred_mask.sum()
        ground_truth_sum = ground_truth.sum()

        iou = intersection / union if union > 0 else 0.0
        precision = intersection / prediction_sum if prediction_sum > 0 else 0.0
        recall = intersection / ground_truth_sum if ground_truth_sum > 0 else 0.0
        f1 = (2 * (precision * recall) / (precision + recall)) if (precision + recall) > 0 else 0.0

        return {"iou": float(iou), "precision": float(precision), "recall": float(recall), "f1": float(f1)}
settings = settings instance-attribute
predictor_type = predictor_type instance-attribute
backend = backend instance-attribute
config: PredictorConfig property

Get the predictor configuration Pydantic model.

Returns:

| Type | Description |
| --- | --- |
| `PredictorConfig` | The configuration object for this predictor. |

model_loaded: bool property

Check if the model is loaded.

Returns:

| Type | Description |
| --- | --- |
| `bool` | True if the model is loaded, False otherwise. |

__call__(input_data: InputDataType, **kwargs: Any) -> Any

Convenience method that calls predict().

This allows the predictor instance to be called as a function.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data` | `InputDataType` | The input data for the prediction. | *required* |
| `**kwargs` | `Any` | Additional arguments to pass to the `predict` method. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `Any` | The result of the prediction. |

Source code in culicidaelab\core\base_predictor.py
def __call__(self, input_data: InputDataType, **kwargs: Any) -> Any:
    """Convenience method that calls `predict()`.

    This allows the predictor instance to be called as a function.

    Args:
        input_data (InputDataType): The input data for the prediction.
        **kwargs (Any): Additional arguments to pass to the `predict` method.

    Returns:
        Any: The result of the prediction.
    """
    if not self.backend.is_loaded:
        self.load_model()
    return self.predict(input_data, **kwargs)
__enter__()

Context manager entry.

Loads the model if it is not already loaded.

Returns:

| Type | Description |
| --- | --- |
| `BasePredictor` | The predictor instance. |

Source code in culicidaelab\core\base_predictor.py
def __enter__(self):
    """Context manager entry.

    Loads the model if it is not already loaded.

    Returns:
        BasePredictor: The predictor instance.
    """
    if not self.backend.is_loaded:
        self.load_model()
    return self
__exit__(exc_type, exc_val, exc_tb)

Context manager exit.

This default implementation does nothing, but can be overridden to handle resource cleanup.

Source code in culicidaelab\core\base_predictor.py
def __exit__(self, exc_type, exc_val, exc_tb):
    """Context manager exit.

    This default implementation does nothing, but can be overridden to handle
    resource cleanup.
    """
    pass
model_context()

A context manager for temporary model loading.

Ensures the model is loaded upon entering the context and unloaded upon exiting if it was not loaded before. This is useful for managing memory in pipelines.

Yields:

| Type | Description |
| --- | --- |
| `BasePredictor` | The predictor instance itself. |

Example:

    >>> with predictor.model_context():
    ...     predictions = predictor.predict(data)

Source code in culicidaelab\core\base_predictor.py
@contextmanager
def model_context(self):
    """A context manager for temporary model loading.

    Ensures the model is loaded upon entering the context and unloaded
    upon exiting if it was not loaded before. This is useful for managing
    memory in pipelines.

    Yields:
        BasePredictor: The predictor instance itself.

    Example:
        >>> with predictor.model_context():
        ...     predictions = predictor.predict(data)
    """
    was_loaded = self.backend.is_loaded
    try:
        if not was_loaded:
            self.load_model()
        yield self
    finally:
        if not was_loaded and self.backend.is_loaded:
            self.unload_model()
evaluate(ground_truth: GroundTruthType, prediction: PredictionType | None = None, input_data: InputDataType | None = None, **predict_kwargs: Any) -> dict[str, float]

Evaluate a prediction against a ground truth.

Either prediction or input_data must be provided. If prediction is provided, it is used directly. If prediction is None, input_data is used to generate a new prediction.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `ground_truth` | `GroundTruthType` | The ground truth annotation. | *required* |
| `prediction` | `PredictionType` | A pre-computed prediction. | `None` |
| `input_data` | `InputDataType` | Input data to generate a prediction from, if one isn't provided. | `None` |
| `**predict_kwargs` | `Any` | Additional arguments passed to the `predict` method. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `dict[str, float]` | Dictionary containing evaluation metrics for a single item. |

Raises:

| Type | Description |
| --- | --- |
| `ValueError` | If neither `prediction` nor `input_data` is provided. |

Source code in culicidaelab\core\base_predictor.py
def evaluate(
    self,
    ground_truth: GroundTruthType,
    prediction: PredictionType | None = None,
    input_data: InputDataType | None = None,
    **predict_kwargs: Any,
) -> dict[str, float]:
    """Evaluate a prediction against a ground truth.

    Either `prediction` or `input_data` must be provided. If `prediction`
    is provided, it is used directly. If `prediction` is None, `input_data`
    is used to generate a new prediction.

    Args:
        ground_truth (GroundTruthType): The ground truth annotation.
        prediction (PredictionType, optional): A pre-computed prediction.
        input_data (InputDataType, optional): Input data to generate a
            prediction from, if one isn't provided.
        **predict_kwargs (Any): Additional arguments passed to the `predict`
            method.

    Returns:
        dict[str, float]: Dictionary containing evaluation metrics for a
        single item.

    Raises:
        ValueError: If neither `prediction` nor `input_data` is provided.
    """
    if prediction is None:
        if input_data is not None:
            prediction = self.predict(input_data, **predict_kwargs)
        else:
            raise ValueError(
                "Either 'prediction' or 'input_data' must be provided.",
            )
    return self._evaluate_from_prediction(
        prediction=prediction,
        ground_truth=ground_truth,
    )
evaluate_batch(ground_truth_batch: Sequence[GroundTruthType], predictions_batch: Sequence[PredictionType] | None = None, input_data_batch: Sequence[InputDataType] | None = None, num_workers: int = 1, show_progress: bool = False, **predict_kwargs: Any) -> dict[str, Any]

Evaluate on a batch of items using parallel processing.

Either predictions_batch or input_data_batch must be provided.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `ground_truth_batch` | `Sequence[GroundTruthType]` | List of corresponding ground truth annotations. | *required* |
| `predictions_batch` | `Sequence[PredictionType]` | A pre-computed list of predictions. | `None` |
| `input_data_batch` | `Sequence[InputDataType]` | List of input data to generate predictions from. | `None` |
| `num_workers` | `int` | Number of parallel workers for calculating metrics. | `1` |
| `show_progress` | `bool` | Whether to show a progress bar. | `False` |
| `**predict_kwargs` | `Any` | Additional arguments passed to `predict_batch`. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `dict[str, Any]` | Dictionary containing aggregated evaluation metrics. |

Raises:

| Type | Description |
| --- | --- |
| `ValueError` | If the number of predictions does not match the number of ground truths, or if required inputs are missing. |

Source code in culicidaelab\core\base_predictor.py
def evaluate_batch(
    self,
    ground_truth_batch: Sequence[GroundTruthType],
    predictions_batch: Sequence[PredictionType] | None = None,
    input_data_batch: Sequence[InputDataType] | None = None,
    num_workers: int = 1,
    show_progress: bool = False,
    **predict_kwargs: Any,
) -> dict[str, Any]:
    """Evaluate on a batch of items using parallel processing.

    Either `predictions_batch` or `input_data_batch` must be provided.

    Args:
        ground_truth_batch (Sequence[GroundTruthType]): List of corresponding
            ground truth annotations.
        predictions_batch (Sequence[PredictionType], optional): A pre-computed
            list of predictions.
        input_data_batch (Sequence[InputDataType], optional): List of input data
            to generate predictions from.
        num_workers (int): Number of parallel workers for calculating metrics.
        show_progress (bool): Whether to show a progress bar.
        **predict_kwargs (Any): Additional arguments passed to `predict_batch`.

    Returns:
        dict[str, Any]: Dictionary containing aggregated evaluation metrics.

    Raises:
        ValueError: If the number of predictions does not match the number
            of ground truths, or if required inputs are missing.
    """
    if predictions_batch is None:
        if input_data_batch is not None:
            predictions_batch = self.predict_batch(
                input_data_batch,
                show_progress=show_progress,
                **predict_kwargs,
            )
        else:
            raise ValueError(
                "Either 'predictions_batch' or 'input_data_batch' must be provided.",
            )

    if len(predictions_batch) != len(ground_truth_batch):
        raise ValueError(
            f"Number of predictions ({len(predictions_batch)}) must match "
            f"number of ground truths ({len(ground_truth_batch)}).",
        )

    per_item_metrics = self._calculate_metrics_parallel(
        predictions_batch,
        ground_truth_batch,
        num_workers,
        show_progress,
    )
    aggregated_metrics = self._aggregate_metrics(per_item_metrics)
    final_report = self._finalize_evaluation_report(
        aggregated_metrics,
        predictions_batch,
        ground_truth_batch,
    )
    return final_report
get_model_info() -> dict[str, Any]

Gets information about the loaded model.

Returns:

| Type | Description |
| --- | --- |
| `dict[str, Any]` | A dictionary containing details about the model, such as architecture, path, etc. |

Source code in culicidaelab\core\base_predictor.py
def get_model_info(self) -> dict[str, Any]:
    """Gets information about the loaded model.

    Returns:
        dict[str, Any]: A dictionary containing details about the model, such
        as architecture, path, etc.
    """
    return {
        "predictor_type": self.predictor_type,
        "model_loaded": self.backend.is_loaded,
        "config": self.config.model_dump(),
    }
load_model() -> None

Delegates model loading to the configured backend.

Source code in culicidaelab\core\base_predictor.py
def load_model(self) -> None:
    """Delegates model loading to the configured backend."""
    if not self.backend.is_loaded:
        self._logger.info(
            f"Loading model for {self.predictor_type} using {self.backend.__class__.__name__}",
        )
        try:
            self.backend.load_model()
            self._logger.info(f"Successfully loaded model for {self.predictor_type}")
        except Exception as e:
            self._logger.error(f"Failed to load model for {self.predictor_type}: {e}")
            raise RuntimeError(f"Failed to load model for {self.predictor_type}: {e}") from e
predict(input_data: InputDataType, **kwargs: Any) -> PredictionType

Makes a prediction on a single input data sample.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data` | `InputDataType` | The input data (e.g., an image as a NumPy array) to make a prediction on. | *required* |
| `**kwargs` | `Any` | Additional predictor-specific arguments. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `PredictionType` | The prediction result, with a format specific to the predictor type. |

Raises:

| Type | Description |
| --- | --- |
| `RuntimeError` | If the model is not loaded before calling this method. |

Source code in culicidaelab\core\base_predictor.py
def predict(
    self,
    input_data: InputDataType,
    **kwargs: Any,
) -> PredictionType:
    """Makes a prediction on a single input data sample.

    Args:
        input_data (InputDataType): The input data (e.g., an image as a NumPy
            array) to make a prediction on.
        **kwargs (Any): Additional predictor-specific arguments.

    Returns:
        PredictionType: The prediction result, with a format specific to the
        predictor type.

    Raises:
        RuntimeError: If the model is not loaded before calling this method.
    """
    if not self.backend.is_loaded:
        try:
            self.load_model()
        except Exception as e:
            raise RuntimeError(f"Failed to load model: {e}") from e

    image = self._load_and_validate_image(input_data)

    raw_output = self.backend.predict(image, **kwargs)

    return self._convert_raw_to_prediction(raw_output)
predict_batch(input_data_batch: Sequence[InputDataType], show_progress: bool = False, **kwargs: Any) -> list[PredictionType]

Makes predictions on a batch of inputs by delegating to the backend.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data_batch` | `Sequence[InputDataType]` | A sequence of inputs. | *required* |
| `show_progress` | `bool` | If True, displays a progress bar. | `False` |
| `**kwargs` | `Any` | Additional arguments for the backend's `predict_batch`. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `list[PredictionType]` | A list of prediction results. |

Source code in culicidaelab\core\base_predictor.py
def predict_batch(
    self,
    input_data_batch: Sequence[InputDataType],
    show_progress: bool = False,
    **kwargs: Any,
) -> list[PredictionType]:
    """Makes predictions on a batch of inputs by delegating to the backend.

    Args:
        input_data_batch (Sequence[InputDataType]): A sequence of inputs.
        show_progress (bool): If True, displays a progress bar.
        **kwargs (Any): Additional arguments for the backend's `predict_batch`.

    Returns:
        list[PredictionType]: A list of prediction results.
    """
    if not input_data_batch:
        return []

    if not self.backend.is_loaded:
        self.load_model()

    raw_predictions = self.backend.predict_batch(list(input_data_batch), **kwargs)
    final_predictions = [self._convert_raw_to_prediction(raw_pred) for raw_pred in raw_predictions]
    return final_predictions
unload_model() -> None

Unloads the model to free memory.

Source code in culicidaelab\core\base_predictor.py
def unload_model(self) -> None:
    """Unloads the model to free memory."""
    if self.backend.is_loaded:
        self.backend.unload_model()
        self._logger.info(f"Unloaded model for {self.predictor_type}")
__init__(settings: Settings, predictor_type='segmenter', mode: Literal['torch', 'serve'] | None = None, load_model: bool = False, backend: BaseInferenceBackend | None = None) -> None

Initializes the MosquitoSegmenter.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `settings` | `Settings` | The main settings object for the library. | *required* |
| `predictor_type` | | The type of predictor. Defaults to "segmenter". | `'segmenter'` |
| `mode` | `Literal['torch', 'serve'] \| None` | The mode to run the predictor in, 'torch' or 'serve'. If None, it's determined by the environment. | `None` |
| `load_model` | `bool` | If True, load the model upon initialization. | `False` |
| `backend` | `BaseInferenceBackend \| None` | An optional backend instance. If not provided, one will be created based on the mode and settings. | `None` |
Source code in culicidaelab\predictors\segmenter.py
def __init__(
    self,
    settings: Settings,
    predictor_type="segmenter",
    mode: Literal["torch", "serve"] | None = None,
    load_model: bool = False,
    backend: BaseInferenceBackend | None = None,
) -> None:
    """Initializes the MosquitoSegmenter.

    Args:
        settings: The main settings object for the library.
        predictor_type: The type of predictor. Defaults to "segmenter".
        mode: The mode to run the predictor in, 'torch' or 'serve'.
            If None, it's determined by the environment.
        load_model: If True, load the model upon initialization.
        backend: An optional backend instance. If not provided, one will be
            created based on the mode and settings.
    """

    backend_instance = backend or create_backend(
        predictor_type=predictor_type,
        settings=settings,
        mode=mode,
    )

    super().__init__(
        settings=settings,
        predictor_type=predictor_type,
        backend=backend_instance,
        load_model=load_model,
    )
visualize(input_data: ImageInput, predictions: SegmentationPrediction, save_path: str | Path | None = None) -> np.ndarray

Overlays a segmentation mask on the original image.

Example:

    >>> from culicidaelab.settings import Settings
    >>> from culicidaelab.predictors import MosquitoSegmenter
    >>> # This example assumes you have a configured settings object
    >>> settings = Settings()
    >>> segmenter = MosquitoSegmenter(settings, load_model=True)
    >>> image = "path/to/your/image.jpg"
    >>> # Assuming you have a prediction from segmenter.predict()
    >>> prediction = segmenter.predict(image)
    >>> viz_image = segmenter.visualize(image, prediction, save_path="viz.jpg")

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data` | `ImageInput` | The original image. | *required* |
| `predictions` | `SegmentationPrediction` | The `SegmentationPrediction` from `predict`. | *required* |
| `save_path` | `str \| Path \| None` | If provided, the output image is saved to this path. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ndarray` | A numpy array of the image with the segmentation mask overlaid. |

Source code in culicidaelab\predictors\segmenter.py
def visualize(
    self,
    input_data: ImageInput,
    predictions: SegmentationPrediction,
    save_path: str | Path | None = None,
) -> np.ndarray:
    """Overlays a segmentation mask on the original image.

    Example:
        >>> from culicidaelab.settings import Settings
        >>> from culicidaelab.predictors import MosquitoSegmenter
        >>> # This example assumes you have a configured settings object
        >>> settings = Settings()
        >>> segmenter = MosquitoSegmenter(settings, load_model=True)
        >>> image = "path/to/your/image.jpg"
        >>> # Assuming you have a prediction from segmenter.predict()
        >>> prediction = segmenter.predict(image)
        >>> viz_image = segmenter.visualize(image, prediction, save_path="viz.jpg")

    Args:
        input_data: The original image.
        predictions: The `SegmentationPrediction` from `predict`.
        save_path: If provided, the output image is saved to this path.

    Returns:
        A numpy array of the image with the segmentation mask overlaid.
    """

    image_pil = self._load_and_validate_image(input_data)

    colored_mask = Image.new("RGB", image_pil.size, self.config.visualization.overlay_color)

    # Build an alpha mask: segmented pixels take the overlay color
    alpha_mask = Image.fromarray((predictions.mask * 255).astype(np.uint8))

    # Composite the images
    overlay = Image.composite(colored_mask, image_pil, alpha_mask)

    if save_path:
        save_path = Path(save_path)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        overlay.save(str(save_path))

    return np.array(overlay)
ModelWeightsManager

Manages the download and local availability of model weights.

This class implements the WeightsManagerProtocol and serves as the bridge between a predictor and the provider service that can download model files.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `settings` | `Settings` | The application's global settings object. |
| `provider_service` | `ProviderService` | The service used to access and download model weights from various providers. |

Source code in culicidaelab\predictors\model_weights_manager.py
class ModelWeightsManager(WeightsManagerProtocol):
    """Manages the download and local availability of model weights.

    This class implements the WeightsManagerProtocol and serves as the bridge
    between a predictor and the provider service that can download model files.

    Attributes:
        settings (Settings): The application's global settings object.
        provider_service (ProviderService): The service used to access and
            download model weights from various providers.
    """

    def __init__(self, settings: Settings):
        """Initializes the ModelWeightsManager.

        Args:
            settings: The application's global settings object.
        """
        self.settings = settings
        self.provider_service = ProviderService(settings)

    def ensure_weights(self, predictor_type: str, backend_type: str) -> Path:
        """Ensures weights for a given predictor and backend are available.

        This method checks if the model weights for the specified predictor and
        backend type exist locally. If they don't, it downloads them using the
        provider service.

        Example:
            >>> from culicidaelab.settings import Settings
            >>> from culicidaelab.predictors import ModelWeightsManager
            >>> # This example assumes you have a configured settings object
            >>> settings = Settings()
            >>> manager = ModelWeightsManager(settings)
            >>> weights_path = manager.ensure_weights("classifier", "torch")
            >>> print(weights_path.exists())
            True

        Args:
            predictor_type: The type of predictor (e.g., 'classifier').
            backend_type: The type of backend (e.g., 'torch', 'onnx').

        Returns:
            The absolute path to the local model weights file.

        Raises:
            RuntimeError: If the weights cannot be resolved or downloaded.
            ValueError: If the configuration for the weights is missing
                'repository_id' or 'filename'.
        """

        try:
            local_path = self.settings.construct_weights_path(
                predictor_type=predictor_type,
                backend=backend_type,
            )

            if local_path.exists():
                return local_path

            predictor_config = self.settings.get_config(f"predictors.{predictor_type}")
            # Construct the config key to get the specific weights info
            weights_config_key = f"predictors.{predictor_type}.weights.{backend_type}"
            weights_config = self.settings.get_config(weights_config_key)

            # The repository can be overridden at the weights level
            repo_id = predictor_config.repository_id
            filename = weights_config.filename

            if not all([repo_id, filename]):
                raise ValueError(f"Missing 'repository_id' or 'filename' for {weights_config_key}")

            provider_name = predictor_config.provider_name or "huggingface"  # Default provider
            provider = self.provider_service.get_provider(provider_name)

            # Assuming provider has a method to download a specific file
            return provider.download_model_weights(
                repo_id=repo_id,
                filename=filename,
                local_dir=local_path.parent,
            )

        except Exception as e:
            error_msg = f"Failed to resolve weights for '{predictor_type}' with backend '{backend_type}': {e}"
            raise RuntimeError(error_msg) from e
settings = settings instance-attribute
provider_service = ProviderService(settings) instance-attribute
__init__(settings: Settings)

Initializes the ModelWeightsManager.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `settings` | `Settings` | The application's global settings object. | *required* |
Source code in culicidaelab\predictors\model_weights_manager.py
def __init__(self, settings: Settings):
    """Initializes the ModelWeightsManager.

    Args:
        settings: The application's global settings object.
    """
    self.settings = settings
    self.provider_service = ProviderService(settings)
ensure_weights(predictor_type: str, backend_type: str) -> Path

Ensures weights for a given predictor and backend are available.

This method checks if the model weights for the specified predictor and backend type exist locally. If they don't, it downloads them using the provider service.

Example:

    >>> from culicidaelab.settings import Settings
    >>> from culicidaelab.predictors import ModelWeightsManager
    >>> # This example assumes you have a configured settings object
    >>> settings = Settings()
    >>> manager = ModelWeightsManager(settings)
    >>> weights_path = manager.ensure_weights("classifier", "torch")
    >>> print(weights_path.exists())
    True

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `predictor_type` | `str` | The type of predictor (e.g., 'classifier'). | *required* |
| `backend_type` | `str` | The type of backend (e.g., 'torch', 'onnx'). | *required* |

Returns:

| Type | Description |
| --- | --- |
| `Path` | The absolute path to the local model weights file. |

Raises:

| Type | Description |
| --- | --- |
| `RuntimeError` | If the weights cannot be resolved or downloaded. |
| `ValueError` | If the configuration for the weights is missing 'repository_id' or 'filename'. |

Source code in culicidaelab\predictors\model_weights_manager.py
def ensure_weights(self, predictor_type: str, backend_type: str) -> Path:
    """Ensures weights for a given predictor and backend are available.

    This method checks if the model weights for the specified predictor and
    backend type exist locally. If they don't, it downloads them using the
    provider service.

    Example:
        >>> from culicidaelab.settings import Settings
        >>> from culicidaelab.predictors import ModelWeightsManager
        >>> # This example assumes you have a configured settings object
        >>> settings = Settings()
        >>> manager = ModelWeightsManager(settings)
        >>> weights_path = manager.ensure_weights("classifier", "torch")
        >>> print(weights_path.exists())
        True

    Args:
        predictor_type: The type of predictor (e.g., 'classifier').
        backend_type: The type of backend (e.g., 'torch', 'onnx').

    Returns:
        The absolute path to the local model weights file.

    Raises:
        RuntimeError: If the weights cannot be resolved or downloaded.
        ValueError: If the configuration for the weights is missing
            'repository_id' or 'filename'.
    """

    try:
        local_path = self.settings.construct_weights_path(
            predictor_type=predictor_type,
            backend=backend_type,
        )

        if local_path.exists():
            return local_path

        predictor_config = self.settings.get_config(f"predictors.{predictor_type}")
        # Construct the config key to get the specific weights info
        weights_config_key = f"predictors.{predictor_type}.weights.{backend_type}"
        weights_config = self.settings.get_config(weights_config_key)

        # The repository can be overridden at the weights level
        repo_id = predictor_config.repository_id
        filename = weights_config.filename

        if not all([repo_id, filename]):
            raise ValueError(f"Missing 'repository_id' or 'filename' for {weights_config_key}")

        provider_name = predictor_config.provider_name or "huggingface"  # Default provider
        provider = self.provider_service.get_provider(provider_name)

        # Assuming provider has a method to download a specific file
        return provider.download_model_weights(
            repo_id=repo_id,
            filename=filename,
            local_dir=local_path.parent,
        )

    except Exception as e:
        error_msg = f"Failed to resolve weights for '{predictor_type}' with backend '{backend_type}': {e}"
        raise RuntimeError(error_msg) from e