Home

`ping()`

GET: /ping

Endpoint to check if the server is running.

Returns:

Name	Type	Description
`Response`		Response with status 200 if the server is running, or status 400 if the check fails.

Source code in app.py

@app.get("/ping")
def ping():
    """
    # GET: /ping

    Health-check endpoint.

    Opens a gRPC client for the configured model server address and closes
    it again straight away.

    Returns:
        Response: Status 200 when the client could be created and closed,
            status 400 if doing so raised an exception.
    """
    try:
        # The client is only needed for the duration of the check, so it is
        # closed as soon as it has been constructed.
        grpcclient.InferenceServerClient(
            url=config.grcp_model_server_address, verbose=False
        ).close()
    except Exception:
        return Response(status_code=400)
    return Response(status_code=200)

`predict_image(image=File(...), examination_id=Header(None))`

POST: /invocations

Endpoint to process an image and send it to the inference server.

Parameters:

Name	Type	Description	Default
`image`	`UploadFile`	Image file to process (in the request body).	`File(...)`

Headers

Content-Type: Type of the image. Can be "image/jpeg", "image/png", "image/tiff", "image/bmp", "image/jpg".

Examination-Id: ID of the examination. Used to track the request.

Returns:

Type	Description
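`JSONResponse`	JSON body containing `filename`, `status`, `request_uuid`, `examination_id` and `output` on success (status 200), or `message`, `status`, `request_uuid` and `examination_id` on failure (status 400).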

Raises: Response: Error response if the content type is not supported.

Source code in app.py

@app.post("/invocations")
def predict_image(image: UploadFile = File(...), examination_id: str = Header(None)):
    """
    # POST: /invocations

    Endpoint to process an image and send it to the inference server.

    The uploaded bytes are decoded with OpenCV, sent to the configured model
    together with the run parameters, and the parsed model output is returned
    as JSON. Every request gets a fresh UUID that is attached to all log
    events and to the response.

    Args:
        image (UploadFile): Image file to process (in the request body).
        examination_id (str): Value of the *Examination-Id* header. Echoed in
            the log events and in the response body.

    Headers:
        *Content-Type*: Type of the image. Can be "image/jpeg", "image/png", "image/tiff", "image/bmp", "image/jpg".
        *Examination-Id*: ID of the examination. Used to track the request.

    Returns:
        JSONResponse: Status 200 with `filename`, `status`, `request_uuid`,
            `examination_id` and `output` on success; status 400 with
            `message`, `status`, `request_uuid` and `examination_id` if any
            step raises an exception.
        Response: Status 415 (plain text) if the content type of the image
            is not in `config.available_content_types`.
    """
    client = None
    request_uuid = str(uuid.uuid4())

    # Fields shared by every log event emitted for this request.
    log_context = {
        "model_name": config.model_name,
        "examination_id": examination_id,
        "local_filename": image.filename,
        "request_uuid": request_uuid,
    }

    try:
        if image.content_type not in config.available_content_types:
            log_event(
                logger,
                LogEventDataclass(
                    event_message="Rejected unsupported content type.",
                    level=LoggingLevel.WARNING,
                    status=Status.FAILED,
                    pipeline_step="invocations_validate_request",
                    **log_context,
                ),
            )
            return Response(
                status_code=415,
                content="Cannot decode image data. Is content_type correct?",
                media_type="text/plain",
            )

        client = grpcclient.InferenceServerClient(
            url=config.grcp_model_server_address,
            verbose=False,
            channel_args=(("grpc.lb_policy_name", "round_robin"),),
        )
        model_config = client.get_model_config(
            model_name=config.model_name,
            model_version=config.model_version,
            as_json=True,
        )["config"]

        contents = image.file.read()
        image_bytes = np.frombuffer(contents, dtype=np.uint8)
        img = cv2.imdecode(image_bytes, cv2.IMREAD_COLOR)

        # cv2.imdecode signals undecodable data by returning None.
        if img is None:
            raise ValueError("Cannot decode image data.")

        log_event(
            logger,
            LogEventDataclass(
                event_message="Received image for inference.",
                level=LoggingLevel.INFO,
                status=Status.SUCCESS,
                pipeline_step="invocations_read_image",
                **log_context,
            ),
        )

        run_params = _build_run_params()

        # Add the leading batch axis once and derive the declared input shape
        # from the very array that is sent, so the two cannot drift apart.
        batch = img[np.newaxis, ...]
        image_input = grpcclient.InferInput(
            "IMAGE", list(batch.shape), np_to_triton_dtype(img.dtype)
        )
        image_input.set_data_from_numpy(batch)

        params_input = grpcclient.InferInput("INPUT_JSON_PARAMS", (1, 1), "BYTES")
        params_input.set_data_from_numpy(np.array([[run_params]], dtype=np.object_))

        # Request every output declared in the model configuration.
        outputs = [
            grpcclient.InferRequestedOutput(output["name"])
            for output in model_config["output"]
        ]

        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments. It covers inference plus output parsing.
        start = time.perf_counter()
        result = client.infer(
            model_name=config.model_name,
            model_version=config.model_version,
            inputs=[image_input, params_input],
            outputs=outputs,
        )
        output_data = _parse_inference_output(model_config, result)
        elapsed = time.perf_counter() - start

        log_event(
            logger,
            LogEventDataclass(
                event_message="Completed inference request.",
                level=LoggingLevel.INFO,
                status=Status.SUCCESS,
                pipeline_step="invocations_infer_image",
                elapsed_time_s=elapsed,
                **log_context,
            ),
        )

        return JSONResponse(
            content={
                "filename": image.filename,
                "status": "COMPLETED",
                "request_uuid": request_uuid,
                "examination_id": examination_id,
                "output": output_data,
            },
            status_code=200,
        )
    except Exception as e:
        # Request boundary: any failure is logged and reported to the caller
        # as a 400 JSON response instead of propagating.
        log_event(
            logger,
            LogEventDataclass(
                event_message="Inference request failed.",
                level=LoggingLevel.ERROR,
                status=Status.ERROR,
                pipeline_step="invocations_infer_image",
                error=type(e).__name__,
                error_message=str(e),
                **log_context,
            ),
        )
        return JSONResponse(
            content={
                "message": str(e),
                "status": "error",
                "request_uuid": request_uuid,
                "examination_id": examination_id,
            },
            status_code=400,
        )
    finally:
        # The client is created lazily inside the try block; close it only if
        # that point was reached.
        if client is not None:
            client.close()