Initial commit
@@ -0,0 +1,25 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MediaPipe Solutions Python API."""

import mediapipe.python.solutions.drawing_styles
import mediapipe.python.solutions.drawing_utils
import mediapipe.python.solutions.face_detection
import mediapipe.python.solutions.face_mesh
import mediapipe.python.solutions.hands
import mediapipe.python.solutions.holistic
import mediapipe.python.solutions.objectron
import mediapipe.python.solutions.pose
import mediapipe.python.solutions.selfie_segmentation
Binary files not shown.
@@ -0,0 +1,37 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Downloading utils."""

import os
import shutil
import urllib.request

_OSS_URL_PREFIX = 'https://github.com/google/mediapipe/raw/master/'


def download_oss_model(model_path: str):
  """Downloads the oss model from the MediaPipe GitHub repo if it doesn't exist in the package."""

  mp_root_path = os.sep.join(os.path.abspath(__file__).split(os.sep)[:-4])
  model_abspath = os.path.join(mp_root_path, model_path)
  if os.path.exists(model_abspath):
    return
  model_url = _OSS_URL_PREFIX + model_path
  print('Downloading model to ' + model_abspath)
  with urllib.request.urlopen(model_url) as response, open(model_abspath,
                                                           'wb') as out_file:
    if response.code != 200:
      raise ConnectionError('Cannot download ' + model_path +
                            ' from the MediaPipe Github repo.')
    shutil.copyfileobj(response, out_file)
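A minimal call sketch for the helper above (hedged: the model path below is an assumed example for illustration; any model path under the MediaPipe repo layout downloads the same way):

from mediapipe.python.solutions import download_utils

# Fetches the model into the installed package tree if it is not already there.
download_utils.download_oss_model(
    'mediapipe/modules/face_detection/face_detection_short_range.tflite')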
@@ -0,0 +1,146 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe solution drawing styles."""

from typing import Mapping, Tuple

from mediapipe.python.solutions.drawing_utils import DrawingSpec
from mediapipe.python.solutions.hands import HandLandmark

_RADIUS = 5
_RED = (54, 67, 244)
_GREEN = (118, 230, 0)
_BLUE = (192, 101, 21)
_YELLOW = (0, 204, 255)
_GRAY = (174, 164, 144)
_PURPLE = (128, 64, 128)
_PEACH = (180, 229, 255)

# Hands
_THICKNESS_WRIST_MCP = 3
_THICKNESS_FINGER = 2
_THICKNESS_DOT = -1

# Hand landmarks
_PALM_LANDMARKS = (HandLandmark.WRIST, HandLandmark.THUMB_CMC,
                   HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP,
                   HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP)
_THUMB_LANDMARKS = (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP,
                    HandLandmark.THUMB_TIP)
_INDEX_FINGER_LANDMARKS = (HandLandmark.INDEX_FINGER_PIP,
                           HandLandmark.INDEX_FINGER_DIP,
                           HandLandmark.INDEX_FINGER_TIP)
_MIDDLE_FINGER_LANDMARKS = (HandLandmark.MIDDLE_FINGER_PIP,
                            HandLandmark.MIDDLE_FINGER_DIP,
                            HandLandmark.MIDDLE_FINGER_TIP)
_RING_FINGER_LANDMARKS = (HandLandmark.RING_FINGER_PIP,
                          HandLandmark.RING_FINGER_DIP,
                          HandLandmark.RING_FINGER_TIP)
_PINKY_FINGER_LANDMARKS = (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP,
                           HandLandmark.PINKY_TIP)
_HAND_LANDMARK_STYLE = {
    _PALM_LANDMARKS:
        DrawingSpec(
            color=_RED, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _THUMB_LANDMARKS:
        DrawingSpec(
            color=_PEACH, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _INDEX_FINGER_LANDMARKS:
        DrawingSpec(
            color=_PURPLE, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _MIDDLE_FINGER_LANDMARKS:
        DrawingSpec(
            color=_YELLOW, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _RING_FINGER_LANDMARKS:
        DrawingSpec(
            color=_GREEN, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _PINKY_FINGER_LANDMARKS:
        DrawingSpec(
            color=_BLUE, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
}

# Hand connections
_PALM_CONNECTIONS = ((HandLandmark.WRIST, HandLandmark.THUMB_CMC),
                     (HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
                     (HandLandmark.MIDDLE_FINGER_MCP,
                      HandLandmark.RING_FINGER_MCP),
                     (HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
                     (HandLandmark.INDEX_FINGER_MCP,
                      HandLandmark.MIDDLE_FINGER_MCP), (HandLandmark.WRIST,
                                                        HandLandmark.PINKY_MCP))
_THUMB_CONNECTIONS = ((HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
                      (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
                      (HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP))
_INDEX_FINGER_CONNECTIONS = ((HandLandmark.INDEX_FINGER_MCP,
                              HandLandmark.INDEX_FINGER_PIP),
                             (HandLandmark.INDEX_FINGER_PIP,
                              HandLandmark.INDEX_FINGER_DIP),
                             (HandLandmark.INDEX_FINGER_DIP,
                              HandLandmark.INDEX_FINGER_TIP))
_MIDDLE_FINGER_CONNECTIONS = ((HandLandmark.MIDDLE_FINGER_MCP,
                               HandLandmark.MIDDLE_FINGER_PIP),
                              (HandLandmark.MIDDLE_FINGER_PIP,
                               HandLandmark.MIDDLE_FINGER_DIP),
                              (HandLandmark.MIDDLE_FINGER_DIP,
                               HandLandmark.MIDDLE_FINGER_TIP))
_RING_FINGER_CONNECTIONS = ((HandLandmark.RING_FINGER_MCP,
                             HandLandmark.RING_FINGER_PIP),
                            (HandLandmark.RING_FINGER_PIP,
                             HandLandmark.RING_FINGER_DIP),
                            (HandLandmark.RING_FINGER_DIP,
                             HandLandmark.RING_FINGER_TIP))
_PINKY_FINGER_CONNECTIONS = ((HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
                             (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
                             (HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP))
_HAND_CONNECTION_STYLE = {
    _PALM_CONNECTIONS:
        DrawingSpec(color=_GRAY, thickness=_THICKNESS_WRIST_MCP),
    _THUMB_CONNECTIONS:
        DrawingSpec(color=_PEACH, thickness=_THICKNESS_FINGER),
    _INDEX_FINGER_CONNECTIONS:
        DrawingSpec(color=_PURPLE, thickness=_THICKNESS_FINGER),
    _MIDDLE_FINGER_CONNECTIONS:
        DrawingSpec(color=_YELLOW, thickness=_THICKNESS_FINGER),
    _RING_FINGER_CONNECTIONS:
        DrawingSpec(color=_GREEN, thickness=_THICKNESS_FINGER),
    _PINKY_FINGER_CONNECTIONS:
        DrawingSpec(color=_BLUE, thickness=_THICKNESS_FINGER)
}


def get_default_hand_landmark_style() -> Mapping[int, DrawingSpec]:
  """Returns the default hand landmark drawing style.

  Returns:
    A mapping from each hand landmark to the default drawing spec.
  """
  hand_landmark_style = {}
  for k, v in _HAND_LANDMARK_STYLE.items():
    for landmark in k:
      hand_landmark_style[landmark] = v
  return hand_landmark_style


def get_default_hand_connection_style(
) -> Mapping[Tuple[int, int], DrawingSpec]:
  """Returns the default hand connection drawing style.

  Returns:
    A mapping from each hand connection to the default drawing spec.
  """
  hand_connection_style = {}
  for k, v in _HAND_CONNECTION_STYLE.items():
    for connection in k:
      hand_connection_style[connection] = v
  return hand_connection_style
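A hedged usage sketch for the two getters above ('hand.jpg' is an assumed input file, not one shipped with the package): the returned mappings plug directly into drawing_utils.draw_landmarks as per-landmark and per-connection specs.

import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

image = cv2.cvtColor(cv2.imread('hand.jpg'), cv2.COLOR_BGR2RGB)  # assumed file
with mp_hands.Hands(static_image_mode=True) as hands:
  results = hands.process(image)
for hand_landmarks in results.multi_hand_landmarks or []:
  # Each landmark/connection is looked up in the style mapping when drawn.
  mp_drawing.draw_landmarks(
      image, hand_landmarks, list(mp_hands.HAND_CONNECTIONS),
      landmark_drawing_spec=mp_styles.get_default_hand_landmark_style(),
      connection_drawing_spec=mp_styles.get_default_hand_connection_style())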
@@ -0,0 +1,307 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MediaPipe solution drawing utils."""

import math
from typing import List, Mapping, Optional, Tuple, Union

import cv2
import dataclasses
import matplotlib.pyplot as plt
import numpy as np

from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import location_data_pb2
from mediapipe.framework.formats import landmark_pb2

PRESENCE_THRESHOLD = 0.5
RGB_CHANNELS = 3
BLACK_COLOR = (0, 0, 0)
RED_COLOR = (0, 0, 255)
GREEN_COLOR = (0, 128, 0)
BLUE_COLOR = (255, 0, 0)
VISIBILITY_THRESHOLD = 0.5


@dataclasses.dataclass
class DrawingSpec:
  # Color for drawing the annotation. Default to the green color.
  color: Tuple[int, int, int] = (0, 255, 0)
  # Thickness for drawing the annotation. Default to 2 pixels.
  thickness: int = 2
  # Circle radius. Default to 2 pixels.
  circle_radius: int = 2


def _normalized_to_pixel_coordinates(
    normalized_x: float, normalized_y: float, image_width: int,
    image_height: int) -> Union[None, Tuple[int, int]]:
  """Converts normalized value pair to pixel coordinates."""

  # Checks if the float value is between 0 and 1.
  def is_valid_normalized_value(value: float) -> bool:
    return (value > 0 or math.isclose(0, value)) and (value < 1 or
                                                      math.isclose(1, value))

  if not (is_valid_normalized_value(normalized_x) and
          is_valid_normalized_value(normalized_y)):
    # TODO: Draw coordinates even if it's outside of the image bounds.
    return None
  x_px = min(math.floor(normalized_x * image_width), image_width - 1)
  y_px = min(math.floor(normalized_y * image_height), image_height - 1)
  return x_px, y_px


def draw_detection(
    image: np.ndarray,
    detection: detection_pb2.Detection,
    keypoint_drawing_spec: DrawingSpec = DrawingSpec(color=RED_COLOR),
    bbox_drawing_spec: DrawingSpec = DrawingSpec()):
  """Draws the detection bounding box and keypoints on the image.

  Args:
    image: A three channel RGB image represented as numpy ndarray.
    detection: A detection proto message to be annotated on the image.
    keypoint_drawing_spec: A DrawingSpec object that specifies the keypoints'
      drawing settings such as color, line thickness, and circle radius.
    bbox_drawing_spec: A DrawingSpec object that specifies the bounding box's
      drawing settings such as color and line thickness.

  Raises:
    ValueError: If one of the following:
      a) The input image is not three channel RGB.
      b) The location data is not relative data.
  """
  if not detection.location_data:
    return
  if image.shape[2] != RGB_CHANNELS:
    raise ValueError('Input image must contain three channel rgb data.')
  image_rows, image_cols, _ = image.shape

  location = detection.location_data
  if location.format != location_data_pb2.LocationData.RELATIVE_BOUNDING_BOX:
    raise ValueError(
        'LocationData must be relative for this drawing function to work.')
  # Draws keypoints.
  for keypoint in location.relative_keypoints:
    keypoint_px = _normalized_to_pixel_coordinates(keypoint.x, keypoint.y,
                                                   image_cols, image_rows)
    cv2.circle(image, keypoint_px, keypoint_drawing_spec.circle_radius,
               keypoint_drawing_spec.color, keypoint_drawing_spec.thickness)
  # Draws bounding box if exists.
  if not location.HasField('relative_bounding_box'):
    return
  relative_bounding_box = location.relative_bounding_box
  rect_start_point = _normalized_to_pixel_coordinates(
      relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols,
      image_rows)
  rect_end_point = _normalized_to_pixel_coordinates(
      relative_bounding_box.xmin + relative_bounding_box.width,
      relative_bounding_box.ymin + relative_bounding_box.height, image_cols,
      image_rows)
  cv2.rectangle(image, rect_start_point, rect_end_point,
                bbox_drawing_spec.color, bbox_drawing_spec.thickness)


def draw_landmarks(
    image: np.ndarray,
    landmark_list: landmark_pb2.NormalizedLandmarkList,
    connections: Optional[List[Tuple[int, int]]] = None,
    landmark_drawing_spec: Union[DrawingSpec,
                                 Mapping[int, DrawingSpec]] = DrawingSpec(
                                     color=RED_COLOR),
    connection_drawing_spec: Union[DrawingSpec,
                                   Mapping[Tuple[int, int],
                                           DrawingSpec]] = DrawingSpec()):
  """Draws the landmarks and the connections on the image.

  Args:
    image: A three channel RGB image represented as numpy ndarray.
    landmark_list: A normalized landmark list proto message to be annotated on
      the image.
    connections: A list of landmark index tuples that specifies which landmarks
      are to be connected in the drawing.
    landmark_drawing_spec: Either a DrawingSpec object or a mapping from hand
      landmarks to the DrawingSpecs that specifies the landmarks' drawing
      settings such as color, line thickness, and circle radius.
    connection_drawing_spec: Either a DrawingSpec object or a mapping from hand
      connections to the DrawingSpecs that specifies the connections' drawing
      settings such as color and line thickness.

  Raises:
    ValueError: If one of the following:
      a) The input image is not three channel RGB.
      b) Any connection contains an invalid landmark index.
  """
  if not landmark_list:
    return
  if image.shape[2] != RGB_CHANNELS:
    raise ValueError('Input image must contain three channel rgb data.')
  image_rows, image_cols, _ = image.shape
  idx_to_coordinates = {}
  for idx, landmark in enumerate(landmark_list.landmark):
    if ((landmark.HasField('visibility') and
         landmark.visibility < VISIBILITY_THRESHOLD) or
        (landmark.HasField('presence') and
         landmark.presence < PRESENCE_THRESHOLD)):
      continue
    landmark_px = _normalized_to_pixel_coordinates(landmark.x, landmark.y,
                                                   image_cols, image_rows)
    if landmark_px:
      idx_to_coordinates[idx] = landmark_px
  if connections:
    num_landmarks = len(landmark_list.landmark)
    # Draws the connections if the start and end landmarks are both visible.
    for connection in connections:
      start_idx = connection[0]
      end_idx = connection[1]
      if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks):
        raise ValueError(f'Landmark index is out of range. Invalid connection '
                         f'from landmark #{start_idx} to landmark #{end_idx}.')
      if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates:
        drawing_spec = connection_drawing_spec[connection] if isinstance(
            connection_drawing_spec, Mapping) else connection_drawing_spec
        cv2.line(image, idx_to_coordinates[start_idx],
                 idx_to_coordinates[end_idx], drawing_spec.color,
                 drawing_spec.thickness)
  # Draws landmark points after finishing the connection lines, which is
  # aesthetically better.
  for idx, landmark_px in idx_to_coordinates.items():
    drawing_spec = landmark_drawing_spec[idx] if isinstance(
        landmark_drawing_spec, Mapping) else landmark_drawing_spec
    cv2.circle(image, landmark_px, drawing_spec.circle_radius,
               drawing_spec.color, drawing_spec.thickness)


def draw_axis(
    image: np.ndarray,
    rotation: np.ndarray,
    translation: np.ndarray,
    focal_length: Tuple[float, float] = (1.0, 1.0),
    principal_point: Tuple[float, float] = (0.0, 0.0),
    axis_length: float = 0.1,
    axis_drawing_spec: DrawingSpec = DrawingSpec()):
  """Draws the 3D axis on the image.

  Args:
    image: A three channel RGB image represented as numpy ndarray.
    rotation: Rotation matrix from object to camera coordinate frame.
    translation: Translation vector from object to camera coordinate frame.
    focal_length: Camera focal length along x and y directions.
    principal_point: Camera principal point in x and y.
    axis_length: Length of the axis in the drawing.
    axis_drawing_spec: A DrawingSpec object that specifies the xyz axis drawing
      settings such as line thickness.

  Raises:
    ValueError: If the input image is not three channel RGB.
  """
  if image.shape[2] != RGB_CHANNELS:
    raise ValueError('Input image must contain three channel rgb data.')
  image_rows, image_cols, _ = image.shape
  # Create axis points in camera coordinate frame.
  axis_world = np.float32([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
  axis_cam = np.matmul(rotation, axis_length*axis_world.T).T + translation
  x = axis_cam[..., 0]
  y = axis_cam[..., 1]
  z = axis_cam[..., 2]
  # Project 3D points to NDC space.
  fx, fy = focal_length
  px, py = principal_point
  x_ndc = np.clip(-fx * x / (z + 1e-5) + px, -1., 1.)
  y_ndc = np.clip(-fy * y / (z + 1e-5) + py, -1., 1.)
  # Convert from NDC space to image space.
  x_im = np.int32((1 + x_ndc) * 0.5 * image_cols)
  y_im = np.int32((1 - y_ndc) * 0.5 * image_rows)
  # Draw xyz axis on the image.
  origin = (x_im[0], y_im[0])
  x_axis = (x_im[1], y_im[1])
  y_axis = (x_im[2], y_im[2])
  z_axis = (x_im[3], y_im[3])
  cv2.arrowedLine(image, origin, x_axis, RED_COLOR,
                  axis_drawing_spec.thickness)
  cv2.arrowedLine(image, origin, y_axis, GREEN_COLOR,
                  axis_drawing_spec.thickness)
  cv2.arrowedLine(image, origin, z_axis, BLUE_COLOR,
                  axis_drawing_spec.thickness)


def _normalize_color(color):
  return tuple(v / 255. for v in color)


def plot_landmarks(landmark_list: landmark_pb2.NormalizedLandmarkList,
                   connections: Optional[List[Tuple[int, int]]] = None,
                   landmark_drawing_spec: DrawingSpec = DrawingSpec(
                       color=RED_COLOR, thickness=5),
                   connection_drawing_spec: DrawingSpec = DrawingSpec(
                       color=BLACK_COLOR, thickness=5),
                   elevation: int = 10,
                   azimuth: int = 10):
  """Plot the landmarks and the connections in matplotlib 3d.

  Args:
    landmark_list: A normalized landmark list proto message to be plotted.
    connections: A list of landmark index tuples that specifies which landmarks
      are to be connected.
    landmark_drawing_spec: A DrawingSpec object that specifies the landmarks'
      drawing settings such as color and line thickness.
    connection_drawing_spec: A DrawingSpec object that specifies the
      connections' drawing settings such as color and line thickness.
    elevation: The elevation from which to view the plot.
    azimuth: The azimuth angle to rotate the plot.

  Raises:
    ValueError: If any connection contains an invalid landmark index.
  """
  if not landmark_list:
    return
  plt.figure(figsize=(10, 10))
  ax = plt.axes(projection='3d')
  ax.view_init(elev=elevation, azim=azimuth)
  plotted_landmarks = {}
  for idx, landmark in enumerate(landmark_list.landmark):
    if ((landmark.HasField('visibility') and
         landmark.visibility < VISIBILITY_THRESHOLD) or
        (landmark.HasField('presence') and
         landmark.presence < PRESENCE_THRESHOLD)):
      continue
    ax.scatter3D(
        xs=[-landmark.z],
        ys=[landmark.x],
        zs=[-landmark.y],
        color=_normalize_color(landmark_drawing_spec.color[::-1]),
        linewidth=landmark_drawing_spec.thickness)
    plotted_landmarks[idx] = (-landmark.z, landmark.x, -landmark.y)
  if connections:
    num_landmarks = len(landmark_list.landmark)
    # Draws the connections if the start and end landmarks are both visible.
    for connection in connections:
      start_idx = connection[0]
      end_idx = connection[1]
      if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks):
        raise ValueError(f'Landmark index is out of range. Invalid connection '
                         f'from landmark #{start_idx} to landmark #{end_idx}.')
      if start_idx in plotted_landmarks and end_idx in plotted_landmarks:
        landmark_pair = [
            plotted_landmarks[start_idx], plotted_landmarks[end_idx]
        ]
        ax.plot3D(
            xs=[landmark_pair[0][0], landmark_pair[1][0]],
            ys=[landmark_pair[0][1], landmark_pair[1][1]],
            zs=[landmark_pair[0][2], landmark_pair[1][2]],
            color=_normalize_color(connection_drawing_spec.color[::-1]),
            linewidth=connection_drawing_spec.thickness)
  plt.show()
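To see why the zero-translation axis test further below expects the x arrow to end at pixel (0, 50) on a 100x100 image, here is the draw_axis projection arithmetic replayed in isolation (a standalone sketch of the same formulas, not part of the commit):

import numpy as np

fx = fy = 1.0       # default focal_length
px = py = 0.0       # default principal_point
rows = cols = 100
# Origin and x-axis tip in camera space, for rotation=I, translation=0,
# axis_length=0.1.
axis_cam = np.float32([[0, 0, 0], [0.1, 0, 0]])
x, y, z = axis_cam[:, 0], axis_cam[:, 1], axis_cam[:, 2]
# NDC projection with the same 1e-5 epsilon: the tip maps to -0.1/1e-5,
# which clips to -1; the origin maps to 0.
x_ndc = np.clip(-fx * x / (z + 1e-5) + px, -1., 1.)
y_ndc = np.clip(-fy * y / (z + 1e-5) + py, -1., 1.)
print(np.int32((1 + x_ndc) * 0.5 * cols))  # -> [50  0]
print(np.int32((1 - y_ndc) * 0.5 * rows))  # -> [50 50]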
@@ -0,0 +1,231 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for mediapipe.python.solutions.drawing_utils."""

from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np

from google.protobuf import text_format

from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import landmark_pb2
from mediapipe.python.solutions import drawing_utils

DEFAULT_BBOX_DRAWING_SPEC = drawing_utils.DrawingSpec()
DEFAULT_CONNECTION_DRAWING_SPEC = drawing_utils.DrawingSpec()
DEFAULT_CIRCLE_DRAWING_SPEC = drawing_utils.DrawingSpec(color=(0, 0, 255))
DEFAULT_AXIS_DRAWING_SPEC = drawing_utils.DrawingSpec()


class DrawingUtilTest(parameterized.TestCase):

  def test_invalid_input_image(self):
    image = np.arange(18, dtype=np.uint8).reshape(3, 3, 2)
    with self.assertRaisesRegex(
        ValueError, 'Input image must contain three channel rgb data.'):
      drawing_utils.draw_landmarks(image, landmark_pb2.NormalizedLandmarkList())
    with self.assertRaisesRegex(
        ValueError, 'Input image must contain three channel rgb data.'):
      drawing_utils.draw_detection(image, detection_pb2.Detection())
    with self.assertRaisesRegex(
        ValueError, 'Input image must contain three channel rgb data.'):
      rotation = np.eye(3, dtype=np.float32)
      translation = np.array([0., 0., 1.])
      drawing_utils.draw_axis(image, rotation, translation)

  def test_invalid_connection(self):
    landmark_list = text_format.Parse(
        'landmark {x: 0.5 y: 0.5} landmark {x: 0.2 y: 0.2}',
        landmark_pb2.NormalizedLandmarkList())
    image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
    with self.assertRaisesRegex(ValueError, 'Landmark index is out of range.'):
      drawing_utils.draw_landmarks(image, landmark_list, [(0, 2)])

  def test_unqualified_detection(self):
    detection = text_format.Parse('location_data {format: GLOBAL}',
                                  detection_pb2.Detection())
    image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
    with self.assertRaisesRegex(ValueError, 'LocationData must be relative'):
      drawing_utils.draw_detection(image, detection)

  def test_draw_keypoints_only(self):
    detection = text_format.Parse(
        'location_data {'
        '  format: RELATIVE_BOUNDING_BOX'
        '  relative_keypoints {x: 0 y: 1}'
        '  relative_keypoints {x: 1 y: 0}}', detection_pb2.Detection())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = np.copy(image)
    cv2.circle(expected_result, (0, 99),
               DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
               DEFAULT_CIRCLE_DRAWING_SPEC.color,
               DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
    cv2.circle(expected_result, (99, 0),
               DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
               DEFAULT_CIRCLE_DRAWING_SPEC.color,
               DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
    drawing_utils.draw_detection(image, detection)
    np.testing.assert_array_equal(image, expected_result)

  def test_draw_bbox_only(self):
    detection = text_format.Parse(
        'location_data {'
        '  format: RELATIVE_BOUNDING_BOX'
        '  relative_bounding_box {xmin: 0 ymin: 0 width: 1 height: 1}}',
        detection_pb2.Detection())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = np.copy(image)
    cv2.rectangle(expected_result, (0, 0), (99, 99),
                  DEFAULT_BBOX_DRAWING_SPEC.color,
                  DEFAULT_BBOX_DRAWING_SPEC.thickness)
    drawing_utils.draw_detection(image, detection)
    np.testing.assert_array_equal(image, expected_result)

  @parameterized.named_parameters(
      ('landmark_list_has_only_one_element', 'landmark {x: 0.1 y: 0.1}'),
      ('second_landmark_is_invisible',
       'landmark {x: 0.1 y: 0.1} landmark {x: 0.5 y: 0.5 visibility: 0.0}'))
  def test_draw_single_landmark_point(self, landmark_list_text):
    landmark_list = text_format.Parse(landmark_list_text,
                                      landmark_pb2.NormalizedLandmarkList())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = np.copy(image)
    cv2.circle(expected_result, (10, 10),
               DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
               DEFAULT_CIRCLE_DRAWING_SPEC.color,
               DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
    drawing_utils.draw_landmarks(image, landmark_list)
    np.testing.assert_array_equal(image, expected_result)

  @parameterized.named_parameters(
      ('landmarks_have_x_and_y_only',
       'landmark {x: 0.1 y: 0.5} landmark {x: 0.5 y: 0.1}'),
      ('landmark_zero_visibility_and_presence',
       'landmark {x: 0.1 y: 0.5 presence: 0.5}'
       'landmark {x: 0.5 y: 0.1 visibility: 0.5}'))
  def test_draw_landmarks_and_connections(self, landmark_list_text):
    landmark_list = text_format.Parse(landmark_list_text,
                                      landmark_pb2.NormalizedLandmarkList())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = np.copy(image)
    start_point = (10, 50)
    end_point = (50, 10)
    cv2.line(expected_result, start_point, end_point,
             DEFAULT_CONNECTION_DRAWING_SPEC.color,
             DEFAULT_CONNECTION_DRAWING_SPEC.thickness)
    cv2.circle(expected_result, start_point,
               DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
               DEFAULT_CIRCLE_DRAWING_SPEC.color,
               DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
    cv2.circle(expected_result, end_point,
               DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
               DEFAULT_CIRCLE_DRAWING_SPEC.color,
               DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
    drawing_utils.draw_landmarks(
        image=image, landmark_list=landmark_list, connections=[(0, 1)])
    np.testing.assert_array_equal(image, expected_result)

  def test_draw_axis(self):
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = np.copy(image)
    origin = (50, 50)
    x_axis = (75, 50)
    y_axis = (50, 22)
    z_axis = (50, 77)
    cv2.arrowedLine(expected_result, origin, x_axis, drawing_utils.RED_COLOR,
                    DEFAULT_AXIS_DRAWING_SPEC.thickness)
    cv2.arrowedLine(expected_result, origin, y_axis, drawing_utils.GREEN_COLOR,
                    DEFAULT_AXIS_DRAWING_SPEC.thickness)
    cv2.arrowedLine(expected_result, origin, z_axis, drawing_utils.BLUE_COLOR,
                    DEFAULT_AXIS_DRAWING_SPEC.thickness)
    r = np.sqrt(2.) / 2.
    rotation = np.array([[1., 0., 0.], [0., r, -r], [0., r, r]])
    translation = np.array([0, 0, -0.2])
    drawing_utils.draw_axis(image, rotation, translation)
    np.testing.assert_array_equal(image, expected_result)

  def test_draw_axis_zero_translation(self):
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = np.copy(image)
    origin = (50, 50)
    x_axis = (0, 50)
    y_axis = (50, 100)
    z_axis = (50, 50)
    cv2.arrowedLine(expected_result, origin, x_axis, drawing_utils.RED_COLOR,
                    DEFAULT_AXIS_DRAWING_SPEC.thickness)
    cv2.arrowedLine(expected_result, origin, y_axis, drawing_utils.GREEN_COLOR,
                    DEFAULT_AXIS_DRAWING_SPEC.thickness)
    cv2.arrowedLine(expected_result, origin, z_axis, drawing_utils.BLUE_COLOR,
                    DEFAULT_AXIS_DRAWING_SPEC.thickness)
    rotation = np.eye(3, dtype=np.float32)
    translation = np.zeros((3,), dtype=np.float32)
    drawing_utils.draw_axis(image, rotation, translation)
    np.testing.assert_array_equal(image, expected_result)

  def test_min_and_max_coordinate_values(self):
    landmark_list = text_format.Parse(
        'landmark {x: 0.0 y: 1.0}'
        'landmark {x: 1.0 y: 0.0}', landmark_pb2.NormalizedLandmarkList())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = np.copy(image)
    start_point = (0, 99)
    end_point = (99, 0)
    cv2.line(expected_result, start_point, end_point,
             DEFAULT_CONNECTION_DRAWING_SPEC.color,
             DEFAULT_CONNECTION_DRAWING_SPEC.thickness)
    cv2.circle(expected_result, start_point,
               DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
               DEFAULT_CIRCLE_DRAWING_SPEC.color,
               DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
    cv2.circle(expected_result, end_point,
               DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
               DEFAULT_CIRCLE_DRAWING_SPEC.color,
               DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
    drawing_utils.draw_landmarks(
        image=image, landmark_list=landmark_list, connections=[(0, 1)])
    np.testing.assert_array_equal(image, expected_result)

  def test_drawing_spec(self):
    landmark_list = text_format.Parse(
        'landmark {x: 0.1 y: 0.1}'
        'landmark {x: 0.8 y: 0.8}', landmark_pb2.NormalizedLandmarkList())
    image = np.zeros((100, 100, 3), np.uint8)
    landmark_drawing_spec = drawing_utils.DrawingSpec(
        color=(0, 0, 255), thickness=5)
    connection_drawing_spec = drawing_utils.DrawingSpec(
        color=(255, 0, 0), thickness=3)
    expected_result = np.copy(image)
    start_point = (10, 10)
    end_point = (80, 80)
    cv2.line(expected_result, start_point, end_point,
             connection_drawing_spec.color, connection_drawing_spec.thickness)
    cv2.circle(expected_result, start_point,
               landmark_drawing_spec.circle_radius, landmark_drawing_spec.color,
               landmark_drawing_spec.thickness)
    cv2.circle(expected_result, end_point, landmark_drawing_spec.circle_radius,
               landmark_drawing_spec.color, landmark_drawing_spec.thickness)
    drawing_utils.draw_landmarks(
        image=image,
        landmark_list=landmark_list,
        connections=[(0, 1)],
        landmark_drawing_spec=landmark_drawing_spec,
        connection_drawing_spec=connection_drawing_spec)
    np.testing.assert_array_equal(image, expected_result)


if __name__ == '__main__':
  absltest.main()
@@ -0,0 +1,112 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Face Detection."""

import enum
from typing import NamedTuple, Union

import numpy as np
from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import location_data_pb2
# pylint: disable=unused-import
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase

SHORT_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb'
FULL_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb'


def get_key_point(
    detection: detection_pb2.Detection, key_point_enum: 'FaceKeyPoint'
) -> Union[None, location_data_pb2.LocationData.RelativeKeypoint]:
  """A convenience method to return a face key point by the FaceKeyPoint type.

  Args:
    detection: A detection proto message that contains face key points.
    key_point_enum: A FaceKeyPoint type.

  Returns:
    A RelativeKeypoint proto message.
  """
  if not detection or not detection.location_data:
    return None
  return detection.location_data.relative_keypoints[key_point_enum]


class FaceKeyPoint(enum.IntEnum):
  """The enum type of the six face detection key points."""
  RIGHT_EYE = 0
  LEFT_EYE = 1
  NOSE_TIP = 2
  MOUTH_CENTER = 3
  RIGHT_EAR_TRAGION = 4
  LEFT_EAR_TRAGION = 5


class FaceDetection(SolutionBase):
  """MediaPipe Face Detection.

  MediaPipe Face Detection processes an RGB image and returns a list of the
  detected face location data.

  Please refer to
  https://solutions.mediapipe.dev/face_detection#python-solution-api
  for usage examples.
  """

  def __init__(self, min_detection_confidence=0.5, model_selection=0):
    """Initializes a MediaPipe Face Detection object.

    Args:
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for face
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/face_detection#min_detection_confidence.
      model_selection: 0 or 1. 0 to select a short-range model that works best
        for faces within 2 meters from the camera, and 1 for a full-range model
        best for faces within 5 meters. See details in
        https://solutions.mediapipe.dev/face_detection#model_selection.
    """

    binary_graph_path = FULL_RANGE_GRAPH_FILE_PATH if model_selection == 1 else SHORT_RANGE_GRAPH_FILE_PATH
    subgraph_name = 'facedetectionfullrangecommon' if model_selection == 1 else 'facedetectionshortrangecommon'

    super().__init__(
        binary_graph_path=binary_graph_path,
        calculator_params={
            subgraph_name + '__TensorsToDetectionsCalculator.min_score_thresh':
                min_detection_confidence,
        },
        outputs=['detections'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns a list of the detected face location data.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with a "detections" field that contains a list of the
      detected face location data.
    """

    return super().process(input_data={'image': image})
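A short usage sketch of the class above (hedged: 'portrait.jpg' is an assumed input file; the pattern mirrors the test that follows):

import cv2
import mediapipe as mp

mp_faces = mp.solutions.face_detection

image = cv2.imread('portrait.jpg')  # assumed test image
with mp_faces.FaceDetection(min_detection_confidence=0.5) as faces:
  # The graph expects RGB; OpenCV reads BGR, so convert first.
  results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
for detection in results.detections or []:
  nose = mp_faces.get_key_point(detection, mp_faces.FaceKeyPoint.NOSE_TIP)
  print(nose.x, nose.y)  # normalized [0, 1] coordinates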
@@ -0,0 +1,92 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.face_detection."""

import os
import tempfile  # pylint: disable=unused-import
from typing import NamedTuple

from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt

# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import face_detection as mp_faces

TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
SHORT_RANGE_EXPECTED_FACE_KEY_POINTS = [[363, 182], [460, 186], [420, 241],
                                        [417, 284], [295, 199], [502, 198]]
FULL_RANGE_EXPECTED_FACE_KEY_POINTS = [[363, 181], [455, 181], [413, 233],
                                       [411, 278], [306, 204], [499, 207]]
DIFF_THRESHOLD = 5  # pixels


class FaceDetectionTest(parameterized.TestCase):

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    for detection in results.detections:
      mp_drawing.draw_detection(frame, detection)
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    with mp_faces.FaceDetection() as faces:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        faces.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    image = np.zeros([100, 100, 3], dtype=np.uint8)
    image.fill(255)
    with mp_faces.FaceDetection(min_detection_confidence=0.5) as faces:
      results = faces.process(image)
    self.assertIsNone(results.detections)

  @parameterized.named_parameters(('short_range_model', 0),
                                  ('full_range_model', 1))
  def test_face(self, model_selection):
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/portrait.jpg')
    image = cv2.imread(image_path)
    rows, cols, _ = image.shape
    with mp_faces.FaceDetection(
        min_detection_confidence=0.5, model_selection=model_selection) as faces:
      for idx in range(5):
        results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._annotate(image.copy(), results, idx)
        location_data = results.detections[0].location_data
        x = [keypoint.x * cols for keypoint in location_data.relative_keypoints]
        y = [keypoint.y * rows for keypoint in location_data.relative_keypoints]
        face_keypoints = np.column_stack((x, y))
        if model_selection == 0:
          prediction_error = np.abs(
              np.asarray(face_keypoints) -
              np.asarray(SHORT_RANGE_EXPECTED_FACE_KEY_POINTS))
        else:
          prediction_error = np.abs(
              np.asarray(face_keypoints) -
              np.asarray(FULL_RANGE_EXPECTED_FACE_KEY_POINTS))

        self.assertLen(results.detections, 1)
        self.assertLen(location_data.relative_keypoints, 6)
        npt.assert_array_less(prediction_error, DIFF_THRESHOLD)


if __name__ == '__main__':
  absltest.main()
@@ -0,0 +1,238 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MediaPipe FaceMesh."""

from typing import NamedTuple

import numpy as np

from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase

BINARYPB_FILE_PATH = 'mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb'
FACE_CONNECTIONS = frozenset([
    # Lips.
    (61, 146),
    (146, 91),
    (91, 181),
    (181, 84),
    (84, 17),
    (17, 314),
    (314, 405),
    (405, 321),
    (321, 375),
    (375, 291),
    (61, 185),
    (185, 40),
    (40, 39),
    (39, 37),
    (37, 0),
    (0, 267),
    (267, 269),
    (269, 270),
    (270, 409),
    (409, 291),
    (78, 95),
    (95, 88),
    (88, 178),
    (178, 87),
    (87, 14),
    (14, 317),
    (317, 402),
    (402, 318),
    (318, 324),
    (324, 308),
    (78, 191),
    (191, 80),
    (80, 81),
    (81, 82),
    (82, 13),
    (13, 312),
    (312, 311),
    (311, 310),
    (310, 415),
    (415, 308),
    # Left eye.
    (263, 249),
    (249, 390),
    (390, 373),
    (373, 374),
    (374, 380),
    (380, 381),
    (381, 382),
    (382, 362),
    (263, 466),
    (466, 388),
    (388, 387),
    (387, 386),
    (386, 385),
    (385, 384),
    (384, 398),
    (398, 362),
    # Left eyebrow.
    (276, 283),
    (283, 282),
    (282, 295),
    (295, 285),
    (300, 293),
    (293, 334),
    (334, 296),
    (296, 336),
    # Right eye.
    (33, 7),
    (7, 163),
    (163, 144),
    (144, 145),
    (145, 153),
    (153, 154),
    (154, 155),
    (155, 133),
    (33, 246),
    (246, 161),
    (161, 160),
    (160, 159),
    (159, 158),
    (158, 157),
    (157, 173),
    (173, 133),
    # Right eyebrow.
    (46, 53),
    (53, 52),
    (52, 65),
    (65, 55),
    (70, 63),
    (63, 105),
    (105, 66),
    (66, 107),
    # Face oval.
    (10, 338),
    (338, 297),
    (297, 332),
    (332, 284),
    (284, 251),
    (251, 389),
    (389, 356),
    (356, 454),
    (454, 323),
    (323, 361),
    (361, 288),
    (288, 397),
    (397, 365),
    (365, 379),
    (379, 378),
    (378, 400),
    (400, 377),
    (377, 152),
    (152, 148),
    (148, 176),
    (176, 149),
    (149, 150),
    (150, 136),
    (136, 172),
    (172, 58),
    (58, 132),
    (132, 93),
    (93, 234),
    (234, 127),
    (127, 162),
    (162, 21),
    (21, 54),
    (54, 103),
    (103, 67),
    (67, 109),
    (109, 10)
])


class FaceMesh(SolutionBase):
  """MediaPipe FaceMesh.

  MediaPipe FaceMesh processes an RGB image and returns the face landmarks on
  each detected face.

  Please refer to https://solutions.mediapipe.dev/face_mesh#python-solution-api
  for usage examples.
  """

  def __init__(self,
               static_image_mode=False,
               max_num_faces=1,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe FaceMesh object.

    Args:
      static_image_mode: Whether to treat the input images as a batch of static
        and possibly unrelated images, or a video stream. See details in
        https://solutions.mediapipe.dev/face_mesh#static_image_mode.
      max_num_faces: Maximum number of faces to detect. See details in
        https://solutions.mediapipe.dev/face_mesh#max_num_faces.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for face
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/face_mesh#min_detection_confidence.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        face landmarks to be considered tracked successfully. See details in
        https://solutions.mediapipe.dev/face_mesh#min_tracking_confidence.
    """
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs={
            'num_faces': max_num_faces,
        },
        calculator_params={
            'ConstantSidePacketCalculator.packet': [
                constant_side_packet_calculator_pb2
                .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                    bool_value=not static_image_mode)
            ],
            'facedetectionshortrangecpu__TensorsToDetectionsCalculator.min_score_thresh':
                min_detection_confidence,
            'facelandmarkcpu__ThresholdingCalculator.threshold':
                min_tracking_confidence,
        },
        outputs=['multi_face_landmarks'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the face landmarks on each detected face.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with a "multi_face_landmarks" field that contains the
      face landmarks on each detected face.
    """

    return super().process(input_data={'image': image})
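A short usage sketch of the class above (hedged: 'portrait.jpg' is an assumed input file; the pattern mirrors the test that follows):

import cv2
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh

image = cv2.imread('portrait.jpg')  # assumed test image
with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1) as faces:
  results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
for face_landmarks in results.multi_face_landmarks or []:
  print(len(face_landmarks.landmark))  # 468 landmarks per detected face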
@@ -0,0 +1,125 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for mediapipe.python.solutions.face_mesh."""

import os
import tempfile  # pylint: disable=unused-import
from typing import NamedTuple

from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt

# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import face_mesh as mp_faces

TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 5  # pixels
EYE_INDICES_TO_LANDMARKS = {
    33: [345, 178],
    7: [348, 179],
    163: [352, 178],
    144: [357, 179],
    145: [365, 179],
    153: [371, 179],
    154: [378, 178],
    155: [381, 177],
    133: [383, 177],
    246: [347, 175],
    161: [350, 174],
    160: [355, 172],
    159: [362, 170],
    158: [368, 171],
    157: [375, 172],
    173: [380, 175],
    263: [467, 176],
    249: [464, 177],
    390: [460, 177],
    373: [455, 178],
    374: [448, 179],
    380: [441, 179],
    381: [435, 178],
    382: [432, 177],
    362: [430, 177],
    466: [465, 175],
    388: [462, 173],
    387: [457, 171],
    386: [450, 170],
    385: [444, 171],
    384: [437, 172],
    398: [432, 175]
}


class FaceMeshTest(parameterized.TestCase):

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
    for face_landmarks in results.multi_face_landmarks:
      mp_drawing.draw_landmarks(
          image=frame,
          landmark_list=face_landmarks,
          landmark_drawing_spec=drawing_spec)
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    with mp_faces.FaceMesh() as faces:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        faces.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    with mp_faces.FaceMesh() as faces:
      image = np.zeros([100, 100, 3], dtype=np.uint8)
      image.fill(255)
      results = faces.process(image)
      self.assertIsNone(results.multi_face_landmarks)

  @parameterized.named_parameters(('static_image_mode', True, 1),
                                  ('video_mode', False, 5))
  def test_face(self, static_image_mode: bool, num_frames: int):
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/portrait.jpg')
    image = cv2.imread(image_path)
    rows, cols, _ = image.shape
    with mp_faces.FaceMesh(
        static_image_mode=static_image_mode,
        min_detection_confidence=0.5) as faces:
      for idx in range(num_frames):
        results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._annotate(image.copy(), results, idx)
        multi_face_landmarks = []
        for landmarks in results.multi_face_landmarks:
          self.assertLen(landmarks.landmark, 468)
          x = [landmark.x * cols for landmark in landmarks.landmark]
          y = [landmark.y * rows for landmark in landmarks.landmark]
          face_landmarks = np.column_stack((x, y))
          multi_face_landmarks.append(face_landmarks)
        self.assertLen(multi_face_landmarks, 1)
        # Verify the eye landmarks are correct as a sanity check.
        for eye_idx, gt_lds in EYE_INDICES_TO_LANDMARKS.items():
          prediction_error = np.abs(
              np.asarray(multi_face_landmarks[0][eye_idx]) - np.asarray(gt_lds))
          npt.assert_array_less(prediction_error, DIFF_THRESHOLD)


if __name__ == '__main__':
  absltest.main()
@@ -0,0 +1,164 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MediaPipe Hands."""

import enum
from typing import NamedTuple

import numpy as np

from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase


class HandLandmark(enum.IntEnum):
  """The 21 hand landmarks."""
  WRIST = 0
  THUMB_CMC = 1
  THUMB_MCP = 2
  THUMB_IP = 3
  THUMB_TIP = 4
  INDEX_FINGER_MCP = 5
  INDEX_FINGER_PIP = 6
  INDEX_FINGER_DIP = 7
  INDEX_FINGER_TIP = 8
  MIDDLE_FINGER_MCP = 9
  MIDDLE_FINGER_PIP = 10
  MIDDLE_FINGER_DIP = 11
  MIDDLE_FINGER_TIP = 12
  RING_FINGER_MCP = 13
  RING_FINGER_PIP = 14
  RING_FINGER_DIP = 15
  RING_FINGER_TIP = 16
  PINKY_MCP = 17
  PINKY_PIP = 18
  PINKY_DIP = 19
  PINKY_TIP = 20


BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb'
HAND_CONNECTIONS = frozenset([
    (HandLandmark.WRIST, HandLandmark.THUMB_CMC),
    (HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
    (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
    (HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP),
    (HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
    (HandLandmark.INDEX_FINGER_MCP, HandLandmark.INDEX_FINGER_PIP),
    (HandLandmark.INDEX_FINGER_PIP, HandLandmark.INDEX_FINGER_DIP),
    (HandLandmark.INDEX_FINGER_DIP, HandLandmark.INDEX_FINGER_TIP),
    (HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP),
    (HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.MIDDLE_FINGER_PIP),
    (HandLandmark.MIDDLE_FINGER_PIP, HandLandmark.MIDDLE_FINGER_DIP),
    (HandLandmark.MIDDLE_FINGER_DIP, HandLandmark.MIDDLE_FINGER_TIP),
    (HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.RING_FINGER_MCP),
    (HandLandmark.RING_FINGER_MCP, HandLandmark.RING_FINGER_PIP),
    (HandLandmark.RING_FINGER_PIP, HandLandmark.RING_FINGER_DIP),
    (HandLandmark.RING_FINGER_DIP, HandLandmark.RING_FINGER_TIP),
    (HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
    (HandLandmark.WRIST, HandLandmark.PINKY_MCP),
    (HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
    (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
    (HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP)
])


class Hands(SolutionBase):
  """MediaPipe Hands.

  MediaPipe Hands processes an RGB image and returns the hand landmarks and
  handedness (left vs. right hand) of each detected hand.

  Note that it determines handedness assuming the input image is mirrored,
  i.e., taken with a front-facing/selfie camera (
  https://en.wikipedia.org/wiki/Front-facing_camera) with images flipped
  horizontally. If that is not the case, use, for instance, cv2.flip(image, 1)
  to flip the image first for a correct handedness output.

  Please refer to https://solutions.mediapipe.dev/hands#python-solution-api for
  usage examples.
  """
|
||||
|
||||
def __init__(self,
|
||||
static_image_mode=False,
|
||||
max_num_hands=2,
|
||||
min_detection_confidence=0.5,
|
||||
min_tracking_confidence=0.5):
|
||||
"""Initializes a MediaPipe Hand object.
|
||||
|
||||
Args:
|
||||
static_image_mode: Whether to treat the input images as a batch of static
|
||||
and possibly unrelated images, or a video stream. See details in
|
||||
https://solutions.mediapipe.dev/hands#static_image_mode.
|
||||
max_num_hands: Maximum number of hands to detect. See details in
|
||||
https://solutions.mediapipe.dev/hands#max_num_hands.
|
||||
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for hand
|
||||
detection to be considered successful. See details in
|
||||
https://solutions.mediapipe.dev/hands#min_detection_confidence.
|
||||
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
|
||||
hand landmarks to be considered tracked successfully. See details in
|
||||
https://solutions.mediapipe.dev/hands#min_tracking_confidence.
|
||||
"""
|
||||
super().__init__(
|
||||
binary_graph_path=BINARYPB_FILE_PATH,
|
||||
side_inputs={
|
||||
'num_hands': max_num_hands,
|
||||
},
|
||||
calculator_params={
|
||||
'ConstantSidePacketCalculator.packet': [
|
||||
constant_side_packet_calculator_pb2
|
||||
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
|
||||
bool_value=not static_image_mode)
|
||||
],
|
||||
'palmdetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
|
||||
min_detection_confidence,
|
||||
'handlandmarkcpu__ThresholdingCalculator.threshold':
|
||||
min_tracking_confidence,
|
||||
},
|
||||
outputs=['multi_hand_landmarks', 'multi_handedness'])
|
||||
|
||||
def process(self, image: np.ndarray) -> NamedTuple:
|
||||
"""Processes an RGB image and returns the hand landmarks and handedness of each detected hand.
|
||||
|
||||
Args:
|
||||
image: An RGB image represented as a numpy ndarray.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the underlying graph throws any error.
|
||||
ValueError: If the input image is not three channel RGB.
|
||||
|
||||
Returns:
|
||||
A NamedTuple object with two fields: a "multi_hand_landmarks" field that
|
||||
contains the hand landmarks on each detected hand and a "multi_handedness"
|
||||
field that contains the handedness (left v.s. right hand) of the detected
|
||||
hand.
|
||||
"""
|
||||
|
||||
return super().process(input_data={'image': image})
|
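A minimal usage sketch for the Hands API above (an editor's illustration, not part of this commit; the image path is hypothetical). The input is flipped first so handedness is reported correctly for a non-mirrored photo:

import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
  # Flip horizontally so handedness matches a non-mirrored photo.
  image = cv2.flip(cv2.imread('hand.jpg'), 1)  # Hypothetical input image.
  results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.multi_hand_landmarks:
    for hand, handedness in zip(results.multi_hand_landmarks,
                                results.multi_handedness):
      wrist = hand.landmark[mp_hands.HandLandmark.WRIST]
      # Landmark coordinates are normalized to [0.0, 1.0].
      print(handedness.classification[0].label, wrist.x, wrist.y)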
@@ -0,0 +1,110 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for mediapipe.python.solutions.hands."""

import os
import tempfile  # pylint: disable=unused-import
from typing import NamedTuple

from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt

# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_styles
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import hands as mp_hands

TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 15  # pixels
EXPECTED_HAND_COORDINATES_PREDICTION = [[[144, 345], [211, 323], [257, 286],
                                         [289, 237], [322, 203], [219, 216],
                                         [238, 138], [249, 90], [253, 51],
                                         [177, 204], [184, 115], [187, 60],
                                         [185, 19], [138, 208], [131, 127],
                                         [124, 77], [117, 36], [106, 222],
                                         [92, 159], [79, 124], [68, 93]],
                                        [[577, 37], [504, 56], [459, 94],
                                         [429, 146], [397, 182], [496, 167],
                                         [479, 245], [469, 292], [464, 330],
                                         [540, 177], [534, 265], [533, 319],
                                         [536, 360], [581, 172], [587, 252],
                                         [593, 304], [599, 346], [615, 157],
                                         [628, 223], [638, 258], [648, 288]]]


class HandsTest(parameterized.TestCase):

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    for hand_landmarks in results.multi_hand_landmarks:
      mp_drawing.draw_landmarks(
          frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
          drawing_styles.get_default_hand_landmark_style(),
          drawing_styles.get_default_hand_connection_style())
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    with mp_hands.Hands() as hands:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        hands.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    with mp_hands.Hands() as hands:
      image = np.zeros([100, 100, 3], dtype=np.uint8)
      image.fill(255)
      results = hands.process(image)
      self.assertIsNone(results.multi_hand_landmarks)
      self.assertIsNone(results.multi_handedness)

  @parameterized.named_parameters(('static_image_mode', True, 1),
                                  ('video_mode', False, 5))
  def test_multi_hands(self, static_image_mode, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/hands.jpg')
    image = cv2.imread(image_path)
    with mp_hands.Hands(
        static_image_mode=static_image_mode,
        max_num_hands=2,
        min_detection_confidence=0.5) as hands:
      for idx in range(num_frames):
        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._annotate(image.copy(), results, idx)
        handedness = [
            handedness.classification[0].label
            for handedness in results.multi_handedness
        ]
        multi_hand_coordinates = []
        rows, cols, _ = image.shape
        for landmarks in results.multi_hand_landmarks:
          self.assertLen(landmarks.landmark, 21)
          x = [landmark.x * cols for landmark in landmarks.landmark]
          y = [landmark.y * rows for landmark in landmarks.landmark]
          hand_coordinates = np.column_stack((x, y))
          multi_hand_coordinates.append(hand_coordinates)
        self.assertLen(handedness, 2)
        self.assertLen(multi_hand_coordinates, 2)
        prediction_error = np.abs(
            np.asarray(multi_hand_coordinates) -
            np.asarray(EXPECTED_HAND_COORDINATES_PREDICTION))
        npt.assert_array_less(prediction_error, DIFF_THRESHOLD)


if __name__ == '__main__':
  absltest.main()
@@ -0,0 +1,152 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Holistic."""

from typing import NamedTuple

import numpy as np

from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# The following imports are needed because python pb2 silently discards
# unknown protobuf fields.
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmark_projection_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.framework.tool import switch_container_pb2
from mediapipe.modules.holistic_landmark.calculators import roi_tracking_calculator_pb2
# pylint: enable=unused-import

from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils
# pylint: disable=unused-import
from mediapipe.python.solutions.face_mesh import FACE_CONNECTIONS
from mediapipe.python.solutions.hands import HAND_CONNECTIONS
from mediapipe.python.solutions.hands import HandLandmark
from mediapipe.python.solutions.pose import POSE_CONNECTIONS
from mediapipe.python.solutions.pose import PoseLandmark
# pylint: enable=unused-import

BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'


def _download_oss_pose_landmark_model(model_complexity):
  """Downloads the pose landmark lite/heavy model from the MediaPipe GitHub repo if it doesn't exist in the package."""

  if model_complexity == 0:
    download_utils.download_oss_model(
        'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite')
  elif model_complexity == 2:
    download_utils.download_oss_model(
        'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite')


class Holistic(SolutionBase):
  """MediaPipe Holistic.

  MediaPipe Holistic processes an RGB image and returns pose landmarks, left
  and right hand landmarks, and face mesh landmarks on the most prominent
  person detected.

  Please refer to https://solutions.mediapipe.dev/holistic#python-solution-api
  for usage examples.
  """

  def __init__(self,
               static_image_mode=False,
               model_complexity=1,
               smooth_landmarks=True,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe Holistic object.

    Args:
      static_image_mode: Whether to treat the input images as a batch of static
        and possibly unrelated images, or a video stream. See details in
        https://solutions.mediapipe.dev/holistic#static_image_mode.
      model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
        details in https://solutions.mediapipe.dev/holistic#model_complexity.
      smooth_landmarks: Whether to filter landmarks across different input
        images to reduce jitter. See details in
        https://solutions.mediapipe.dev/holistic#smooth_landmarks.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/holistic#min_detection_confidence.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        pose landmarks to be considered tracked successfully. See details in
        https://solutions.mediapipe.dev/holistic#min_tracking_confidence.
    """
    _download_oss_pose_landmark_model(model_complexity)
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs={
            'model_complexity': model_complexity,
            'smooth_landmarks': smooth_landmarks and not static_image_mode,
        },
        calculator_params={
            'poselandmarkcpu__ConstantSidePacketCalculator.packet': [
                constant_side_packet_calculator_pb2
                .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                    bool_value=not static_image_mode)
            ],
            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
                min_detection_confidence,
            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
                min_tracking_confidence,
        },
        outputs=[
            'pose_landmarks', 'pose_world_landmarks', 'left_hand_landmarks',
            'right_hand_landmarks', 'face_landmarks'
        ])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple that has five fields describing the landmarks on the most
      prominent person detected:
        1) "pose_landmarks" field that contains the pose landmarks.
        2) "pose_world_landmarks" field that contains the pose landmarks in
           real-world 3D coordinates that are in meters with the origin at the
           center between hips.
        3) "left_hand_landmarks" field that contains the left-hand landmarks.
        4) "right_hand_landmarks" field that contains the right-hand landmarks.
        5) "face_landmarks" field that contains the face landmarks.
    """

    results = super().process(input_data={'image': image})
    if results.pose_landmarks:
      for landmark in results.pose_landmarks.landmark:
        landmark.ClearField('presence')
    if results.pose_world_landmarks:
      for landmark in results.pose_world_landmarks.landmark:
        landmark.ClearField('presence')
    return results
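A minimal usage sketch for the Holistic API above (an editor's illustration, not part of this commit; the video path is hypothetical), showing per-frame processing of a stream:

import cv2
import mediapipe as mp

with mp.solutions.holistic.Holistic(model_complexity=1) as holistic:
  cap = cv2.VideoCapture('input.mp4')  # Hypothetical input video.
  while cap.isOpened():
    success, frame = cap.read()
    if not success:
      break
    results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if results.pose_world_landmarks:
      # World landmarks are in meters, origin at the center between the hips.
      nose = results.pose_world_landmarks.landmark[
          mp.solutions.holistic.PoseLandmark.NOSE]
      print(nose.x, nose.y, nose.z)
  cap.release()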
@@ -0,0 +1,134 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.holistic."""

import os
import tempfile  # pylint: disable=unused-import
from typing import NamedTuple

from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt

# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import holistic as mp_holistic

TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
POSE_DIFF_THRESHOLD = 30  # pixels
HAND_DIFF_THRESHOLD = 30  # pixels
EXPECTED_POSE_LANDMARKS = np.array([[782, 243], [791, 232], [796, 233],
                                    [801, 233], [773, 231], [766, 231],
                                    [759, 232], [802, 242], [751, 239],
                                    [791, 258], [766, 258], [830, 301],
                                    [708, 298], [910, 248], [635, 234],
                                    [954, 161], [593, 136], [961, 137],
                                    [583, 110], [952, 132], [592, 106],
                                    [950, 141], [596, 115], [793, 500],
                                    [724, 502], [874, 626], [640, 629],
                                    [965, 756], [542, 760], [962, 779],
                                    [533, 781], [1025, 797], [487, 803]])
EXPECTED_LEFT_HAND_LANDMARKS = np.array([[958, 167], [950, 161], [945, 151],
                                         [945, 141], [947, 134], [945, 136],
                                         [939, 122], [935, 113], [931, 106],
                                         [951, 134], [946, 118], [942, 108],
                                         [938, 100], [957, 135], [954, 120],
                                         [951, 111], [948, 103], [964, 138],
                                         [964, 128], [965, 122], [965, 117]])
EXPECTED_RIGHT_HAND_LANDMARKS = np.array([[590, 135], [602, 125], [609, 114],
                                          [613, 103], [617, 96], [596, 100],
                                          [595, 84], [594, 74], [593, 68],
                                          [588, 100], [586, 84], [585, 73],
                                          [584, 65], [581, 103], [579, 89],
                                          [579, 79], [579, 72], [575, 109],
                                          [571, 99], [570, 93], [569, 87]])


class HolisticTest(parameterized.TestCase):

  def _landmarks_list_to_array(self, landmark_list, image_shape):
    rows, cols, _ = image_shape
    return np.asarray([(lmk.x * cols, lmk.y * rows)
                       for lmk in landmark_list.landmark])

  def _assert_diff_less(self, array1, array2, threshold):
    npt.assert_array_less(np.abs(array1 - array2), threshold)

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=results.face_landmarks,
        landmark_drawing_spec=drawing_spec)
    mp_drawing.draw_landmarks(frame, results.left_hand_landmarks,
                              mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(frame, results.right_hand_landmarks,
                              mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(frame, results.pose_landmarks,
                              mp_holistic.POSE_CONNECTIONS)
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    with mp_holistic.Holistic() as holistic:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        holistic.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    with mp_holistic.Holistic() as holistic:
      image = np.zeros([100, 100, 3], dtype=np.uint8)
      image.fill(255)
      results = holistic.process(image)
      self.assertIsNone(results.pose_landmarks)

  @parameterized.named_parameters(('static_lite', True, 0, 3),
                                  ('static_full', True, 1, 3),
                                  ('static_heavy', True, 2, 3),
                                  ('video_lite', False, 0, 3),
                                  ('video_full', False, 1, 3),
                                  ('video_heavy', False, 2, 3))
  def test_on_image(self, static_image_mode, model_complexity, num_frames):
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/holistic.jpg')
    image = cv2.imread(image_path)
    with mp_holistic.Holistic(static_image_mode=static_image_mode,
                              model_complexity=model_complexity) as holistic:
      for idx in range(num_frames):
        results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._annotate(image.copy(), results, idx)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.pose_landmarks, image.shape),
            EXPECTED_POSE_LANDMARKS,
            POSE_DIFF_THRESHOLD)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.left_hand_landmarks,
                                          image.shape),
            EXPECTED_LEFT_HAND_LANDMARKS,
            HAND_DIFF_THRESHOLD)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.right_hand_landmarks,
                                          image.shape),
            EXPECTED_RIGHT_HAND_LANDMARKS,
            HAND_DIFF_THRESHOLD)
        # TODO: Verify the correctness of the face landmarks.
        self.assertLen(results.face_landmarks.landmark, 468)


if __name__ == '__main__':
  absltest.main()
@@ -0,0 +1,292 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MediaPipe Objectron."""

import enum
from typing import List, Tuple, NamedTuple, Optional

import attr
import numpy as np

from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import collection_has_min_size_calculator_pb2
from mediapipe.calculators.util import detection_label_id_to_text_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmark_projection_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
from mediapipe.framework.formats import landmark_pb2
from mediapipe.modules.objectron.calculators import annotation_data_pb2
from mediapipe.modules.objectron.calculators import frame_annotation_to_rect_calculator_pb2
from mediapipe.modules.objectron.calculators import lift_2d_frame_annotation_to_3d_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils


class BoxLandmark(enum.IntEnum):
  """The 9 3D box landmarks."""
  #
  #       3 + + + + + + + + 7
  #       +\                +\          UP
  #       + \               + \
  #       +  \              +  \        |
  #       +   4 + + + + + + + + 8       | y
  #       +   +             +   +       |
  #       +   +             +   +       |
  #       +   +   (0)       +   +       .------- x
  #       +   +             +   +        \
  #       1 + + + + + + + + 5   +         \
  #        \  +              \  +          \ z
  #         \ +               \ +           \
  #          \+                \+
  #           2 + + + + + + + + 6
  CENTER = 0
  BACK_BOTTOM_LEFT = 1
  FRONT_BOTTOM_LEFT = 2
  BACK_TOP_LEFT = 3
  FRONT_TOP_LEFT = 4
  BACK_BOTTOM_RIGHT = 5
  FRONT_BOTTOM_RIGHT = 6
  BACK_TOP_RIGHT = 7
  FRONT_TOP_RIGHT = 8

BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'
BOX_CONNECTIONS = frozenset([
    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_LEFT),
    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_TOP_LEFT),
    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_BOTTOM_RIGHT),
    (BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_TOP_LEFT),
    (BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_RIGHT),
    (BoxLandmark.BACK_TOP_LEFT, BoxLandmark.FRONT_TOP_LEFT),
    (BoxLandmark.BACK_TOP_LEFT, BoxLandmark.BACK_TOP_RIGHT),
    (BoxLandmark.FRONT_TOP_LEFT, BoxLandmark.FRONT_TOP_RIGHT),
    (BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.FRONT_BOTTOM_RIGHT),
    (BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.BACK_TOP_RIGHT),
    (BoxLandmark.FRONT_BOTTOM_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
    (BoxLandmark.BACK_TOP_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
])


@attr.s(auto_attribs=True)
class ObjectronModel(object):
  model_path: str
  label_name: str


@attr.s(auto_attribs=True, frozen=True)
class ShoeModel(ObjectronModel):
  model_path: str = ('mediapipe/modules/objectron/'
                     'object_detection_3d_sneakers.tflite')
  label_name: str = 'Footwear'


@attr.s(auto_attribs=True, frozen=True)
class ChairModel(ObjectronModel):
  model_path: str = ('mediapipe/modules/objectron/'
                     'object_detection_3d_chair.tflite')
  label_name: str = 'Chair'


@attr.s(auto_attribs=True, frozen=True)
class CameraModel(ObjectronModel):
  model_path: str = ('mediapipe/modules/objectron/'
                     'object_detection_3d_camera.tflite')
  label_name: str = 'Camera'


@attr.s(auto_attribs=True, frozen=True)
class CupModel(ObjectronModel):
  model_path: str = ('mediapipe/modules/objectron/'
                     'object_detection_3d_cup.tflite')
  label_name: str = 'Coffee cup, Mug'

_MODEL_DICT = {
    'Shoe': ShoeModel(),
    'Chair': ChairModel(),
    'Cup': CupModel(),
    'Camera': CameraModel()
}


def _download_oss_objectron_models(objectron_model: str):
  """Downloads the objectron models from the MediaPipe GitHub repo if they don't exist in the package."""

  download_utils.download_oss_model(
      'mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite'
  )
  download_utils.download_oss_model(objectron_model)


def get_model_by_name(name: str) -> ObjectronModel:
  if name not in _MODEL_DICT:
    raise ValueError(f'{name} is not a valid model name for Objectron.')
  _download_oss_objectron_models(_MODEL_DICT[name].model_path)
  return _MODEL_DICT[name]


@attr.s(auto_attribs=True)
class ObjectronOutputs(object):
  landmarks_2d: landmark_pb2.NormalizedLandmarkList
  landmarks_3d: landmark_pb2.LandmarkList
  rotation: np.ndarray
  translation: np.ndarray
  scale: np.ndarray


class Objectron(SolutionBase):
  """MediaPipe Objectron.

  MediaPipe Objectron processes an RGB image and returns the 3D box landmarks
  and 2D rectangular bounding box of each detected object.
  """

  def __init__(self,
               static_image_mode: bool = False,
               max_num_objects: int = 5,
               min_detection_confidence: float = 0.5,
               min_tracking_confidence: float = 0.99,
               model_name: str = 'Shoe',
               focal_length: Tuple[float, float] = (1.0, 1.0),
               principal_point: Tuple[float, float] = (0.0, 0.0),
               image_size: Optional[Tuple[int, int]] = None):
    """Initializes a MediaPipe Objectron class.

    Args:
      static_image_mode: Whether to treat the input images as a batch of static
        and possibly unrelated images, or a video stream.
      max_num_objects: Maximum number of objects to detect.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for object
        detection to be considered successful.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        box landmarks to be considered tracked successfully.
      model_name: Name of the model to use for predicting box landmarks;
        currently supports {'Shoe', 'Chair', 'Cup', 'Camera'}.
      focal_length: Camera focal length (fx, fy), by default defined in NDC
        space. To use focal length (fx_pixel, fy_pixel) in pixel space, users
        should provide image_size = (image_width, image_height) to enable
        conversions inside the API.
      principal_point: Camera principal point (px, py), by default defined in
        NDC space. To use principal point (px_pixel, py_pixel) in pixel space,
        users should provide image_size = (image_width, image_height) to enable
        conversions inside the API.
      image_size: (Optional) Size (image_width, image_height) of the input
        image, only needed when focal_length and principal_point are given in
        pixel space.

    Raises:
      ConnectionError: If the objectron open source model can't be downloaded
        from the MediaPipe GitHub repo.
    """
    # Get camera parameters.
    fx, fy = focal_length
    px, py = principal_point
    if image_size is not None:
      half_width = image_size[0] / 2.0
      half_height = image_size[1] / 2.0
      fx = fx / half_width
      fy = fy / half_height
      px = -(px - half_width) / half_width
      py = -(py - half_height) / half_height

    # Create and init model.
    model = get_model_by_name(model_name)
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs={
            'box_landmark_model_path': model.model_path,
            'allowed_labels': model.label_name,
            'max_num_objects': max_num_objects,
        },
        calculator_params={
            'ConstantSidePacketCalculator.packet': [
                constant_side_packet_calculator_pb2
                .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                    bool_value=not static_image_mode)
            ],
            ('objectdetectionoidv4subgraph'
             '__TensorsToDetectionsCalculator.min_score_thresh'):
                min_detection_confidence,
            ('boxlandmarksubgraph__ThresholdingCalculator'
             '.threshold'):
                min_tracking_confidence,
            ('Lift2DFrameAnnotationTo3DCalculator'
             '.normalized_focal_x'): fx,
            ('Lift2DFrameAnnotationTo3DCalculator'
             '.normalized_focal_y'): fy,
            ('Lift2DFrameAnnotationTo3DCalculator'
             '.normalized_principal_point_x'): px,
            ('Lift2DFrameAnnotationTo3DCalculator'
             '.normalized_principal_point_y'): py,
        },
        outputs=['detected_objects'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the box landmarks and rectangular bounding box of each detected object.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with a "detected_objects" field that contains a list
      of detected 3D bounding boxes. Each detected box is represented as an
      "ObjectronOutputs" instance.
    """

    results = super().process(input_data={'image': image})
    if results.detected_objects:
      results.detected_objects = self._convert_format(results.detected_objects)
    else:
      results.detected_objects = None
    return results

  def _convert_format(
      self,
      inputs: annotation_data_pb2.FrameAnnotation) -> List[ObjectronOutputs]:
    new_outputs = list()
    for annotation in inputs.annotations:
      # Get 3D object pose.
      rotation = np.reshape(np.array(annotation.rotation), (3, 3))
      translation = np.array(annotation.translation)
      scale = np.array(annotation.scale)
      # Get 2D/3D landmarks.
      landmarks_2d = landmark_pb2.NormalizedLandmarkList()
      landmarks_3d = landmark_pb2.LandmarkList()
      for keypoint in annotation.keypoints:
        point_2d = keypoint.point_2d
        landmarks_2d.landmark.add(x=point_2d.x, y=point_2d.y)
        point_3d = keypoint.point_3d
        landmarks_3d.landmark.add(x=point_3d.x, y=point_3d.y, z=point_3d.z)

      # Add to objectron outputs.
      new_outputs.append(ObjectronOutputs(landmarks_2d, landmarks_3d,
                                          rotation, translation, scale=scale))
    return new_outputs
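A minimal usage sketch for the Objectron API above (an editor's illustration, not part of this commit; the focal length, principal point, and image path are hypothetical values). Passing image_size lets the API do the pixel-to-NDC conversion shown in __init__:

import cv2
import mediapipe as mp

# With image_size given, a pixel-space focal length fx = fy = 1000 on a
# 1920x1080 image is converted internally, e.g. fx_ndc = 1000 / (1920 / 2).
with mp.solutions.objectron.Objectron(
    static_image_mode=True,
    max_num_objects=2,
    model_name='Shoe',
    focal_length=(1000, 1000),
    principal_point=(960, 540),
    image_size=(1920, 1080)) as objectron:
  image = cv2.imread('shoes.jpg')  # Hypothetical input image.
  results = objectron.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.detected_objects:
    for obj in results.detected_objects:
      print(obj.rotation, obj.translation, obj.scale)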
@@ -0,0 +1,81 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for mediapipe.python.solutions.objectron."""

import os

from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt

# resources dependency
from mediapipe.python.solutions import objectron as mp_objectron

TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 30  # pixels
EXPECTED_BOX_COORDINATES_PREDICTION = [[[236, 413], [408, 474], [135, 457],
                                        [383, 505], [80, 478], [408, 345],
                                        [130, 347], [384, 355], [72, 353]],
                                       [[241, 206], [411, 279], [131, 280],
                                        [392, 249], [78, 252], [412, 155],
                                        [140, 178], [396, 105], [89, 137]]]


class ObjectronTest(parameterized.TestCase):

  def test_invalid_image_shape(self):
    with mp_objectron.Objectron() as objectron:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        objectron.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    with mp_objectron.Objectron() as objectron:
      image = np.zeros([100, 100, 3], dtype=np.uint8)
      image.fill(255)
      results = objectron.process(image)
      self.assertIsNone(results.detected_objects)

  @parameterized.named_parameters(('static_image_mode', True, 1),
                                  ('video_mode', False, 5))
  def test_multi_objects(self, static_image_mode, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/shoes.jpg')
    image = cv2.imread(image_path)

    with mp_objectron.Objectron(
        static_image_mode=static_image_mode,
        max_num_objects=2,
        min_detection_confidence=0.5) as objectron:
      for _ in range(num_frames):
        results = objectron.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        multi_box_coordinates = []
        for detected_object in results.detected_objects:
          landmarks = detected_object.landmarks_2d
          self.assertLen(landmarks.landmark, 9)
          x = [landmark.x for landmark in landmarks.landmark]
          y = [landmark.y for landmark in landmarks.landmark]
          box_coordinates = np.transpose(np.stack((y, x))) * image.shape[0:2]
          multi_box_coordinates.append(box_coordinates)
        self.assertLen(multi_box_coordinates, 2)
        prediction_error = np.abs(
            np.asarray(multi_box_coordinates) -
            np.asarray(EXPECTED_BOX_COORDINATES_PREDICTION))
        npt.assert_array_less(prediction_error, DIFF_THRESHOLD)


if __name__ == '__main__':
  absltest.main()
@@ -0,0 +1,216 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MediaPipe Pose."""

import enum
from typing import NamedTuple

import numpy as np

from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# The following imports are needed because python pb2 silently discards
# unknown protobuf fields.
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmarks_smoothing_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
from mediapipe.calculators.util import visibility_smoothing_calculator_pb2
from mediapipe.framework.tool import switch_container_pb2
# pylint: enable=unused-import

from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils


class PoseLandmark(enum.IntEnum):
  """The 33 pose landmarks."""
  NOSE = 0
  LEFT_EYE_INNER = 1
  LEFT_EYE = 2
  LEFT_EYE_OUTER = 3
  RIGHT_EYE_INNER = 4
  RIGHT_EYE = 5
  RIGHT_EYE_OUTER = 6
  LEFT_EAR = 7
  RIGHT_EAR = 8
  MOUTH_LEFT = 9
  MOUTH_RIGHT = 10
  LEFT_SHOULDER = 11
  RIGHT_SHOULDER = 12
  LEFT_ELBOW = 13
  RIGHT_ELBOW = 14
  LEFT_WRIST = 15
  RIGHT_WRIST = 16
  LEFT_PINKY = 17
  RIGHT_PINKY = 18
  LEFT_INDEX = 19
  RIGHT_INDEX = 20
  LEFT_THUMB = 21
  RIGHT_THUMB = 22
  LEFT_HIP = 23
  RIGHT_HIP = 24
  LEFT_KNEE = 25
  RIGHT_KNEE = 26
  LEFT_ANKLE = 27
  RIGHT_ANKLE = 28
  LEFT_HEEL = 29
  RIGHT_HEEL = 30
  LEFT_FOOT_INDEX = 31
  RIGHT_FOOT_INDEX = 32

BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
POSE_CONNECTIONS = frozenset([
    (PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
    (PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
    (PoseLandmark.RIGHT_EYE, PoseLandmark.RIGHT_EYE_OUTER),
    (PoseLandmark.RIGHT_EYE_OUTER, PoseLandmark.RIGHT_EAR),
    (PoseLandmark.NOSE, PoseLandmark.LEFT_EYE_INNER),
    (PoseLandmark.LEFT_EYE_INNER, PoseLandmark.LEFT_EYE),
    (PoseLandmark.LEFT_EYE, PoseLandmark.LEFT_EYE_OUTER),
    (PoseLandmark.LEFT_EYE_OUTER, PoseLandmark.LEFT_EAR),
    (PoseLandmark.MOUTH_RIGHT, PoseLandmark.MOUTH_LEFT),
    (PoseLandmark.RIGHT_SHOULDER, PoseLandmark.LEFT_SHOULDER),
    (PoseLandmark.RIGHT_SHOULDER, PoseLandmark.RIGHT_ELBOW),
    (PoseLandmark.RIGHT_ELBOW, PoseLandmark.RIGHT_WRIST),
    (PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_PINKY),
    (PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_INDEX),
    (PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_THUMB),
    (PoseLandmark.RIGHT_PINKY, PoseLandmark.RIGHT_INDEX),
    (PoseLandmark.LEFT_SHOULDER, PoseLandmark.LEFT_ELBOW),
    (PoseLandmark.LEFT_ELBOW, PoseLandmark.LEFT_WRIST),
    (PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_PINKY),
    (PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_INDEX),
    (PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_THUMB),
    (PoseLandmark.LEFT_PINKY, PoseLandmark.LEFT_INDEX),
    (PoseLandmark.RIGHT_SHOULDER, PoseLandmark.RIGHT_HIP),
    (PoseLandmark.LEFT_SHOULDER, PoseLandmark.LEFT_HIP),
    (PoseLandmark.RIGHT_HIP, PoseLandmark.LEFT_HIP),
    (PoseLandmark.RIGHT_HIP, PoseLandmark.RIGHT_KNEE),
    (PoseLandmark.LEFT_HIP, PoseLandmark.LEFT_KNEE),
    (PoseLandmark.RIGHT_KNEE, PoseLandmark.RIGHT_ANKLE),
    (PoseLandmark.LEFT_KNEE, PoseLandmark.LEFT_ANKLE),
    (PoseLandmark.RIGHT_ANKLE, PoseLandmark.RIGHT_HEEL),
    (PoseLandmark.LEFT_ANKLE, PoseLandmark.LEFT_HEEL),
    (PoseLandmark.RIGHT_HEEL, PoseLandmark.RIGHT_FOOT_INDEX),
    (PoseLandmark.LEFT_HEEL, PoseLandmark.LEFT_FOOT_INDEX),
    (PoseLandmark.RIGHT_ANKLE, PoseLandmark.RIGHT_FOOT_INDEX),
    (PoseLandmark.LEFT_ANKLE, PoseLandmark.LEFT_FOOT_INDEX),
])


def _download_oss_pose_landmark_model(model_complexity):
  """Downloads the pose landmark lite/heavy model from the MediaPipe GitHub repo if it doesn't exist in the package."""

  if model_complexity == 0:
    download_utils.download_oss_model(
        'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite')
  elif model_complexity == 2:
    download_utils.download_oss_model(
        'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite')


class Pose(SolutionBase):
  """MediaPipe Pose.

  MediaPipe Pose processes an RGB image and returns pose landmarks on the most
  prominent person detected.

  Please refer to https://solutions.mediapipe.dev/pose#python-solution-api for
  usage examples.
  """

  def __init__(self,
               static_image_mode=False,
               model_complexity=1,
               smooth_landmarks=True,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe Pose object.

    Args:
      static_image_mode: Whether to treat the input images as a batch of static
        and possibly unrelated images, or a video stream. See details in
        https://solutions.mediapipe.dev/pose#static_image_mode.
      model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
        details in https://solutions.mediapipe.dev/pose#model_complexity.
      smooth_landmarks: Whether to filter landmarks across different input
        images to reduce jitter. See details in
        https://solutions.mediapipe.dev/pose#smooth_landmarks.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/pose#min_detection_confidence.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        pose landmarks to be considered tracked successfully. See details in
        https://solutions.mediapipe.dev/pose#min_tracking_confidence.
    """
    _download_oss_pose_landmark_model(model_complexity)
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs={
            'model_complexity': model_complexity,
            'smooth_landmarks': smooth_landmarks and not static_image_mode,
        },
        calculator_params={
            'ConstantSidePacketCalculator.packet': [
                constant_side_packet_calculator_pb2
                .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                    bool_value=not static_image_mode)
            ],
            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
                min_detection_confidence,
            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
                min_tracking_confidence,
        },
        outputs=['pose_landmarks', 'pose_world_landmarks'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the pose landmarks on the most prominent person detected.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple that has two fields describing the landmarks on the most
      prominent person detected:
        1) "pose_landmarks" field that contains the pose landmarks.
        2) "pose_world_landmarks" field that contains the pose landmarks in
           real-world 3D coordinates that are in meters with the origin at the
           center between hips.
    """

    results = super().process(input_data={'image': image})
    if results.pose_landmarks:
      for landmark in results.pose_landmarks.landmark:
        landmark.ClearField('presence')
    if results.pose_world_landmarks:
      for landmark in results.pose_world_landmarks.landmark:
        landmark.ClearField('presence')
    return results
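A minimal usage sketch for the Pose API above (an editor's illustration, not part of this commit; the image path is hypothetical), denormalizing landmarks to pixel coordinates the same way the tests below do:

import cv2
import mediapipe as mp

with mp.solutions.pose.Pose(static_image_mode=True,
                            model_complexity=1) as pose:
  image = cv2.imread('pose.jpg')  # Hypothetical input image.
  results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.pose_landmarks:
    rows, cols, _ = image.shape
    nose = results.pose_landmarks.landmark[mp.solutions.pose.PoseLandmark.NOSE]
    # Normalized coordinates scale by image width/height to give pixels.
    print(int(nose.x * cols), int(nose.y * rows))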
@@ -0,0 +1,197 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.pose."""

import json
import os
import tempfile
from typing import NamedTuple

from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt

# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import pose as mp_pose

TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 15  # pixels
EXPECTED_POSE_LANDMARKS = np.array([[460, 283], [467, 273], [471, 273],
                                    [474, 273], [465, 273], [465, 273],
                                    [466, 273], [491, 277], [480, 277],
                                    [470, 294], [465, 294], [545, 319],
                                    [453, 329], [622, 323], [375, 316],
                                    [696, 316], [299, 307], [719, 316],
                                    [278, 306], [721, 311], [274, 304],
                                    [713, 313], [283, 306], [520, 476],
                                    [467, 471], [612, 550], [358, 490],
                                    [701, 613], [349, 611], [709, 624],
                                    [363, 630], [730, 633], [303, 628]])
WORLD_DIFF_THRESHOLD = 0.2  # meters
EXPECTED_POSE_WORLD_LANDMARKS = np.array([
    [-0.11, -0.59, -0.15], [-0.09, -0.64, -0.16], [-0.09, -0.64, -0.16],
    [-0.09, -0.64, -0.16], [-0.11, -0.64, -0.14], [-0.11, -0.64, -0.14],
    [-0.11, -0.64, -0.14], [0.01, -0.65, -0.15], [-0.06, -0.64, -0.05],
    [-0.07, -0.57, -0.15], [-0.09, -0.57, -0.12], [0.18, -0.49, -0.09],
    [-0.14, -0.5, -0.03], [0.41, -0.48, -0.11], [-0.42, -0.5, -0.02],
    [0.64, -0.49, -0.17], [-0.63, -0.51, -0.13], [0.7, -0.5, -0.19],
    [-0.71, -0.53, -0.15], [0.72, -0.51, -0.23], [-0.69, -0.54, -0.19],
    [0.66, -0.49, -0.19], [-0.64, -0.52, -0.15], [0.09, 0., -0.04],
    [-0.09, -0., 0.03], [0.41, 0.23, -0.09], [-0.43, 0.1, -0.11],
    [0.69, 0.49, -0.04], [-0.48, 0.47, -0.02], [0.72, 0.52, -0.04],
    [-0.48, 0.51, -0.02], [0.8, 0.5, -0.14], [-0.59, 0.52, -0.11],
])


class PoseTest(parameterized.TestCase):

  def _landmarks_list_to_array(self, landmark_list, image_shape):
    rows, cols, _ = image_shape
    return np.asarray([(lmk.x * cols, lmk.y * rows, lmk.z * cols)
                       for lmk in landmark_list.landmark])

  def _world_landmarks_list_to_array(self, landmark_list):
    return np.asarray([(lmk.x, lmk.y, lmk.z)
                       for lmk in landmark_list.landmark])

  def _assert_diff_less(self, array1, array2, threshold):
    npt.assert_array_less(np.abs(array1 - array2), threshold)

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    mp_drawing.draw_landmarks(frame, results.pose_landmarks,
                              mp_pose.POSE_CONNECTIONS)
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    with mp_pose.Pose() as pose:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        pose.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    with mp_pose.Pose() as pose:
      image = np.zeros([100, 100, 3], dtype=np.uint8)
      image.fill(255)
      results = pose.process(image)
      self.assertIsNone(results.pose_landmarks)

  @parameterized.named_parameters(('static_lite', True, 0, 3),
                                  ('static_full', True, 1, 3),
                                  ('static_heavy', True, 2, 3),
                                  ('video_lite', False, 0, 3),
                                  ('video_full', False, 1, 3),
                                  ('video_heavy', False, 2, 3))
  def test_on_image(self, static_image_mode, model_complexity, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
    image = cv2.imread(image_path)
    with mp_pose.Pose(static_image_mode=static_image_mode,
                      model_complexity=model_complexity) as pose:
      for idx in range(num_frames):
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # TODO: Add rendering of world 3D when supported.
        self._annotate(image.copy(), results, idx)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.pose_landmarks,
                                          image.shape)[:, :2],
            EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
        self._assert_diff_less(
            self._world_landmarks_list_to_array(results.pose_world_landmarks),
            EXPECTED_POSE_WORLD_LANDMARKS, WORLD_DIFF_THRESHOLD)

  @parameterized.named_parameters(
      ('full', 1, 'pose_squats.full.npz'))
  def test_on_video(self, model_complexity, expected_name):
    """Tests pose models on a video."""
    # If set to `True`, dumps the actual predictions to .npz and JSON files.
    dump_predictions = False
    # Thresholds for comparing actual and expected predictions.
    diff_threshold = 15  # pixels
    world_diff_threshold = 0.1  # meters

    video_path = os.path.join(os.path.dirname(__file__),
                              'testdata/pose_squats.mp4')
    expected_path = os.path.join(os.path.dirname(__file__),
                                 'testdata/{}'.format(expected_name))

    # Predict pose landmarks for each frame.
    video_cap = cv2.VideoCapture(video_path)
    actual_per_frame = []
    actual_world_per_frame = []
    frame_idx = 0
    with mp_pose.Pose(static_image_mode=False,
                      model_complexity=model_complexity) as pose:
      while True:
        # Get the next frame of the video.
        success, input_frame = video_cap.read()
        if not success:
          break

        # Run the pose tracker.
        input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
        result = pose.process(image=input_frame)
        pose_landmarks = self._landmarks_list_to_array(result.pose_landmarks,
                                                       input_frame.shape)
        pose_world_landmarks = self._world_landmarks_list_to_array(
            result.pose_world_landmarks)

        actual_per_frame.append(pose_landmarks)
        actual_world_per_frame.append(pose_world_landmarks)

        input_frame = cv2.cvtColor(input_frame, cv2.COLOR_RGB2BGR)
        self._annotate(input_frame, result, frame_idx)
        frame_idx += 1
    actual = np.array(actual_per_frame)
    actual_world = np.array(actual_world_per_frame)

    if dump_predictions:
      # Dump .npz
      with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        np.savez(tmp_file, predictions=actual, predictions_world=actual_world)
        print('Predictions saved as .npz to {}'.format(tmp_file.name))

      # Dump JSON
      with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        with open(tmp_file.name, 'w') as fl:
          dump_data = {
              'predictions': np.around(actual, 3).tolist(),
              'predictions_world': np.around(actual_world, 3).tolist()
          }
          fl.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))
        print('Predictions saved as JSON to {}'.format(tmp_file.name))

    # Validate actual vs. expected landmarks.
    expected = np.load(expected_path)['predictions']
    assert actual.shape == expected.shape, (
        'Unexpected shape of predictions: {} instead of {}'.format(
            actual.shape, expected.shape))
    self._assert_diff_less(
        actual[..., :2], expected[..., :2], threshold=diff_threshold)

    # Validate actual vs. expected world landmarks.
    expected_world = np.load(expected_path)['predictions_world']
    assert actual_world.shape == expected_world.shape, (
        'Unexpected shape of world predictions: {} instead of {}'.format(
            actual_world.shape, expected_world.shape))
    self._assert_diff_less(
        actual_world, expected_world, threshold=world_diff_threshold)


if __name__ == '__main__':
  absltest.main()
@@ -0,0 +1,76 @@
|
||||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""MediaPipe Selfie Segmentation."""
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
import numpy as np
|
||||
# The following imports are needed because python pb2 silently discards
|
||||
# unknown protobuf fields.
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
||||
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
||||
from mediapipe.calculators.tensor import inference_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_segmentation_calculator_pb2
|
||||
from mediapipe.calculators.util import local_file_contents_calculator_pb2
|
||||
from mediapipe.framework.tool import switch_container_pb2
|
||||
# pylint: enable=unused-import
|
||||
|
||||
from mediapipe.python.solution_base import SolutionBase
|
||||
|
||||
BINARYPB_FILE_PATH = 'mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb'
|
||||
|
||||
|
||||
class SelfieSegmentation(SolutionBase):
  """MediaPipe Selfie Segmentation.

  MediaPipe Selfie Segmentation processes an RGB image and returns a
  segmentation mask.

  Please refer to
  https://solutions.mediapipe.dev/selfie_segmentation#python-solution-api for
  usage examples.
  """

  def __init__(self, model_selection=0):
    """Initializes a MediaPipe Selfie Segmentation object.

    Args:
      model_selection: 0 or 1. 0 to select a general-purpose model, and 1 to
        select a model more optimized for landscape images. See details in
        https://solutions.mediapipe.dev/selfie_segmentation#model_selection.
    """
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs={
            'model_selection': model_selection,
        },
        outputs=['segmentation_mask'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns a segmentation mask.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three-channel RGB.

    Returns:
      A NamedTuple object with a "segmentation_mask" field that contains a
      float-type 2D NumPy array representing the mask.
    """

    return super().process(input_data={'image': image})
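
A minimal usage sketch of the class above, assuming `mediapipe` and `opencv-python` are installed; the input file name is hypothetical:

import cv2
import mediapipe as mp

mp_selfie_segmentation = mp.solutions.selfie_segmentation

image = cv2.imread('portrait.jpg')  # hypothetical local image file
with mp_selfie_segmentation.SelfieSegmentation(model_selection=0) as segmenter:
  # Convert BGR (OpenCV's decode order) to the RGB input the solution expects.
  results = segmenter.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

# The mask has the input's height and width, with float values in [0, 1]
# expressing per-pixel foreground confidence.
mask = results.segmentation_mask
print(mask.shape, mask.dtype, mask.min(), mask.max())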
@@ -0,0 +1,68 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.selfie_segmentation."""
|
||||
|
||||
import os
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
# resources dependency
|
||||
# undeclared dependency
|
||||
from mediapipe.python.solutions import selfie_segmentation as mp_selfie_segmentation
|
||||
|
||||
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
|
||||
|
||||
|
||||
class SelfieSegmentationTest(parameterized.TestCase):

  def _draw(self, frame: np.ndarray, mask: np.ndarray):
    frame = np.minimum(frame, np.stack((mask,) * 3, axis=-1))
    path = os.path.join(tempfile.gettempdir(),
                        self.id().split('.')[-1] + '.png')
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        selfie_segmentation.process(
            np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
      image = np.zeros([100, 100, 3], dtype=np.uint8)
      image.fill(255)
      results = selfie_segmentation.process(image)
      normalized_segmentation_mask = (results.segmentation_mask *
                                      255).astype(int)
      self.assertLess(np.amax(normalized_segmentation_mask), 1)

  @parameterized.named_parameters(('general', 0), ('landscape', 1))
  def test_segmentation(self, model_selection):
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/portrait.jpg')
    image = cv2.imread(image_path)
    with mp_selfie_segmentation.SelfieSegmentation(
        model_selection=model_selection) as selfie_segmentation:
      results = selfie_segmentation.process(
          cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
      normalized_segmentation_mask = (results.segmentation_mask *
                                      255).astype(int)
      self._draw(image.copy(), normalized_segmentation_mask)


if __name__ == '__main__':
  absltest.main()
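
The test above only renders the mask via `_draw`. A common next step in applications is to binarize the mask and composite a new background; a short sketch under the same assumptions as before (the 0.5 cutoff is an arbitrary illustration value, not one mandated by the solution):

import cv2
import numpy as np
import mediapipe as mp

image = cv2.imread('portrait.jpg')  # hypothetical local image file
with mp.solutions.selfie_segmentation.SelfieSegmentation(
    model_selection=1) as segmenter:
  mask = segmenter.process(
      cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).segmentation_mask

# Keep pixels where foreground confidence exceeds 0.5; gray out the rest.
condition = np.stack((mask,) * 3, axis=-1) > 0.5
background = np.full(image.shape, 192, dtype=np.uint8)  # flat gray backdrop
cv2.imwrite('portrait_composited.png', np.where(condition, image, background))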