Initial commit

Untriex Programming
2021-08-31 22:06:02 +02:00
commit 9b6723e11e
5142 changed files with 1455625 additions and 0 deletions


@@ -0,0 +1,25 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Solutions Python API."""
import mediapipe.python.solutions.drawing_styles
import mediapipe.python.solutions.drawing_utils
import mediapipe.python.solutions.face_detection
import mediapipe.python.solutions.face_mesh
import mediapipe.python.solutions.hands
import mediapipe.python.solutions.holistic
import mediapipe.python.solutions.objectron
import mediapipe.python.solutions.pose
import mediapipe.python.solutions.selfie_segmentation
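These imports are what populate the solutions namespace; a minimal usage
sketch, assuming the mediapipe wheel is installed and using the conventional
top-level alias:

import mediapipe as mp

mp_hands = mp.solutions.hands            # resolved via the imports above
mp_drawing = mp.solutions.drawing_utils
print(mp_hands.HandLandmark.WRIST)       # HandLandmark.WRIST (value 0)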


@@ -0,0 +1,37 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Downloading utils."""
import os
import shutil
import urllib.request
_OSS_URL_PREFIX = 'https://github.com/google/mediapipe/raw/master/'
def download_oss_model(model_path: str):
"""Downloads the oss model from the MediaPipe GitHub repo if it doesn't exist in the package."""
mp_root_path = os.sep.join(os.path.abspath(__file__).split(os.sep)[:-4])
model_abspath = os.path.join(mp_root_path, model_path)
if os.path.exists(model_abspath):
return
model_url = _OSS_URL_PREFIX + model_path
print('Downloading model to ' + model_abspath)
with urllib.request.urlopen(model_url) as response, open(model_abspath,
'wb') as out_file:
if response.code != 200:
      raise ConnectionError('Cannot download ' + model_path +
                            ' from the MediaPipe GitHub repo.')
shutil.copyfileobj(response, out_file)
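A hedged usage sketch: the holistic module later in this commit calls this
helper with pose-landmark model paths, so the same call works directly
(assuming network access and a writable package directory):

from mediapipe.python.solutions import download_utils

# Fetches the file into the installed package tree if it is not already there.
download_utils.download_oss_model(
    'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite')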


@@ -0,0 +1,146 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe solution drawing styles."""
from typing import Mapping, Tuple
from mediapipe.python.solutions.drawing_utils import DrawingSpec
from mediapipe.python.solutions.hands import HandLandmark
_RADIUS = 5
_RED = (54, 67, 244)
_GREEN = (118, 230, 0)
_BLUE = (192, 101, 21)
_YELLOW = (0, 204, 255)
_GRAY = (174, 164, 144)
_PURPLE = (128, 64, 128)
_PEACH = (180, 229, 255)
# Hands
_THICKNESS_WRIST_MCP = 3
_THICKNESS_FINGER = 2
_THICKNESS_DOT = -1
# Hand landmarks
_PALM_LANDMARKS = (HandLandmark.WRIST, HandLandmark.THUMB_CMC,
                   HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP,
                   HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP)
_THUMB_LANDMARKS = (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP,
                    HandLandmark.THUMB_TIP)
_INDEX_FINGER_LANDMARKS = (HandLandmark.INDEX_FINGER_PIP,
HandLandmark.INDEX_FINGER_DIP,
HandLandmark.INDEX_FINGER_TIP)
_MIDDLE_FINGER_LANDMARKS = (HandLandmark.MIDDLE_FINGER_PIP,
HandLandmark.MIDDLE_FINGER_DIP,
HandLandmark.MIDDLE_FINGER_TIP)
_RING_FINGER_LANDMARKS = (HandLandmark.RING_FINGER_PIP,
HandLandmark.RING_FINGER_DIP,
HandLandmark.RING_FINGER_TIP)
_PINKY_FINGER_LANDMARKS = (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP,
HandLandmark.PINKY_TIP)
_HAND_LANDMARK_STYLE = {
    _PALM_LANDMARKS:
DrawingSpec(
color=_RED, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _THUMB_LANDMARKS:
DrawingSpec(
color=_PEACH, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
_INDEX_FINGER_LANDMARKS:
DrawingSpec(
color=_PURPLE, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
_MIDDLE_FINGER_LANDMARKS:
DrawingSpec(
color=_YELLOW, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
_RING_FINGER_LANDMARKS:
DrawingSpec(
color=_GREEN, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
_PINKY_FINGER_LANDMARKS:
DrawingSpec(
color=_BLUE, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
}
# Hand connections
_PALM_CONNECTIONS = ((HandLandmark.WRIST, HandLandmark.THUMB_CMC),
(HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
(HandLandmark.MIDDLE_FINGER_MCP,
HandLandmark.RING_FINGER_MCP),
(HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
(HandLandmark.INDEX_FINGER_MCP,
HandLandmark.MIDDLE_FINGER_MCP), (HandLandmark.WRIST,
HandLandmark.PINKY_MCP))
_THUMB_CONNECTIONS = ((HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
(HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
(HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP))
_INDEX_FINGER_CONNECTIONS = ((HandLandmark.INDEX_FINGER_MCP,
HandLandmark.INDEX_FINGER_PIP),
(HandLandmark.INDEX_FINGER_PIP,
HandLandmark.INDEX_FINGER_DIP),
(HandLandmark.INDEX_FINGER_DIP,
HandLandmark.INDEX_FINGER_TIP))
_MIDDLE_FINGER_CONNECTIONS = ((HandLandmark.MIDDLE_FINGER_MCP,
HandLandmark.MIDDLE_FINGER_PIP),
(HandLandmark.MIDDLE_FINGER_PIP,
HandLandmark.MIDDLE_FINGER_DIP),
(HandLandmark.MIDDLE_FINGER_DIP,
HandLandmark.MIDDLE_FINGER_TIP))
_RING_FINGER_CONNECTIONS = ((HandLandmark.RING_FINGER_MCP,
HandLandmark.RING_FINGER_PIP),
(HandLandmark.RING_FINGER_PIP,
HandLandmark.RING_FINGER_DIP),
(HandLandmark.RING_FINGER_DIP,
HandLandmark.RING_FINGER_TIP))
_PINKY_FINGER_CONNECTIONS = ((HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
(HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
(HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP))
_HAND_CONNECTION_STYLE = {
_PALM_CONNECTIONS:
DrawingSpec(color=_GRAY, thickness=_THICKNESS_WRIST_MCP),
_THUMB_CONNECTIONS:
DrawingSpec(color=_PEACH, thickness=_THICKNESS_FINGER),
_INDEX_FINGER_CONNECTIONS:
DrawingSpec(color=_PURPLE, thickness=_THICKNESS_FINGER),
_MIDDLE_FINGER_CONNECTIONS:
DrawingSpec(color=_YELLOW, thickness=_THICKNESS_FINGER),
_RING_FINGER_CONNECTIONS:
DrawingSpec(color=_GREEN, thickness=_THICKNESS_FINGER),
_PINKY_FINGER_CONNECTIONS:
DrawingSpec(color=_BLUE, thickness=_THICKNESS_FINGER)
}
def get_default_hand_landmark_style() -> Mapping[int, DrawingSpec]:
"""Returns the default hand landmark drawing style.
Returns:
A mapping from each hand landmark to the default drawing spec.
"""
hand_landmark_style = {}
for k, v in _HAND_LANDMARK_STYLE.items():
for landmark in k:
hand_landmark_style[landmark] = v
return hand_landmark_style
def get_default_hand_connection_style(
) -> Mapping[Tuple[int, int], DrawingSpec]:
"""Returns the default hand connection drawing style.
Returns:
A mapping from each hand connection to the default drawing spec.
"""
hand_connection_style = {}
for k, v in _HAND_CONNECTION_STYLE.items():
for connection in k:
hand_connection_style[connection] = v
return hand_connection_style
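A minimal sketch combining these styles with draw_landmarks from
drawing_utils, as the hands test later in this commit does (image and
results are assumed to come from cv2 and the Hands solution, respectively):

from mediapipe.python.solutions import drawing_styles
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import hands as mp_hands

for hand_landmarks in results.multi_hand_landmarks:
  mp_drawing.draw_landmarks(
      image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
      drawing_styles.get_default_hand_landmark_style(),
      drawing_styles.get_default_hand_connection_style())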


@@ -0,0 +1,307 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe solution drawing utils."""
import math
from typing import List, Mapping, Optional, Tuple, Union
import cv2
import dataclasses
import matplotlib.pyplot as plt
import numpy as np
from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import location_data_pb2
from mediapipe.framework.formats import landmark_pb2
PRESENCE_THRESHOLD = 0.5
RGB_CHANNELS = 3
BLACK_COLOR = (0, 0, 0)
RED_COLOR = (0, 0, 255)
GREEN_COLOR = (0, 128, 0)
BLUE_COLOR = (255, 0, 0)
VISIBILITY_THRESHOLD = 0.5
@dataclasses.dataclass
class DrawingSpec:
# Color for drawing the annotation. Default to the green color.
color: Tuple[int, int, int] = (0, 255, 0)
# Thickness for drawing the annotation. Default to 2 pixels.
thickness: int = 2
# Circle radius. Default to 2 pixels.
circle_radius: int = 2
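# Illustrative sketch (not part of the original file): two common specs.
# Color tuples follow the same BGR ordering as the constants above (e.g.
# RED_COLOR), and a thickness of -1 fills the circle (cv2 convention).
#   dot_spec = DrawingSpec(color=(0, 0, 255), thickness=-1, circle_radius=5)
#   line_spec = DrawingSpec(color=(128, 128, 128), thickness=3)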
def _normalized_to_pixel_coordinates(
normalized_x: float, normalized_y: float, image_width: int,
image_height: int) -> Union[None, Tuple[int, int]]:
"""Converts normalized value pair to pixel coordinates."""
# Checks if the float value is between 0 and 1.
def is_valid_normalized_value(value: float) -> bool:
return (value > 0 or math.isclose(0, value)) and (value < 1 or
math.isclose(1, value))
if not (is_valid_normalized_value(normalized_x) and
is_valid_normalized_value(normalized_y)):
# TODO: Draw coordinates even if it's outside of the image bounds.
return None
x_px = min(math.floor(normalized_x * image_width), image_width - 1)
y_px = min(math.floor(normalized_y * image_height), image_height - 1)
return x_px, y_px
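# For example, _normalized_to_pixel_coordinates(0.5, 0.5, 640, 480) yields
# (320, 240), while an out-of-range input such as normalized_x=1.5 returns
# None.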
def draw_detection(
image: np.ndarray,
detection: detection_pb2.Detection,
keypoint_drawing_spec: DrawingSpec = DrawingSpec(color=RED_COLOR),
bbox_drawing_spec: DrawingSpec = DrawingSpec()):
"""Draws the detction bounding box and keypoints on the image.
Args:
image: A three channel RGB image represented as numpy ndarray.
detection: A detection proto message to be annotated on the image.
keypoint_drawing_spec: A DrawingSpec object that specifies the keypoints'
drawing settings such as color, line thickness, and circle radius.
bbox_drawing_spec: A DrawingSpec object that specifies the bounding box's
drawing settings such as color and line thickness.
Raises:
    ValueError: If one of the following:
      a) The input image is not three channel RGB.
      b) The location data is not relative data.
"""
if not detection.location_data:
return
if image.shape[2] != RGB_CHANNELS:
raise ValueError('Input image must contain three channel rgb data.')
image_rows, image_cols, _ = image.shape
location = detection.location_data
if location.format != location_data_pb2.LocationData.RELATIVE_BOUNDING_BOX:
raise ValueError(
        'LocationData must be relative for this drawing function to work.')
# Draws keypoints.
for keypoint in location.relative_keypoints:
keypoint_px = _normalized_to_pixel_coordinates(keypoint.x, keypoint.y,
image_cols, image_rows)
cv2.circle(image, keypoint_px, keypoint_drawing_spec.circle_radius,
keypoint_drawing_spec.color, keypoint_drawing_spec.thickness)
# Draws bounding box if exists.
if not location.HasField('relative_bounding_box'):
return
relative_bounding_box = location.relative_bounding_box
rect_start_point = _normalized_to_pixel_coordinates(
relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols,
image_rows)
rect_end_point = _normalized_to_pixel_coordinates(
relative_bounding_box.xmin + relative_bounding_box.width,
      relative_bounding_box.ymin + relative_bounding_box.height, image_cols,
image_rows)
cv2.rectangle(image, rect_start_point, rect_end_point,
bbox_drawing_spec.color, bbox_drawing_spec.thickness)
def draw_landmarks(
image: np.ndarray,
landmark_list: landmark_pb2.NormalizedLandmarkList,
connections: Optional[List[Tuple[int, int]]] = None,
landmark_drawing_spec: Union[DrawingSpec,
Mapping[int, DrawingSpec]] = DrawingSpec(
color=RED_COLOR),
connection_drawing_spec: Union[DrawingSpec,
Mapping[Tuple[int, int],
DrawingSpec]] = DrawingSpec()):
"""Draws the landmarks and the connections on the image.
Args:
image: A three channel RGB image represented as numpy ndarray.
landmark_list: A normalized landmark list proto message to be annotated on
the image.
    connections: A list of landmark index tuples that specifies which landmarks
      are to be connected in the drawing.
landmark_drawing_spec: Either a DrawingSpec object or a mapping from
hand landmarks to the DrawingSpecs that specifies the landmarks' drawing
settings such as color, line thickness, and circle radius.
connection_drawing_spec: Either a DrawingSpec object or a mapping from
hand connections to the DrawingSpecs that specifies the
connections' drawing settings such as color and line thickness.
Raises:
    ValueError: If one of the following:
      a) The input image is not three channel RGB.
      b) Any connection contains an invalid landmark index.
"""
if not landmark_list:
return
if image.shape[2] != RGB_CHANNELS:
raise ValueError('Input image must contain three channel rgb data.')
image_rows, image_cols, _ = image.shape
idx_to_coordinates = {}
for idx, landmark in enumerate(landmark_list.landmark):
if ((landmark.HasField('visibility') and
landmark.visibility < VISIBILITY_THRESHOLD) or
(landmark.HasField('presence') and
landmark.presence < PRESENCE_THRESHOLD)):
continue
landmark_px = _normalized_to_pixel_coordinates(landmark.x, landmark.y,
image_cols, image_rows)
if landmark_px:
idx_to_coordinates[idx] = landmark_px
if connections:
num_landmarks = len(landmark_list.landmark)
# Draws the connections if the start and end landmarks are both visible.
for connection in connections:
start_idx = connection[0]
end_idx = connection[1]
if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks):
raise ValueError(f'Landmark index is out of range. Invalid connection '
f'from landmark #{start_idx} to landmark #{end_idx}.')
if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates:
drawing_spec = connection_drawing_spec[connection] if isinstance(
connection_drawing_spec, Mapping) else connection_drawing_spec
cv2.line(image, idx_to_coordinates[start_idx],
idx_to_coordinates[end_idx], drawing_spec.color,
drawing_spec.thickness)
# Draws landmark points after finishing the connection lines, which is
# aesthetically better.
for idx, landmark_px in idx_to_coordinates.items():
drawing_spec = landmark_drawing_spec[idx] if isinstance(
landmark_drawing_spec, Mapping) else landmark_drawing_spec
cv2.circle(image, landmark_px, drawing_spec.circle_radius,
drawing_spec.color, drawing_spec.thickness)
def draw_axis(
image: np.ndarray,
rotation: np.ndarray,
translation: np.ndarray,
focal_length: Tuple[float, float] = (1.0, 1.0),
principal_point: Tuple[float, float] = (0.0, 0.0),
axis_length: float = 0.1,
axis_drawing_spec: DrawingSpec = DrawingSpec()):
"""Draws the 3D axis on the image.
Args:
image: A three channel RGB image represented as numpy ndarray.
rotation: Rotation matrix from object to camera coordinate frame.
translation: Translation vector from object to camera coordinate frame.
focal_length: camera focal length along x and y directions.
principal_point: camera principal point in x and y.
axis_length: length of the axis in the drawing.
axis_drawing_spec: A DrawingSpec object that specifies the xyz axis
drawing settings such as line thickness.
Raises:
    ValueError: If the input image is not three channel RGB.
"""
if image.shape[2] != RGB_CHANNELS:
raise ValueError('Input image must contain three channel rgb data.')
image_rows, image_cols, _ = image.shape
# Create axis points in camera coordinate frame.
axis_world = np.float32([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
axis_cam = np.matmul(rotation, axis_length*axis_world.T).T + translation
x = axis_cam[..., 0]
y = axis_cam[..., 1]
z = axis_cam[..., 2]
# Project 3D points to NDC space.
fx, fy = focal_length
px, py = principal_point
x_ndc = np.clip(-fx * x / (z + 1e-5) + px, -1., 1.)
y_ndc = np.clip(-fy * y / (z + 1e-5) + py, -1., 1.)
# Convert from NDC space to image space.
x_im = np.int32((1 + x_ndc) * 0.5 * image_cols)
y_im = np.int32((1 - y_ndc) * 0.5 * image_rows)
# Draw xyz axis on the image.
origin = (x_im[0], y_im[0])
x_axis = (x_im[1], y_im[1])
y_axis = (x_im[2], y_im[2])
z_axis = (x_im[3], y_im[3])
cv2.arrowedLine(image, origin, x_axis, RED_COLOR,
axis_drawing_spec.thickness)
cv2.arrowedLine(image, origin, y_axis, GREEN_COLOR,
axis_drawing_spec.thickness)
cv2.arrowedLine(image, origin, z_axis, BLUE_COLOR,
axis_drawing_spec.thickness)
def _normalize_color(color):
return tuple(v / 255. for v in color)
def plot_landmarks(landmark_list: landmark_pb2.NormalizedLandmarkList,
connections: Optional[List[Tuple[int, int]]] = None,
landmark_drawing_spec: DrawingSpec = DrawingSpec(
color=RED_COLOR, thickness=5),
connection_drawing_spec: DrawingSpec = DrawingSpec(
color=BLACK_COLOR, thickness=5),
elevation: int = 10,
azimuth: int = 10):
"""Plot the landmarks and the connections in matplotlib 3d.
Args:
landmark_list: A normalized landmark list proto message to be plotted.
    connections: A list of landmark index tuples that specifies which landmarks
      are to be connected.
landmark_drawing_spec: A DrawingSpec object that specifies the landmarks'
drawing settings such as color and line thickness.
connection_drawing_spec: A DrawingSpec object that specifies the
connections' drawing settings such as color and line thickness.
elevation: The elevation from which to view the plot.
    azimuth: The azimuth angle to rotate the plot.
  Raises:
    ValueError: If any connection contains an invalid landmark index.
"""
if not landmark_list:
return
plt.figure(figsize=(10, 10))
ax = plt.axes(projection='3d')
ax.view_init(elev=elevation, azim=azimuth)
plotted_landmarks = {}
for idx, landmark in enumerate(landmark_list.landmark):
if ((landmark.HasField('visibility') and
landmark.visibility < VISIBILITY_THRESHOLD) or
(landmark.HasField('presence') and
landmark.presence < PRESENCE_THRESHOLD)):
continue
ax.scatter3D(
xs=[-landmark.z],
ys=[landmark.x],
zs=[-landmark.y],
color=_normalize_color(landmark_drawing_spec.color[::-1]),
linewidth=landmark_drawing_spec.thickness)
plotted_landmarks[idx] = (-landmark.z, landmark.x, -landmark.y)
if connections:
num_landmarks = len(landmark_list.landmark)
# Draws the connections if the start and end landmarks are both visible.
for connection in connections:
start_idx = connection[0]
end_idx = connection[1]
if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks):
raise ValueError(f'Landmark index is out of range. Invalid connection '
f'from landmark #{start_idx} to landmark #{end_idx}.')
if start_idx in plotted_landmarks and end_idx in plotted_landmarks:
landmark_pair = [
plotted_landmarks[start_idx], plotted_landmarks[end_idx]
]
ax.plot3D(
xs=[landmark_pair[0][0], landmark_pair[1][0]],
ys=[landmark_pair[0][1], landmark_pair[1][1]],
zs=[landmark_pair[0][2], landmark_pair[1][2]],
color=_normalize_color(connection_drawing_spec.color[::-1]),
linewidth=connection_drawing_spec.thickness)
plt.show()
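A small sketch tying the pieces together, building a two-point landmark list
the same way the tests below do (via text_format.Parse) and rendering it onto
a blank canvas:

import numpy as np
from google.protobuf import text_format
from mediapipe.framework.formats import landmark_pb2
from mediapipe.python.solutions import drawing_utils

landmarks = text_format.Parse(
    'landmark {x: 0.25 y: 0.25} landmark {x: 0.75 y: 0.75}',
    landmark_pb2.NormalizedLandmarkList())
canvas = np.zeros((480, 640, 3), dtype=np.uint8)
drawing_utils.draw_landmarks(canvas, landmarks, connections=[(0, 1)])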


@@ -0,0 +1,231 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.drawing_utils."""
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
from google.protobuf import text_format
from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import landmark_pb2
from mediapipe.python.solutions import drawing_utils
DEFAULT_BBOX_DRAWING_SPEC = drawing_utils.DrawingSpec()
DEFAULT_CONNECTION_DRAWING_SPEC = drawing_utils.DrawingSpec()
DEFAULT_CIRCLE_DRAWING_SPEC = drawing_utils.DrawingSpec(color=(0, 0, 255))
DEFAULT_AXIS_DRAWING_SPEC = drawing_utils.DrawingSpec()
class DrawingUtilTest(parameterized.TestCase):
def test_invalid_input_image(self):
image = np.arange(18, dtype=np.uint8).reshape(3, 3, 2)
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
drawing_utils.draw_landmarks(image, landmark_pb2.NormalizedLandmarkList())
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
drawing_utils.draw_detection(image, detection_pb2.Detection())
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
rotation = np.eye(3, dtype=np.float32)
translation = np.array([0., 0., 1.])
drawing_utils.draw_axis(image, rotation, translation)
def test_invalid_connection(self):
landmark_list = text_format.Parse(
'landmark {x: 0.5 y: 0.5} landmark {x: 0.2 y: 0.2}',
landmark_pb2.NormalizedLandmarkList())
image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
with self.assertRaisesRegex(ValueError, 'Landmark index is out of range.'):
drawing_utils.draw_landmarks(image, landmark_list, [(0, 2)])
def test_unqualified_detection(self):
detection = text_format.Parse('location_data {format: GLOBAL}',
detection_pb2.Detection())
image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
with self.assertRaisesRegex(ValueError, 'LocationData must be relative'):
drawing_utils.draw_detection(image, detection)
def test_draw_keypoints_only(self):
detection = text_format.Parse(
'location_data {'
' format: RELATIVE_BOUNDING_BOX'
' relative_keypoints {x: 0 y: 1}'
' relative_keypoints {x: 1 y: 0}}', detection_pb2.Detection())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
cv2.circle(expected_result, (0, 99),
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
cv2.circle(expected_result, (99, 0),
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
drawing_utils.draw_detection(image, detection)
np.testing.assert_array_equal(image, expected_result)
  def test_draw_bboxes_only(self):
detection = text_format.Parse(
'location_data {'
' format: RELATIVE_BOUNDING_BOX'
' relative_bounding_box {xmin: 0 ymin: 0 width: 1 height: 1}}',
detection_pb2.Detection())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
cv2.rectangle(expected_result, (0, 0), (99, 99),
DEFAULT_BBOX_DRAWING_SPEC.color,
DEFAULT_BBOX_DRAWING_SPEC.thickness)
drawing_utils.draw_detection(image, detection)
np.testing.assert_array_equal(image, expected_result)
@parameterized.named_parameters(
('landmark_list_has_only_one_element', 'landmark {x: 0.1 y: 0.1}'),
('second_landmark_is_invisible',
'landmark {x: 0.1 y: 0.1} landmark {x: 0.5 y: 0.5 visibility: 0.0}'))
def test_draw_single_landmark_point(self, landmark_list_text):
landmark_list = text_format.Parse(landmark_list_text,
landmark_pb2.NormalizedLandmarkList())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
cv2.circle(expected_result, (10, 10),
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
drawing_utils.draw_landmarks(image, landmark_list)
np.testing.assert_array_equal(image, expected_result)
@parameterized.named_parameters(
('landmarks_have_x_and_y_only',
'landmark {x: 0.1 y: 0.5} landmark {x: 0.5 y: 0.1}'),
('landmark_zero_visibility_and_presence',
'landmark {x: 0.1 y: 0.5 presence: 0.5}'
'landmark {x: 0.5 y: 0.1 visibility: 0.5}'))
def test_draw_landmarks_and_connections(self, landmark_list_text):
landmark_list = text_format.Parse(landmark_list_text,
landmark_pb2.NormalizedLandmarkList())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
start_point = (10, 50)
end_point = (50, 10)
cv2.line(expected_result, start_point, end_point,
DEFAULT_CONNECTION_DRAWING_SPEC.color,
DEFAULT_CONNECTION_DRAWING_SPEC.thickness)
cv2.circle(expected_result, start_point,
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
cv2.circle(expected_result, end_point,
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
drawing_utils.draw_landmarks(
image=image, landmark_list=landmark_list, connections=[(0, 1)])
np.testing.assert_array_equal(image, expected_result)
def test_draw_axis(self):
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
origin = (50, 50)
x_axis = (75, 50)
y_axis = (50, 22)
z_axis = (50, 77)
cv2.arrowedLine(expected_result, origin, x_axis, drawing_utils.RED_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
cv2.arrowedLine(expected_result, origin, y_axis, drawing_utils.GREEN_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
cv2.arrowedLine(expected_result, origin, z_axis, drawing_utils.BLUE_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
r = np.sqrt(2.) / 2.
rotation = np.array([[1., 0., 0.], [0., r, -r], [0., r, r]])
translation = np.array([0, 0, -0.2])
drawing_utils.draw_axis(image, rotation, translation)
np.testing.assert_array_equal(image, expected_result)
def test_draw_axis_zero_translation(self):
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
origin = (50, 50)
x_axis = (0, 50)
y_axis = (50, 100)
z_axis = (50, 50)
cv2.arrowedLine(expected_result, origin, x_axis, drawing_utils.RED_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
cv2.arrowedLine(expected_result, origin, y_axis, drawing_utils.GREEN_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
cv2.arrowedLine(expected_result, origin, z_axis, drawing_utils.BLUE_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
rotation = np.eye(3, dtype=np.float32)
translation = np.zeros((3,), dtype=np.float32)
drawing_utils.draw_axis(image, rotation, translation)
np.testing.assert_array_equal(image, expected_result)
def test_min_and_max_coordinate_values(self):
landmark_list = text_format.Parse(
'landmark {x: 0.0 y: 1.0}'
'landmark {x: 1.0 y: 0.0}', landmark_pb2.NormalizedLandmarkList())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
start_point = (0, 99)
end_point = (99, 0)
cv2.line(expected_result, start_point, end_point,
DEFAULT_CONNECTION_DRAWING_SPEC.color,
DEFAULT_CONNECTION_DRAWING_SPEC.thickness)
cv2.circle(expected_result, start_point,
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
cv2.circle(expected_result, end_point,
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
drawing_utils.draw_landmarks(
image=image, landmark_list=landmark_list, connections=[(0, 1)])
np.testing.assert_array_equal(image, expected_result)
def test_drawing_spec(self):
landmark_list = text_format.Parse(
'landmark {x: 0.1 y: 0.1}'
'landmark {x: 0.8 y: 0.8}', landmark_pb2.NormalizedLandmarkList())
image = np.zeros((100, 100, 3), np.uint8)
landmark_drawing_spec = drawing_utils.DrawingSpec(
color=(0, 0, 255), thickness=5)
connection_drawing_spec = drawing_utils.DrawingSpec(
color=(255, 0, 0), thickness=3)
expected_result = np.copy(image)
start_point = (10, 10)
end_point = (80, 80)
cv2.line(expected_result, start_point, end_point,
connection_drawing_spec.color, connection_drawing_spec.thickness)
cv2.circle(expected_result, start_point,
landmark_drawing_spec.circle_radius, landmark_drawing_spec.color,
landmark_drawing_spec.thickness)
cv2.circle(expected_result, end_point, landmark_drawing_spec.circle_radius,
landmark_drawing_spec.color, landmark_drawing_spec.thickness)
drawing_utils.draw_landmarks(
image=image,
landmark_list=landmark_list,
connections=[(0, 1)],
landmark_drawing_spec=landmark_drawing_spec,
connection_drawing_spec=connection_drawing_spec)
np.testing.assert_array_equal(image, expected_result)
if __name__ == '__main__':
absltest.main()


@@ -0,0 +1,112 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Face Detection."""
import enum
from typing import NamedTuple, Union
import numpy as np
from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import location_data_pb2
# pylint: disable=unused-import
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
SHORT_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb'
FULL_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb'
def get_key_point(
detection: detection_pb2.Detection, key_point_enum: 'FaceKeyPoint'
) -> Union[None, location_data_pb2.LocationData.RelativeKeypoint]:
"""A convenience method to return a face key point by the FaceKeyPoint type.
Args:
detection: A detection proto message that contains face key points.
key_point_enum: A FaceKeyPoint type.
Returns:
A RelativeKeypoint proto message.
"""
if not detection or not detection.location_data:
return None
return detection.location_data.relative_keypoints[key_point_enum]
class FaceKeyPoint(enum.IntEnum):
"""The enum type of the six face detection key points."""
RIGHT_EYE = 0
LEFT_EYE = 1
NOSE_TIP = 2
MOUTH_CENTER = 3
RIGHT_EAR_TRAGION = 4
LEFT_EAR_TRAGION = 5
class FaceDetection(SolutionBase):
"""MediaPipe Face Detection.
MediaPipe Face Detection processes an RGB image and returns a list of the
detected face location data.
Please refer to
https://solutions.mediapipe.dev/face_detection#python-solution-api
for usage examples.
"""
def __init__(self, min_detection_confidence=0.5, model_selection=0):
"""Initializes a MediaPipe Face Detection object.
Args:
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for face
detection to be considered successful. See details in
https://solutions.mediapipe.dev/face_detection#min_detection_confidence.
model_selection: 0 or 1. 0 to select a short-range model that works
best for faces within 2 meters from the camera, and 1 for a full-range
model best for faces within 5 meters. See details in
https://solutions.mediapipe.dev/face_detection#model_selection.
"""
binary_graph_path = FULL_RANGE_GRAPH_FILE_PATH if model_selection == 1 else SHORT_RANGE_GRAPH_FILE_PATH
subgraph_name = 'facedetectionfullrangecommon' if model_selection == 1 else 'facedetectionshortrangecommon'
super().__init__(
binary_graph_path=binary_graph_path,
calculator_params={
subgraph_name + '__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
},
outputs=['detections'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns a list of the detected face location data.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with a "detections" field that contains a list of the
detected face location data.
"""
return super().process(input_data={'image': image})
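An end-to-end sketch, assuming an OpenCV-readable image at the hypothetical
path 'portrait.jpg' (note the BGR-to-RGB conversion, as in the tests below):

import cv2
from mediapipe.python.solutions import face_detection as mp_faces

with mp_faces.FaceDetection(min_detection_confidence=0.5,
                            model_selection=0) as faces:
  image = cv2.imread('portrait.jpg')  # hypothetical input path
  results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.detections:
    nose = mp_faces.get_key_point(results.detections[0],
                                  mp_faces.FaceKeyPoint.NOSE_TIP)
    print(nose.x, nose.y)  # normalized [0, 1] coordinates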


@@ -0,0 +1,92 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.face_detection."""
import os
import tempfile # pylint: disable=unused-import
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import face_detection as mp_faces
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
SHORT_RANGE_EXPECTED_FACE_KEY_POINTS = [[363, 182], [460, 186], [420, 241],
[417, 284], [295, 199], [502, 198]]
FULL_RANGE_EXPECTED_FACE_KEY_POINTS = [[363, 181], [455, 181], [413, 233],
[411, 278], [306, 204], [499, 207]]
DIFF_THRESHOLD = 5 # pixels
class FaceDetectionTest(parameterized.TestCase):
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
for detection in results.detections:
mp_drawing.draw_detection(frame, detection)
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_faces.FaceDetection() as faces:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
faces.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
with mp_faces.FaceDetection(min_detection_confidence=0.5) as faces:
results = faces.process(image)
self.assertIsNone(results.detections)
@parameterized.named_parameters(('short_range_model', 0),
('full_range_model', 1))
def test_face(self, model_selection):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/portrait.jpg')
image = cv2.imread(image_path)
rows, cols, _ = image.shape
with mp_faces.FaceDetection(
min_detection_confidence=0.5, model_selection=model_selection) as faces:
for idx in range(5):
results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
location_data = results.detections[0].location_data
x = [keypoint.x * cols for keypoint in location_data.relative_keypoints]
y = [keypoint.y * rows for keypoint in location_data.relative_keypoints]
face_keypoints = np.column_stack((x, y))
if model_selection == 0:
prediction_error = np.abs(
np.asarray(face_keypoints) -
np.asarray(SHORT_RANGE_EXPECTED_FACE_KEY_POINTS))
else:
prediction_error = np.abs(
np.asarray(face_keypoints) -
np.asarray(FULL_RANGE_EXPECTED_FACE_KEY_POINTS))
self.assertLen(results.detections, 1)
self.assertLen(location_data.relative_keypoints, 6)
npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
if __name__ == '__main__':
absltest.main()


@@ -0,0 +1,238 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe FaceMesh."""
from typing import NamedTuple
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
BINARYPB_FILE_PATH = 'mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb'
FACE_CONNECTIONS = frozenset([
# Lips.
(61, 146),
(146, 91),
(91, 181),
(181, 84),
(84, 17),
(17, 314),
(314, 405),
(405, 321),
(321, 375),
(375, 291),
(61, 185),
(185, 40),
(40, 39),
(39, 37),
(37, 0),
(0, 267),
(267, 269),
(269, 270),
(270, 409),
(409, 291),
(78, 95),
(95, 88),
(88, 178),
(178, 87),
(87, 14),
(14, 317),
(317, 402),
(402, 318),
(318, 324),
(324, 308),
(78, 191),
(191, 80),
(80, 81),
(81, 82),
(82, 13),
(13, 312),
(312, 311),
(311, 310),
(310, 415),
(415, 308),
# Left eye.
(263, 249),
(249, 390),
(390, 373),
(373, 374),
(374, 380),
(380, 381),
(381, 382),
(382, 362),
(263, 466),
(466, 388),
(388, 387),
(387, 386),
(386, 385),
(385, 384),
(384, 398),
(398, 362),
# Left eyebrow.
(276, 283),
(283, 282),
(282, 295),
(295, 285),
(300, 293),
(293, 334),
(334, 296),
(296, 336),
# Right eye.
(33, 7),
(7, 163),
(163, 144),
(144, 145),
(145, 153),
(153, 154),
(154, 155),
(155, 133),
(33, 246),
(246, 161),
(161, 160),
(160, 159),
(159, 158),
(158, 157),
(157, 173),
(173, 133),
# Right eyebrow.
(46, 53),
(53, 52),
(52, 65),
(65, 55),
(70, 63),
(63, 105),
(105, 66),
(66, 107),
# Face oval.
(10, 338),
(338, 297),
(297, 332),
(332, 284),
(284, 251),
(251, 389),
(389, 356),
(356, 454),
(454, 323),
(323, 361),
(361, 288),
(288, 397),
(397, 365),
(365, 379),
(379, 378),
(378, 400),
(400, 377),
(377, 152),
(152, 148),
(148, 176),
(176, 149),
(149, 150),
(150, 136),
(136, 172),
(172, 58),
(58, 132),
(132, 93),
(93, 234),
(234, 127),
(127, 162),
(162, 21),
(21, 54),
(54, 103),
(103, 67),
(67, 109),
(109, 10)
])
class FaceMesh(SolutionBase):
"""MediaPipe FaceMesh.
MediaPipe FaceMesh processes an RGB image and returns the face landmarks on
each detected face.
Please refer to https://solutions.mediapipe.dev/face_mesh#python-solution-api
for usage examples.
"""
def __init__(self,
static_image_mode=False,
max_num_faces=1,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe FaceMesh object.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream. See details in
https://solutions.mediapipe.dev/face_mesh#static_image_mode.
max_num_faces: Maximum number of faces to detect. See details in
https://solutions.mediapipe.dev/face_mesh#max_num_faces.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for face
detection to be considered successful. See details in
https://solutions.mediapipe.dev/face_mesh#min_detection_confidence.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
face landmarks to be considered tracked successfully. See details in
https://solutions.mediapipe.dev/face_mesh#min_tracking_confidence.
"""
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'num_faces': max_num_faces,
},
calculator_params={
'ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
'facedetectionshortrangecpu__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
'facelandmarkcpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=['multi_face_landmarks'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the face landmarks on each detected face.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with a "multi_face_landmarks" field that contains the
face landmarks on each detected face.
"""
return super().process(input_data={'image': image})
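A usage sketch mirroring the test below (static-image mode on a hypothetical
input file):

import cv2
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import face_mesh as mp_faces

with mp_faces.FaceMesh(static_image_mode=True, max_num_faces=2) as faces:
  frame = cv2.imread('portrait.jpg')  # hypothetical input path
  results = faces.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
  if results.multi_face_landmarks:
    for face_landmarks in results.multi_face_landmarks:
      mp_drawing.draw_landmarks(
          frame, face_landmarks, mp_faces.FACE_CONNECTIONS,
          landmark_drawing_spec=mp_drawing.DrawingSpec(
              thickness=1, circle_radius=1))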


@@ -0,0 +1,125 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.face_mesh."""
import os
import tempfile # pylint: disable=unused-import
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import face_mesh as mp_faces
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 5 # pixels
EYE_INDICES_TO_LANDMARKS = {
33: [345, 178],
7: [348, 179],
163: [352, 178],
144: [357, 179],
145: [365, 179],
153: [371, 179],
154: [378, 178],
155: [381, 177],
133: [383, 177],
246: [347, 175],
161: [350, 174],
160: [355, 172],
159: [362, 170],
158: [368, 171],
157: [375, 172],
173: [380, 175],
263: [467, 176],
249: [464, 177],
390: [460, 177],
373: [455, 178],
374: [448, 179],
380: [441, 179],
381: [435, 178],
382: [432, 177],
362: [430, 177],
466: [465, 175],
388: [462, 173],
387: [457, 171],
386: [450, 170],
385: [444, 171],
384: [437, 172],
398: [432, 175]
}
class FaceMeshTest(parameterized.TestCase):
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
for face_landmarks in results.multi_face_landmarks:
mp_drawing.draw_landmarks(
image=frame,
landmark_list=face_landmarks,
landmark_drawing_spec=drawing_spec)
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_faces.FaceMesh() as faces:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
faces.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_faces.FaceMesh() as faces:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = faces.process(image)
self.assertIsNone(results.multi_face_landmarks)
@parameterized.named_parameters(('static_image_mode', True, 1),
('video_mode', False, 5))
def test_face(self, static_image_mode: bool, num_frames: int):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/portrait.jpg')
image = cv2.imread(image_path)
rows, cols, _ = image.shape
with mp_faces.FaceMesh(
static_image_mode=static_image_mode,
min_detection_confidence=0.5) as faces:
for idx in range(num_frames):
results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
multi_face_landmarks = []
for landmarks in results.multi_face_landmarks:
self.assertLen(landmarks.landmark, 468)
x = [landmark.x * cols for landmark in landmarks.landmark]
y = [landmark.y * rows for landmark in landmarks.landmark]
face_landmarks = np.column_stack((x, y))
multi_face_landmarks.append(face_landmarks)
self.assertLen(multi_face_landmarks, 1)
# Verify the eye landmarks are correct as sanity check.
for eye_idx, gt_lds in EYE_INDICES_TO_LANDMARKS.items():
prediction_error = np.abs(
np.asarray(multi_face_landmarks[0][eye_idx]) - np.asarray(gt_lds))
npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
if __name__ == '__main__':
absltest.main()


@@ -0,0 +1,164 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Hands."""
import enum
from typing import NamedTuple
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
class HandLandmark(enum.IntEnum):
"""The 21 hand landmarks."""
WRIST = 0
THUMB_CMC = 1
THUMB_MCP = 2
THUMB_IP = 3
THUMB_TIP = 4
INDEX_FINGER_MCP = 5
INDEX_FINGER_PIP = 6
INDEX_FINGER_DIP = 7
INDEX_FINGER_TIP = 8
MIDDLE_FINGER_MCP = 9
MIDDLE_FINGER_PIP = 10
MIDDLE_FINGER_DIP = 11
MIDDLE_FINGER_TIP = 12
RING_FINGER_MCP = 13
RING_FINGER_PIP = 14
RING_FINGER_DIP = 15
RING_FINGER_TIP = 16
PINKY_MCP = 17
PINKY_PIP = 18
PINKY_DIP = 19
PINKY_TIP = 20
BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb'
HAND_CONNECTIONS = frozenset([
(HandLandmark.WRIST, HandLandmark.THUMB_CMC),
(HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
(HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
(HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP),
(HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
(HandLandmark.INDEX_FINGER_MCP, HandLandmark.INDEX_FINGER_PIP),
(HandLandmark.INDEX_FINGER_PIP, HandLandmark.INDEX_FINGER_DIP),
(HandLandmark.INDEX_FINGER_DIP, HandLandmark.INDEX_FINGER_TIP),
(HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP),
(HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.MIDDLE_FINGER_PIP),
(HandLandmark.MIDDLE_FINGER_PIP, HandLandmark.MIDDLE_FINGER_DIP),
(HandLandmark.MIDDLE_FINGER_DIP, HandLandmark.MIDDLE_FINGER_TIP),
(HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.RING_FINGER_MCP),
(HandLandmark.RING_FINGER_MCP, HandLandmark.RING_FINGER_PIP),
(HandLandmark.RING_FINGER_PIP, HandLandmark.RING_FINGER_DIP),
(HandLandmark.RING_FINGER_DIP, HandLandmark.RING_FINGER_TIP),
(HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
(HandLandmark.WRIST, HandLandmark.PINKY_MCP),
(HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
(HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
(HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP)
])
class Hands(SolutionBase):
"""MediaPipe Hands.
MediaPipe Hands processes an RGB image and returns the hand landmarks and
  handedness (left vs. right hand) of each detected hand.
Note that it determines handedness assuming the input image is mirrored,
i.e., taken with a front-facing/selfie camera (
https://en.wikipedia.org/wiki/Front-facing_camera) with images flipped
horizontally. If that is not the case, use, for instance, cv2.flip(image, 1)
to flip the image first for a correct handedness output.
Please refer to https://solutions.mediapipe.dev/hands#python-solution-api for
usage examples.
"""
def __init__(self,
static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe Hand object.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream. See details in
https://solutions.mediapipe.dev/hands#static_image_mode.
max_num_hands: Maximum number of hands to detect. See details in
https://solutions.mediapipe.dev/hands#max_num_hands.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for hand
detection to be considered successful. See details in
https://solutions.mediapipe.dev/hands#min_detection_confidence.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
hand landmarks to be considered tracked successfully. See details in
https://solutions.mediapipe.dev/hands#min_tracking_confidence.
"""
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'num_hands': max_num_hands,
},
calculator_params={
'ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
'palmdetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
'handlandmarkcpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=['multi_hand_landmarks', 'multi_handedness'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the hand landmarks and handedness of each detected hand.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with two fields: a "multi_hand_landmarks" field that
contains the hand landmarks on each detected hand and a "multi_handedness"
      field that contains the handedness (left vs. right hand) of the detected
hand.
"""
return super().process(input_data={'image': image})
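Following the handedness note in the class docstring, a single-frame webcam
sketch (camera index 0 is an assumption; the flip makes the handedness labels
come out correctly for a front-facing capture):

import cv2
from mediapipe.python.solutions import hands as mp_hands

cap = cv2.VideoCapture(0)  # assumed front-facing camera
with mp_hands.Hands(max_num_hands=2) as hands:
  ok, frame = cap.read()
  if ok:
    frame = cv2.flip(frame, 1)  # mirror for correct handedness output
    results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if results.multi_handedness:
      for handedness in results.multi_handedness:
        print(handedness.classification[0].label)  # 'Left' or 'Right'
cap.release()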


@@ -0,0 +1,110 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.hands."""
import os
import tempfile # pylint: disable=unused-import
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_styles
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import hands as mp_hands
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 15 # pixels
EXPECTED_HAND_COORDINATES_PREDICTION = [[[144, 345], [211, 323], [257, 286],
[289, 237], [322, 203], [219, 216],
[238, 138], [249, 90], [253, 51],
[177, 204], [184, 115], [187, 60],
[185, 19], [138, 208], [131, 127],
[124, 77], [117, 36], [106, 222],
[92, 159], [79, 124], [68, 93]],
[[577, 37], [504, 56], [459, 94],
[429, 146], [397, 182], [496, 167],
[479, 245], [469, 292], [464, 330],
[540, 177], [534, 265], [533, 319],
[536, 360], [581, 172], [587, 252],
[593, 304], [599, 346], [615, 157],
[628, 223], [638, 258], [648, 288]]]
class HandsTest(parameterized.TestCase):
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks(
frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
drawing_styles.get_default_hand_landmark_style(),
drawing_styles.get_default_hand_connection_style())
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_hands.Hands() as hands:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
hands.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_hands.Hands() as hands:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = hands.process(image)
self.assertIsNone(results.multi_hand_landmarks)
self.assertIsNone(results.multi_handedness)
@parameterized.named_parameters(('static_image_mode', True, 1),
('video_mode', False, 5))
def test_multi_hands(self, static_image_mode, num_frames):
image_path = os.path.join(os.path.dirname(__file__), 'testdata/hands.jpg')
image = cv2.imread(image_path)
with mp_hands.Hands(
static_image_mode=static_image_mode,
max_num_hands=2,
min_detection_confidence=0.5) as hands:
for idx in range(num_frames):
results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
handedness = [
handedness.classification[0].label
for handedness in results.multi_handedness
]
multi_hand_coordinates = []
rows, cols, _ = image.shape
for landmarks in results.multi_hand_landmarks:
self.assertLen(landmarks.landmark, 21)
x = [landmark.x * cols for landmark in landmarks.landmark]
y = [landmark.y * rows for landmark in landmarks.landmark]
hand_coordinates = np.column_stack((x, y))
multi_hand_coordinates.append(hand_coordinates)
self.assertLen(handedness, 2)
self.assertLen(multi_hand_coordinates, 2)
prediction_error = np.abs(
np.asarray(multi_hand_coordinates) -
np.asarray(EXPECTED_HAND_COORDINATES_PREDICTION))
npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
if __name__ == '__main__':
absltest.main()


@@ -0,0 +1,152 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Holistic."""
from typing import NamedTuple
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# The following imports are needed because python pb2 silently discards
# unknown protobuf fields.
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmark_projection_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.framework.tool import switch_container_pb2
from mediapipe.modules.holistic_landmark.calculators import roi_tracking_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils
# pylint: disable=unused-import
from mediapipe.python.solutions.face_mesh import FACE_CONNECTIONS
from mediapipe.python.solutions.hands import HAND_CONNECTIONS
from mediapipe.python.solutions.hands import HandLandmark
from mediapipe.python.solutions.pose import POSE_CONNECTIONS
from mediapipe.python.solutions.pose import PoseLandmark
# pylint: enable=unused-import
BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'
def _download_oss_pose_landmark_model(model_complexity):
"""Downloads the pose landmark lite/heavy model from the MediaPipe Github repo if it doesn't exist in the package."""
if model_complexity == 0:
download_utils.download_oss_model(
'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite')
elif model_complexity == 2:
download_utils.download_oss_model(
'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite')
class Holistic(SolutionBase):
"""MediaPipe Holistic.
MediaPipe Holistic processes an RGB image and returns pose landmarks, left and
right hand landmarks, and face mesh landmarks on the most prominent person
detected.
Please refer to https://solutions.mediapipe.dev/holistic#python-solution-api
for usage examples.
"""
def __init__(self,
static_image_mode=False,
model_complexity=1,
smooth_landmarks=True,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe Holistic object.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream. See details in
https://solutions.mediapipe.dev/holistic#static_image_mode.
model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
details in https://solutions.mediapipe.dev/holistic#model_complexity.
smooth_landmarks: Whether to filter landmarks across different input
images to reduce jitter. See details in
https://solutions.mediapipe.dev/holistic#smooth_landmarks.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
detection to be considered successful. See details in
https://solutions.mediapipe.dev/holistic#min_detection_confidence.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
pose landmarks to be considered tracked successfully. See details in
https://solutions.mediapipe.dev/holistic#min_tracking_confidence.
"""
_download_oss_pose_landmark_model(model_complexity)
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'model_complexity': model_complexity,
'smooth_landmarks': smooth_landmarks and not static_image_mode,
},
calculator_params={
'poselandmarkcpu__ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=[
'pose_landmarks', 'pose_world_landmarks', 'left_hand_landmarks',
'right_hand_landmarks', 'face_landmarks'
])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple that has five fields describing the landmarks on the most
      prominent person detected:
1) "pose_landmarks" field that contains the pose landmarks.
2) "pose_world_landmarks" field that contains the pose landmarks in
real-world 3D coordinates that are in meters with the origin at the
center between hips.
3) "left_hand_landmarks" field that contains the left-hand landmarks.
4) "right_hand_landmarks" field that contains the right-hand landmarks.
5) "face_landmarks" field that contains the face landmarks.
"""
results = super().process(input_data={'image': image})
if results.pose_landmarks:
for landmark in results.pose_landmarks.landmark:
landmark.ClearField('presence')
if results.pose_world_landmarks:
for landmark in results.pose_world_landmarks.landmark:
landmark.ClearField('presence')
return results
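

if __name__ == '__main__':
  # Minimal usage sketch (illustrative only, not part of the API surface): it
  # assumes OpenCV is installed and that 'input.jpg' is a readable image; both
  # are placeholders, not assets shipped with the package.
  import cv2
  _image = cv2.imread('input.jpg')
  with Holistic(static_image_mode=True) as _holistic:
    # The graph expects RGB input; OpenCV decodes to BGR, so convert first.
    _results = _holistic.process(cv2.cvtColor(_image, cv2.COLOR_BGR2RGB))
    if _results.pose_landmarks:
      print('Detected {} pose landmarks.'.format(
          len(_results.pose_landmarks.landmark)))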

View File

@@ -0,0 +1,134 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.pose."""
import os
import tempfile
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import holistic as mp_holistic
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
POSE_DIFF_THRESHOLD = 30 # pixels
HAND_DIFF_THRESHOLD = 30 # pixels
EXPECTED_POSE_LANDMARKS = np.array([[782, 243], [791, 232], [796, 233],
[801, 233], [773, 231], [766, 231],
[759, 232], [802, 242], [751, 239],
[791, 258], [766, 258], [830, 301],
[708, 298], [910, 248], [635, 234],
[954, 161], [593, 136], [961, 137],
[583, 110], [952, 132], [592, 106],
[950, 141], [596, 115], [793, 500],
[724, 502], [874, 626], [640, 629],
[965, 756], [542, 760], [962, 779],
[533, 781], [1025, 797], [487, 803]])
EXPECTED_LEFT_HAND_LANDMARKS = np.array([[958, 167], [950, 161], [945, 151],
[945, 141], [947, 134], [945, 136],
[939, 122], [935, 113], [931, 106],
[951, 134], [946, 118], [942, 108],
[938, 100], [957, 135], [954, 120],
[951, 111], [948, 103], [964, 138],
[964, 128], [965, 122], [965, 117]])
EXPECTED_RIGHT_HAND_LANDMARKS = np.array([[590, 135], [602, 125], [609, 114],
[613, 103], [617, 96], [596, 100],
[595, 84], [594, 74], [593, 68],
[588, 100], [586, 84], [585, 73],
[584, 65], [581, 103], [579, 89],
[579, 79], [579, 72], [575, 109],
[571, 99], [570, 93], [569, 87]])
class HolisticTest(parameterized.TestCase):
def _landmarks_list_to_array(self, landmark_list, image_shape):
rows, cols, _ = image_shape
return np.asarray([(lmk.x * cols, lmk.y * rows)
for lmk in landmark_list.landmark])
def _assert_diff_less(self, array1, array2, threshold):
npt.assert_array_less(np.abs(array1 - array2), threshold)
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
mp_drawing.draw_landmarks(
image=frame,
landmark_list=results.face_landmarks,
landmark_drawing_spec=drawing_spec)
mp_drawing.draw_landmarks(frame, results.left_hand_landmarks,
mp_holistic.HAND_CONNECTIONS)
mp_drawing.draw_landmarks(frame, results.right_hand_landmarks,
mp_holistic.HAND_CONNECTIONS)
mp_drawing.draw_landmarks(frame, results.pose_landmarks,
mp_holistic.POSE_CONNECTIONS)
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_holistic.Holistic() as holistic:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
holistic.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_holistic.Holistic() as holistic:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = holistic.process(image)
self.assertIsNone(results.pose_landmarks)
@parameterized.named_parameters(('static_lite', True, 0, 3),
('static_full', True, 1, 3),
('static_heavy', True, 2, 3),
('video_lite', False, 0, 3),
('video_full', False, 1, 3),
('video_heavy', False, 2, 3))
def test_on_image(self, static_image_mode, model_complexity, num_frames):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/holistic.jpg')
image = cv2.imread(image_path)
with mp_holistic.Holistic(static_image_mode=static_image_mode,
model_complexity=model_complexity) as holistic:
for idx in range(num_frames):
results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
self._assert_diff_less(
self._landmarks_list_to_array(results.pose_landmarks, image.shape),
EXPECTED_POSE_LANDMARKS,
POSE_DIFF_THRESHOLD)
self._assert_diff_less(
self._landmarks_list_to_array(results.left_hand_landmarks,
image.shape),
EXPECTED_LEFT_HAND_LANDMARKS,
HAND_DIFF_THRESHOLD)
self._assert_diff_less(
self._landmarks_list_to_array(results.right_hand_landmarks,
image.shape),
EXPECTED_RIGHT_HAND_LANDMARKS,
HAND_DIFF_THRESHOLD)
# TODO: Verify the correctness of the face landmarks.
self.assertLen(results.face_landmarks.landmark, 468)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,292 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Objectron."""
import enum
from typing import List, Tuple, NamedTuple, Optional
import attr
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import collection_has_min_size_calculator_pb2
from mediapipe.calculators.util import detection_label_id_to_text_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmark_projection_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
from mediapipe.framework.formats import landmark_pb2
from mediapipe.modules.objectron.calculators import annotation_data_pb2
from mediapipe.modules.objectron.calculators import frame_annotation_to_rect_calculator_pb2
from mediapipe.modules.objectron.calculators import lift_2d_frame_annotation_to_3d_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils
class BoxLandmark(enum.IntEnum):
"""The 9 3D box landmarks."""
#
# 3 + + + + + + + + 7
# +\ +\ UP
# + \ + \
# + \ + \ |
# + 4 + + + + + + + + 8 | y
# + + + + |
# + + + + |
# + + (0) + + .------- x
# + + + + \
# 1 + + + + + + + + 5 + \
# \ + \ + \ z
# \ + \ + \
# \+ \+
# 2 + + + + + + + + 6
CENTER = 0
BACK_BOTTOM_LEFT = 1
FRONT_BOTTOM_LEFT = 2
BACK_TOP_LEFT = 3
FRONT_TOP_LEFT = 4
BACK_BOTTOM_RIGHT = 5
FRONT_BOTTOM_RIGHT = 6
BACK_TOP_RIGHT = 7
FRONT_TOP_RIGHT = 8
BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'
BOX_CONNECTIONS = frozenset([
(BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_LEFT),
(BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_TOP_LEFT),
(BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_BOTTOM_RIGHT),
(BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_TOP_LEFT),
(BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_RIGHT),
(BoxLandmark.BACK_TOP_LEFT, BoxLandmark.FRONT_TOP_LEFT),
(BoxLandmark.BACK_TOP_LEFT, BoxLandmark.BACK_TOP_RIGHT),
(BoxLandmark.FRONT_TOP_LEFT, BoxLandmark.FRONT_TOP_RIGHT),
(BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.FRONT_BOTTOM_RIGHT),
(BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.BACK_TOP_RIGHT),
(BoxLandmark.FRONT_BOTTOM_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
(BoxLandmark.BACK_TOP_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
])
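# A hedged sketch of how BOX_CONNECTIONS is typically consumed: each pair
# indexes into the 9 projected 2D landmarks to draw one box edge. The names
# 'image' and 'landmarks_2d' below are illustrative placeholders, not values
# defined in this module.
#
#   rows, cols, _ = image.shape
#   for start, end in BOX_CONNECTIONS:
#     p0 = landmarks_2d.landmark[start]
#     p1 = landmarks_2d.landmark[end]
#     cv2.line(image, (int(p0.x * cols), int(p0.y * rows)),
#              (int(p1.x * cols), int(p1.y * rows)), (0, 255, 0), 2)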
@attr.s(auto_attribs=True)
class ObjectronModel(object):
model_path: str
label_name: str
@attr.s(auto_attribs=True, frozen=True)
class ShoeModel(ObjectronModel):
model_path: str = ('mediapipe/modules/objectron/'
'object_detection_3d_sneakers.tflite')
label_name: str = 'Footwear'
@attr.s(auto_attribs=True, frozen=True)
class ChairModel(ObjectronModel):
model_path: str = ('mediapipe/modules/objectron/'
'object_detection_3d_chair.tflite')
label_name: str = 'Chair'
@attr.s(auto_attribs=True, frozen=True)
class CameraModel(ObjectronModel):
model_path: str = ('mediapipe/modules/objectron/'
'object_detection_3d_camera.tflite')
label_name: str = 'Camera'
@attr.s(auto_attribs=True, frozen=True)
class CupModel(ObjectronModel):
model_path: str = ('mediapipe/modules/objectron/'
'object_detection_3d_cup.tflite')
label_name: str = 'Coffee cup, Mug'
_MODEL_DICT = {
'Shoe': ShoeModel(),
'Chair': ChairModel(),
'Cup': CupModel(),
'Camera': CameraModel()
}
def _download_oss_objectron_models(objectron_model: str):
"""Downloads the objectron models from the MediaPipe Github repo if they don't exist in the package."""
download_utils.download_oss_model(
'mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite'
)
download_utils.download_oss_model(objectron_model)
def get_model_by_name(name: str) -> ObjectronModel:
if name not in _MODEL_DICT:
raise ValueError(f'{name} is not a valid model name for Objectron.')
_download_oss_objectron_models(_MODEL_DICT[name].model_path)
return _MODEL_DICT[name]
@attr.s(auto_attribs=True)
class ObjectronOutputs(object):
landmarks_2d: landmark_pb2.NormalizedLandmarkList
landmarks_3d: landmark_pb2.LandmarkList
rotation: np.ndarray
translation: np.ndarray
scale: np.ndarray
class Objectron(SolutionBase):
"""MediaPipe Objectron.
MediaPipe Objectron processes an RGB image and returns the 3D box landmarks
and 2D rectangular bounding box of each detected object.
"""
def __init__(self,
static_image_mode: bool = False,
max_num_objects: int = 5,
min_detection_confidence: float = 0.5,
min_tracking_confidence: float = 0.99,
model_name: str = 'Shoe',
focal_length: Tuple[float, float] = (1.0, 1.0),
principal_point: Tuple[float, float] = (0.0, 0.0),
image_size: Optional[Tuple[int, int]] = None,
):
"""Initializes a MediaPipe Objectron class.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream.
max_num_objects: Maximum number of objects to detect.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for object
detection to be considered successful.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
box landmarks to be considered tracked successfully.
      model_name: Name of the model to use for predicting box landmarks;
        currently supports {'Shoe', 'Chair', 'Cup', 'Camera'}.
      focal_length: Camera focal length `(fx, fy)`, defined in NDC space by
        default. To use a focal length (fx_pixel, fy_pixel) in pixel space,
        provide image_size = (image_width, image_height) to enable
        conversions inside the API.
      principal_point: Camera principal point (px, py), defined in NDC space
        by default. To use a principal point (px_pixel, py_pixel) in pixel
        space, provide image_size = (image_width, image_height) to enable
        conversions inside the API.
      image_size: (Optional) Size (image_width, image_height) of the input
        image; ONLY needed when focal_length and principal_point are given in
        pixel space.
Raises:
ConnectionError: If the objectron open source model can't be downloaded
        from the MediaPipe GitHub repo.
"""
# Get Camera parameters.
fx, fy = focal_length
px, py = principal_point
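    # If pixel-space intrinsics were provided, convert them to NDC: focal
    # lengths are divided by half the image dimensions, and the principal
    # point is mapped into [-1, 1] relative to the image center (note the
    # sign flip relative to pixel coordinates). For example, with
    # image_size=(640, 480), fx = 500 becomes 500 / 320 = 1.5625 in NDC.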
if image_size is not None:
half_width = image_size[0] / 2.0
half_height = image_size[1] / 2.0
fx = fx / half_width
fy = fy / half_height
px = - (px - half_width) / half_width
py = - (py - half_height) / half_height
# Create and init model.
model = get_model_by_name(model_name)
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'box_landmark_model_path': model.model_path,
'allowed_labels': model.label_name,
'max_num_objects': max_num_objects,
},
calculator_params={
'ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
('objectdetectionoidv4subgraph'
'__TensorsToDetectionsCalculator.min_score_thresh'):
min_detection_confidence,
('boxlandmarksubgraph__ThresholdingCalculator'
'.threshold'):
min_tracking_confidence,
('Lift2DFrameAnnotationTo3DCalculator'
'.normalized_focal_x'): fx,
('Lift2DFrameAnnotationTo3DCalculator'
'.normalized_focal_y'): fy,
('Lift2DFrameAnnotationTo3DCalculator'
'.normalized_principal_point_x'): px,
('Lift2DFrameAnnotationTo3DCalculator'
'.normalized_principal_point_y'): py,
},
outputs=['detected_objects'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the box landmarks and rectangular bounding box of each detected object.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with a "detected_objects" field that contains a list
of detected 3D bounding boxes. Each detected box is represented as an
"ObjectronOutputs" instance.
"""
results = super().process(input_data={'image': image})
if results.detected_objects:
results.detected_objects = self._convert_format(results.detected_objects)
else:
results.detected_objects = None
return results
def _convert_format(
self,
inputs: annotation_data_pb2.FrameAnnotation) -> List[ObjectronOutputs]:
    new_outputs = []
for annotation in inputs.annotations:
      # Get 3D object pose.
rotation = np.reshape(np.array(annotation.rotation), (3, 3))
translation = np.array(annotation.translation)
scale = np.array(annotation.scale)
      # Get 2D/3D landmarks.
landmarks_2d = landmark_pb2.NormalizedLandmarkList()
landmarks_3d = landmark_pb2.LandmarkList()
for keypoint in annotation.keypoints:
point_2d = keypoint.point_2d
landmarks_2d.landmark.add(x=point_2d.x, y=point_2d.y)
point_3d = keypoint.point_3d
landmarks_3d.landmark.add(x=point_3d.x, y=point_3d.y, z=point_3d.z)
# Add to objectron outputs.
new_outputs.append(ObjectronOutputs(landmarks_2d, landmarks_3d,
rotation, translation, scale=scale))
return new_outputs
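

if __name__ == '__main__':
  # Minimal usage sketch (illustrative only): 'shoes.jpg' and the pixel-space
  # camera intrinsics below are placeholders, not values shipped with the
  # package.
  import cv2
  _image = cv2.imread('shoes.jpg')
  _h, _w = _image.shape[:2]
  with Objectron(static_image_mode=True,
                 model_name='Shoe',
                 focal_length=(_w, _w),  # fx, fy in pixels
                 principal_point=(_w / 2, _h / 2),
                 image_size=(_w, _h)) as _objectron:
    _results = _objectron.process(cv2.cvtColor(_image, cv2.COLOR_BGR2RGB))
    if _results.detected_objects:
      print('First box rotation:\n{}'.format(
          _results.detected_objects[0].rotation))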

View File

@@ -0,0 +1,81 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.objectron."""
import os
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
from mediapipe.python.solutions import objectron as mp_objectron
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 30 # pixels
EXPECTED_BOX_COORDINATES_PREDICTION = [[[236, 413], [408, 474], [135, 457],
[383, 505], [80, 478], [408, 345],
[130, 347], [384, 355], [72, 353]],
[[241, 206], [411, 279], [131, 280],
[392, 249], [78, 252], [412, 155],
[140, 178], [396, 105], [89, 137]]]
class ObjectronTest(parameterized.TestCase):
def test_invalid_image_shape(self):
with mp_objectron.Objectron() as objectron:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
objectron.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_objectron.Objectron() as objectron:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = objectron.process(image)
self.assertIsNone(results.detected_objects)
@parameterized.named_parameters(('static_image_mode', True, 1),
('video_mode', False, 5))
def test_multi_objects(self, static_image_mode, num_frames):
image_path = os.path.join(os.path.dirname(__file__), 'testdata/shoes.jpg')
image = cv2.imread(image_path)
with mp_objectron.Objectron(
static_image_mode=static_image_mode,
max_num_objects=2,
min_detection_confidence=0.5) as objectron:
for _ in range(num_frames):
results = objectron.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
multi_box_coordinates = []
for detected_object in results.detected_objects:
landmarks = detected_object.landmarks_2d
self.assertLen(landmarks.landmark, 9)
x = [landmark.x for landmark in landmarks.landmark]
y = [landmark.y for landmark in landmarks.landmark]
box_coordinates = np.transpose(np.stack((y, x))) * image.shape[0:2]
multi_box_coordinates.append(box_coordinates)
self.assertLen(multi_box_coordinates, 2)
prediction_error = np.abs(
np.asarray(multi_box_coordinates) -
np.asarray(EXPECTED_BOX_COORDINATES_PREDICTION))
npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,216 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Pose."""
import enum
from typing import NamedTuple
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# The following imports are needed because python pb2 silently discards
# unknown protobuf fields.
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmarks_smoothing_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
from mediapipe.calculators.util import visibility_smoothing_calculator_pb2
from mediapipe.framework.tool import switch_container_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils
class PoseLandmark(enum.IntEnum):
"""The 33 pose landmarks."""
NOSE = 0
LEFT_EYE_INNER = 1
LEFT_EYE = 2
LEFT_EYE_OUTER = 3
RIGHT_EYE_INNER = 4
RIGHT_EYE = 5
RIGHT_EYE_OUTER = 6
LEFT_EAR = 7
RIGHT_EAR = 8
MOUTH_LEFT = 9
MOUTH_RIGHT = 10
LEFT_SHOULDER = 11
RIGHT_SHOULDER = 12
LEFT_ELBOW = 13
RIGHT_ELBOW = 14
LEFT_WRIST = 15
RIGHT_WRIST = 16
LEFT_PINKY = 17
RIGHT_PINKY = 18
LEFT_INDEX = 19
RIGHT_INDEX = 20
LEFT_THUMB = 21
RIGHT_THUMB = 22
LEFT_HIP = 23
RIGHT_HIP = 24
LEFT_KNEE = 25
RIGHT_KNEE = 26
LEFT_ANKLE = 27
RIGHT_ANKLE = 28
LEFT_HEEL = 29
RIGHT_HEEL = 30
LEFT_FOOT_INDEX = 31
RIGHT_FOOT_INDEX = 32
BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
POSE_CONNECTIONS = frozenset([
(PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
(PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
(PoseLandmark.RIGHT_EYE, PoseLandmark.RIGHT_EYE_OUTER),
(PoseLandmark.RIGHT_EYE_OUTER, PoseLandmark.RIGHT_EAR),
(PoseLandmark.NOSE, PoseLandmark.LEFT_EYE_INNER),
(PoseLandmark.LEFT_EYE_INNER, PoseLandmark.LEFT_EYE),
(PoseLandmark.LEFT_EYE, PoseLandmark.LEFT_EYE_OUTER),
(PoseLandmark.LEFT_EYE_OUTER, PoseLandmark.LEFT_EAR),
(PoseLandmark.MOUTH_RIGHT, PoseLandmark.MOUTH_LEFT),
(PoseLandmark.RIGHT_SHOULDER, PoseLandmark.LEFT_SHOULDER),
(PoseLandmark.RIGHT_SHOULDER, PoseLandmark.RIGHT_ELBOW),
(PoseLandmark.RIGHT_ELBOW, PoseLandmark.RIGHT_WRIST),
(PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_PINKY),
(PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_INDEX),
(PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_THUMB),
(PoseLandmark.RIGHT_PINKY, PoseLandmark.RIGHT_INDEX),
(PoseLandmark.LEFT_SHOULDER, PoseLandmark.LEFT_ELBOW),
(PoseLandmark.LEFT_ELBOW, PoseLandmark.LEFT_WRIST),
(PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_PINKY),
(PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_INDEX),
(PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_THUMB),
(PoseLandmark.LEFT_PINKY, PoseLandmark.LEFT_INDEX),
(PoseLandmark.RIGHT_SHOULDER, PoseLandmark.RIGHT_HIP),
(PoseLandmark.LEFT_SHOULDER, PoseLandmark.LEFT_HIP),
(PoseLandmark.RIGHT_HIP, PoseLandmark.LEFT_HIP),
(PoseLandmark.RIGHT_HIP, PoseLandmark.RIGHT_KNEE),
(PoseLandmark.LEFT_HIP, PoseLandmark.LEFT_KNEE),
(PoseLandmark.RIGHT_KNEE, PoseLandmark.RIGHT_ANKLE),
(PoseLandmark.LEFT_KNEE, PoseLandmark.LEFT_ANKLE),
(PoseLandmark.RIGHT_ANKLE, PoseLandmark.RIGHT_HEEL),
(PoseLandmark.LEFT_ANKLE, PoseLandmark.LEFT_HEEL),
(PoseLandmark.RIGHT_HEEL, PoseLandmark.RIGHT_FOOT_INDEX),
(PoseLandmark.LEFT_HEEL, PoseLandmark.LEFT_FOOT_INDEX),
(PoseLandmark.RIGHT_ANKLE, PoseLandmark.RIGHT_FOOT_INDEX),
(PoseLandmark.LEFT_ANKLE, PoseLandmark.LEFT_FOOT_INDEX),
])
def _download_oss_pose_landmark_model(model_complexity):
"""Downloads the pose landmark lite/heavy model from the MediaPipe Github repo if it doesn't exist in the package."""
if model_complexity == 0:
download_utils.download_oss_model(
'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite')
elif model_complexity == 2:
download_utils.download_oss_model(
'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite')
class Pose(SolutionBase):
"""MediaPipe Pose.
MediaPipe Pose processes an RGB image and returns pose landmarks on the most
prominent person detected.
Please refer to https://solutions.mediapipe.dev/pose#python-solution-api for
usage examples.
"""
def __init__(self,
static_image_mode=False,
model_complexity=1,
smooth_landmarks=True,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe Pose object.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream. See details in
https://solutions.mediapipe.dev/pose#static_image_mode.
model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
details in https://solutions.mediapipe.dev/pose#model_complexity.
smooth_landmarks: Whether to filter landmarks across different input
images to reduce jitter. See details in
https://solutions.mediapipe.dev/pose#smooth_landmarks.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
detection to be considered successful. See details in
https://solutions.mediapipe.dev/pose#min_detection_confidence.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
pose landmarks to be considered tracked successfully. See details in
https://solutions.mediapipe.dev/pose#min_tracking_confidence.
"""
_download_oss_pose_landmark_model(model_complexity)
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'model_complexity': model_complexity,
'smooth_landmarks': smooth_landmarks and not static_image_mode,
},
calculator_params={
'ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=['pose_landmarks', 'pose_world_landmarks'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the pose landmarks on the most prominent person detected.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple that has two fields describing the landmarks on the most
      prominent person detected:
1) "pose_landmarks" field that contains the pose landmarks.
2) "pose_world_landmarks" field that contains the pose landmarks in
real-world 3D coordinates that are in meters with the origin at the
center between hips.
"""
results = super().process(input_data={'image': image})
if results.pose_landmarks:
for landmark in results.pose_landmarks.landmark:
landmark.ClearField('presence')
if results.pose_world_landmarks:
for landmark in results.pose_world_landmarks.landmark:
landmark.ClearField('presence')
return results
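

if __name__ == '__main__':
  # Minimal usage sketch (illustrative only): assumes OpenCV is installed and
  # that 'pose.jpg' is a readable image; both are placeholders.
  import cv2
  _image = cv2.imread('pose.jpg')
  with Pose(static_image_mode=True, model_complexity=1) as _pose:
    _results = _pose.process(cv2.cvtColor(_image, cv2.COLOR_BGR2RGB))
    if _results.pose_landmarks:
      _nose = _results.pose_landmarks.landmark[PoseLandmark.NOSE]
      print('Nose at ({:.3f}, {:.3f}) in normalized image coordinates.'.format(
          _nose.x, _nose.y))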

View File

@@ -0,0 +1,197 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.pose."""
import json
import os
import tempfile
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import pose as mp_pose
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 15 # pixels
EXPECTED_POSE_LANDMARKS = np.array([[460, 283], [467, 273], [471, 273],
[474, 273], [465, 273], [465, 273],
[466, 273], [491, 277], [480, 277],
[470, 294], [465, 294], [545, 319],
[453, 329], [622, 323], [375, 316],
[696, 316], [299, 307], [719, 316],
[278, 306], [721, 311], [274, 304],
[713, 313], [283, 306], [520, 476],
[467, 471], [612, 550], [358, 490],
[701, 613], [349, 611], [709, 624],
[363, 630], [730, 633], [303, 628]])
WORLD_DIFF_THRESHOLD = 0.2 # meters
EXPECTED_POSE_WORLD_LANDMARKS = np.array([
[-0.11, -0.59, -0.15], [-0.09, -0.64, -0.16], [-0.09, -0.64, -0.16],
[-0.09, -0.64, -0.16], [-0.11, -0.64, -0.14], [-0.11, -0.64, -0.14],
[-0.11, -0.64, -0.14], [0.01, -0.65, -0.15], [-0.06, -0.64, -0.05],
[-0.07, -0.57, -0.15], [-0.09, -0.57, -0.12], [0.18, -0.49, -0.09],
[-0.14, -0.5, -0.03], [0.41, -0.48, -0.11], [-0.42, -0.5, -0.02],
[0.64, -0.49, -0.17], [-0.63, -0.51, -0.13], [0.7, -0.5, -0.19],
[-0.71, -0.53, -0.15], [0.72, -0.51, -0.23], [-0.69, -0.54, -0.19],
[0.66, -0.49, -0.19], [-0.64, -0.52, -0.15], [0.09, 0., -0.04],
[-0.09, -0., 0.03], [0.41, 0.23, -0.09], [-0.43, 0.1, -0.11],
[0.69, 0.49, -0.04], [-0.48, 0.47, -0.02], [0.72, 0.52, -0.04],
[-0.48, 0.51, -0.02], [0.8, 0.5, -0.14], [-0.59, 0.52, -0.11],
])
class PoseTest(parameterized.TestCase):
def _landmarks_list_to_array(self, landmark_list, image_shape):
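    # Landmark coordinates are normalized to [0, 1]; de-normalize x and y by
    # the image dimensions. z is roughly on the same scale as x, so it is
    # scaled by the image width as well.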
rows, cols, _ = image_shape
return np.asarray([(lmk.x * cols, lmk.y * rows, lmk.z * cols)
for lmk in landmark_list.landmark])
def _world_landmarks_list_to_array(self, landmark_list):
return np.asarray([(lmk.x, lmk.y, lmk.z)
for lmk in landmark_list.landmark])
def _assert_diff_less(self, array1, array2, threshold):
npt.assert_array_less(np.abs(array1 - array2), threshold)
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
mp_drawing.draw_landmarks(frame, results.pose_landmarks,
mp_pose.POSE_CONNECTIONS)
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_pose.Pose() as pose:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
pose.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_pose.Pose() as pose:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = pose.process(image)
self.assertIsNone(results.pose_landmarks)
@parameterized.named_parameters(('static_lite', True, 0, 3),
('static_full', True, 1, 3),
('static_heavy', True, 2, 3),
('video_lite', False, 0, 3),
('video_full', False, 1, 3),
('video_heavy', False, 2, 3))
def test_on_image(self, static_image_mode, model_complexity, num_frames):
image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
image = cv2.imread(image_path)
with mp_pose.Pose(static_image_mode=static_image_mode,
model_complexity=model_complexity) as pose:
for idx in range(num_frames):
results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# TODO: Add rendering of world 3D when supported.
self._annotate(image.copy(), results, idx)
self._assert_diff_less(
self._landmarks_list_to_array(results.pose_landmarks,
image.shape)[:, :2],
EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
self._assert_diff_less(
self._world_landmarks_list_to_array(results.pose_world_landmarks),
EXPECTED_POSE_WORLD_LANDMARKS, WORLD_DIFF_THRESHOLD)
@parameterized.named_parameters(
('full', 1, 'pose_squats.full.npz'))
def test_on_video(self, model_complexity, expected_name):
"""Tests pose models on a video."""
# If set to `True` will dump actual predictions to .npz and JSON files.
dump_predictions = False
# Set threshold for comparing actual and expected predictions in pixels.
diff_threshold = 15
world_diff_threshold = 0.1
video_path = os.path.join(os.path.dirname(__file__),
'testdata/pose_squats.mp4')
expected_path = os.path.join(os.path.dirname(__file__),
'testdata/{}'.format(expected_name))
# Predict pose landmarks for each frame.
video_cap = cv2.VideoCapture(video_path)
actual_per_frame = []
actual_world_per_frame = []
frame_idx = 0
with mp_pose.Pose(static_image_mode=False,
model_complexity=model_complexity) as pose:
while True:
# Get next frame of the video.
success, input_frame = video_cap.read()
if not success:
break
# Run pose tracker.
input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
result = pose.process(image=input_frame)
pose_landmarks = self._landmarks_list_to_array(result.pose_landmarks,
input_frame.shape)
pose_world_landmarks = self._world_landmarks_list_to_array(
result.pose_world_landmarks)
actual_per_frame.append(pose_landmarks)
actual_world_per_frame.append(pose_world_landmarks)
input_frame = cv2.cvtColor(input_frame, cv2.COLOR_RGB2BGR)
self._annotate(input_frame, result, frame_idx)
frame_idx += 1
actual = np.array(actual_per_frame)
actual_world = np.array(actual_world_per_frame)
if dump_predictions:
# Dump .npz
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
np.savez(tmp_file, predictions=actual, predictions_world=actual_world)
print('Predictions saved as .npz to {}'.format(tmp_file.name))
# Dump JSON
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
with open(tmp_file.name, 'w') as fl:
dump_data = {
'predictions': np.around(actual, 3).tolist(),
'predictions_world': np.around(actual_world, 3).tolist()
}
fl.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))
print('Predictions saved as JSON to {}'.format(tmp_file.name))
# Validate actual vs. expected landmarks.
expected = np.load(expected_path)['predictions']
assert actual.shape == expected.shape, (
'Unexpected shape of predictions: {} instead of {}'.format(
actual.shape, expected.shape))
self._assert_diff_less(
actual[..., :2], expected[..., :2], threshold=diff_threshold)
# Validate actual vs. expected world landmarks.
expected_world = np.load(expected_path)['predictions_world']
assert actual_world.shape == expected_world.shape, (
'Unexpected shape of world predictions: {} instead of {}'.format(
actual_world.shape, expected_world.shape))
self._assert_diff_less(
actual_world, expected_world, threshold=world_diff_threshold)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,76 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Selfie Segmentation."""
from typing import NamedTuple
import numpy as np
# The following imports are needed because python pb2 silently discards
# unknown protobuf fields.
# pylint: disable=unused-import
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_segmentation_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.framework.tool import switch_container_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
BINARYPB_FILE_PATH = 'mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb'
class SelfieSegmentation(SolutionBase):
"""MediaPipe Selfie Segmentation.
MediaPipe Selfie Segmentation processes an RGB image and returns a
segmentation mask.
Please refer to
https://solutions.mediapipe.dev/selfie_segmentation#python-solution-api for
usage examples.
"""
def __init__(self, model_selection=0):
"""Initializes a MediaPipe Selfie Segmentation object.
Args:
model_selection: 0 or 1. 0 to select a general-purpose model, and 1 to
select a model more optimized for landscape images. See details in
https://solutions.mediapipe.dev/selfie_segmentation#model_selection.
"""
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'model_selection': model_selection,
},
outputs=['segmentation_mask'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns a segmentation mask.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
      A NamedTuple object with a "segmentation_mask" field that contains a
      float-type 2D NumPy array representing the mask.
"""
return super().process(input_data={'image': image})
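

if __name__ == '__main__':
  # Minimal usage sketch (illustrative only): assumes OpenCV is installed and
  # that 'portrait.jpg' is a readable image; both are placeholders.
  import cv2
  _image = cv2.imread('portrait.jpg')
  with SelfieSegmentation(model_selection=0) as _segmenter:
    _results = _segmenter.process(cv2.cvtColor(_image, cv2.COLOR_BGR2RGB))
    # Keep pixels where the mask confidence exceeds 0.5; black out the rest.
    _condition = np.stack((_results.segmentation_mask,) * 3, axis=-1) > 0.5
    cv2.imwrite('portrait_segmented.png', np.where(_condition, _image, 0))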

View File

@@ -0,0 +1,68 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.selfie_segmentation."""
import os
import tempfile
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import selfie_segmentation as mp_selfie_segmentation
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
class SelfieSegmentationTest(parameterized.TestCase):
def _draw(self, frame: np.ndarray, mask: np.ndarray):
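    # Keep the frame only where the mask is bright: the element-wise minimum
    # against the 3-channel-stacked mask darkens background pixels.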
frame = np.minimum(frame, np.stack((mask,) * 3, axis=-1))
    path = os.path.join(tempfile.gettempdir(),
                        self.id().split('.')[-1] + '.png')
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
selfie_segmentation.process(
np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = selfie_segmentation.process(image)
normalized_segmentation_mask = (results.segmentation_mask *
255).astype(int)
self.assertLess(np.amax(normalized_segmentation_mask), 1)
@parameterized.named_parameters(('general', 0), ('landscape', 1))
def test_segmentation(self, model_selection):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/portrait.jpg')
image = cv2.imread(image_path)
with mp_selfie_segmentation.SelfieSegmentation(
model_selection=model_selection) as selfie_segmentation:
results = selfie_segmentation.process(
cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
normalized_segmentation_mask = (results.segmentation_mask *
255).astype(int)
self._draw(image.copy(), normalized_segmentation_mask)
if __name__ == '__main__':
absltest.main()