Initial commit

Untriex Programming
2021-08-31 22:06:02 +02:00
commit 9b6723e11e
5142 changed files with 1455625 additions and 0 deletions

@@ -0,0 +1,28 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Python API."""
from mediapipe.python._framework_bindings import resource_util
from mediapipe.python._framework_bindings.calculator_graph import CalculatorGraph
from mediapipe.python._framework_bindings.calculator_graph import GraphInputStreamAddMode
from mediapipe.python._framework_bindings.image import Image
from mediapipe.python._framework_bindings.image_frame import ImageFormat
from mediapipe.python._framework_bindings.image_frame import ImageFrame
from mediapipe.python._framework_bindings.matrix import Matrix
from mediapipe.python._framework_bindings.packet import Packet
from mediapipe.python._framework_bindings.timestamp import Timestamp
from mediapipe.python._framework_bindings.validated_graph_config import ValidatedGraphConfig
import mediapipe.python.packet_creator
import mediapipe.python.packet_getter
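# A minimal end-to-end sketch of the bindings re-exported above (illustrative
# only; the PassThroughCalculator graph mirrors the tests later in this commit
# and is an assumption, not part of this module):
#
#   import mediapipe as mp
#   graph = mp.CalculatorGraph(graph_config="""
#       input_stream: 'in'
#       output_stream: 'out'
#       node {
#         calculator: 'PassThroughCalculator'
#         input_stream: 'in'
#         output_stream: 'out'
#       }
#       """)
#   graph.observe_output_stream('out', lambda name, packet: print(packet))
#   graph.start_run()
#   graph.add_packet_to_input_stream(
#       stream='in', packet=mp.packet_creator.create_string('hello'),
#       timestamp=0)
#   graph.close()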

@@ -0,0 +1,216 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python._framework_bindings.calculator_graph."""
# Dependency imports
from absl.testing import absltest
import mediapipe as mp
from google.protobuf import text_format
from mediapipe.framework import calculator_pb2
class GraphTest(absltest.TestCase):
def test_invalid_binary_graph_file(self):
with self.assertRaisesRegex(
FileNotFoundError,
'(No such file or directory|The path does not exist)'):
mp.CalculatorGraph(binary_graph_path='/tmp/abc.binarypb')
def test_invalid_node_config(self):
text_config = """
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
input_stream: 'in'
output_stream: 'out'
}
"""
config_proto = calculator_pb2.CalculatorGraphConfig()
text_format.Parse(text_config, config_proto)
with self.assertRaisesRegex(
ValueError,
'Input and output streams to PassThroughCalculator must use matching tags and indexes.'
):
mp.CalculatorGraph(graph_config=config_proto)
def test_invalid_calculator_type(self):
text_config = """
node {
calculator: 'SomeUnknownCalculator'
input_stream: 'in'
output_stream: 'out'
}
"""
config_proto = calculator_pb2.CalculatorGraphConfig()
text_format.Parse(text_config, config_proto)
with self.assertRaisesRegex(
RuntimeError, 'Unable to find Calculator \"SomeUnknownCalculator\"'):
mp.CalculatorGraph(graph_config=config_proto)
def test_graph_initialized_with_proto_config(self):
text_config = """
max_queue_size: 1
input_stream: 'in'
output_stream: 'out'
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
output_stream: 'out'
}
"""
config_proto = calculator_pb2.CalculatorGraphConfig()
text_format.Parse(text_config, config_proto)
    hello_world_packet = mp.packet_creator.create_string('hello world')
    out = []
    graph = mp.CalculatorGraph(graph_config=config_proto)
graph.observe_output_stream('out', lambda _, packet: out.append(packet))
graph.start_run()
graph.add_packet_to_input_stream(
stream='in', packet=hello_world_packet, timestamp=0)
graph.add_packet_to_input_stream(
stream='in', packet=hello_world_packet.at(1))
graph.close()
self.assertEqual(graph.graph_input_stream_add_mode,
mp.GraphInputStreamAddMode.WAIT_TILL_NOT_FULL)
self.assertEqual(graph.max_queue_size, 1)
self.assertFalse(graph.has_error())
self.assertLen(out, 2)
self.assertEqual(out[0].timestamp, 0)
self.assertEqual(out[1].timestamp, 1)
self.assertEqual(mp.packet_getter.get_str(out[0]), 'hello world')
self.assertEqual(mp.packet_getter.get_str(out[1]), 'hello world')
def test_graph_initialized_with_text_config(self):
text_config = """
max_queue_size: 1
input_stream: 'in'
output_stream: 'out'
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
output_stream: 'out'
}
"""
hello_world_packet = mp.packet_creator.create_string('hello world')
out = []
graph = mp.CalculatorGraph(graph_config=text_config)
graph.observe_output_stream('out', lambda _, packet: out.append(packet))
graph.start_run()
graph.add_packet_to_input_stream(
stream='in', packet=hello_world_packet.at(0))
graph.add_packet_to_input_stream(
stream='in', packet=hello_world_packet, timestamp=1)
graph.close()
self.assertEqual(graph.graph_input_stream_add_mode,
mp.GraphInputStreamAddMode.WAIT_TILL_NOT_FULL)
self.assertEqual(graph.max_queue_size, 1)
self.assertFalse(graph.has_error())
self.assertLen(out, 2)
self.assertEqual(out[0].timestamp, 0)
self.assertEqual(out[1].timestamp, 1)
self.assertEqual(mp.packet_getter.get_str(out[0]), 'hello world')
self.assertEqual(mp.packet_getter.get_str(out[1]), 'hello world')
def test_graph_validation_and_initialization(self):
text_config = """
max_queue_size: 1
input_stream: 'in'
output_stream: 'out'
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
output_stream: 'out'
}
"""
hello_world_packet = mp.packet_creator.create_string('hello world')
out = []
validated_graph_config = mp.ValidatedGraphConfig()
self.assertFalse(validated_graph_config.initialized())
validated_graph_config.initialize(graph_config=text_config)
self.assertTrue(validated_graph_config.initialized())
graph = mp.CalculatorGraph(validated_graph_config=validated_graph_config)
graph.observe_output_stream('out', lambda _, packet: out.append(packet))
graph.start_run()
graph.add_packet_to_input_stream(
stream='in', packet=hello_world_packet.at(0))
graph.add_packet_to_input_stream(
stream='in', packet=hello_world_packet, timestamp=1)
graph.close()
self.assertEqual(graph.graph_input_stream_add_mode,
mp.GraphInputStreamAddMode.WAIT_TILL_NOT_FULL)
self.assertEqual(graph.max_queue_size, 1)
self.assertFalse(graph.has_error())
self.assertLen(out, 2)
self.assertEqual(out[0].timestamp, 0)
self.assertEqual(out[1].timestamp, 1)
self.assertEqual(mp.packet_getter.get_str(out[0]), 'hello world')
self.assertEqual(mp.packet_getter.get_str(out[1]), 'hello world')
def test_insert_packets_with_same_timestamp(self):
text_config = """
max_queue_size: 1
input_stream: 'in'
output_stream: 'out'
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
output_stream: 'out'
}
"""
config_proto = calculator_pb2.CalculatorGraphConfig()
text_format.Parse(text_config, config_proto)
hello_world_packet = mp.packet_creator.create_string('hello world')
out = []
graph = mp.CalculatorGraph(graph_config=config_proto)
graph.observe_output_stream('out', lambda _, packet: out.append(packet))
graph.start_run()
graph.add_packet_to_input_stream(
stream='in', packet=hello_world_packet.at(0))
graph.wait_until_idle()
graph.add_packet_to_input_stream(
stream='in', packet=hello_world_packet.at(0))
with self.assertRaisesRegex(
ValueError, 'Current minimum expected timestamp is 1 but received 0.'):
graph.wait_until_idle()
def test_side_packet_graph(self):
text_config = """
node {
calculator: 'StringToUint64Calculator'
input_side_packet: "string"
output_side_packet: "number"
}
"""
config_proto = calculator_pb2.CalculatorGraphConfig()
text_format.Parse(text_config, config_proto)
graph = mp.CalculatorGraph(graph_config=config_proto)
graph.start_run(
input_side_packets={'string': mp.packet_creator.create_string('42')})
graph.wait_until_done()
self.assertFalse(graph.has_error())
self.assertEqual(
mp.packet_getter.get_uint(graph.get_output_side_packet('number')), 42)
if __name__ == '__main__':
absltest.main()

@@ -0,0 +1,186 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python._framework_bindings.image_frame."""
import gc
import random
import sys
from absl.testing import absltest
import cv2
import mediapipe as mp
import numpy as np
import PIL.Image
# TODO: Add unit tests specifically for memory management.
class ImageFrameTest(absltest.TestCase):
def test_create_image_frame_from_gray_cv_mat(self):
w, h = random.randrange(3, 100), random.randrange(3, 100)
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8),
cv2.COLOR_RGB2GRAY)
mat[2, 2] = 42
image_frame = mp.ImageFrame(image_format=mp.ImageFormat.GRAY8, data=mat)
self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
with self.assertRaisesRegex(IndexError, 'index dimension mismatch'):
print(image_frame[w, h, 1])
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image_frame[w, h])
self.assertEqual(42, image_frame[2, 2])
def test_create_image_frame_from_rgb_cv_mat(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
cv2.COLOR_RGB2BGR)
mat[2, 2, 1] = 42
image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat)
self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image_frame[w, h, channels])
self.assertEqual(42, image_frame[2, 2, 1])
def test_create_image_frame_from_rgb48_cv_mat(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
mat = cv2.cvtColor(
np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16),
cv2.COLOR_RGB2BGR)
mat[2, 2, 1] = 42
image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB48, data=mat)
self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image_frame[w, h, channels])
self.assertEqual(42, image_frame[2, 2, 1])
def test_create_image_frame_from_gray_pil_image(self):
w, h = random.randrange(3, 100), random.randrange(3, 100)
img = PIL.Image.fromarray(
np.random.randint(2**8 - 1, size=(h, w), dtype=np.uint8), 'L')
image_frame = mp.ImageFrame(
image_format=mp.ImageFormat.GRAY8, data=np.asarray(img))
self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view()))
with self.assertRaisesRegex(IndexError, 'index dimension mismatch'):
print(image_frame[w, h, 1])
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image_frame[w, h])
def test_create_image_frame_from_rgb_pil_image(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
img = PIL.Image.fromarray(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
'RGB')
image_frame = mp.ImageFrame(
image_format=mp.ImageFormat.SRGB, data=np.asarray(img))
self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view()))
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image_frame[w, h, channels])
def test_create_image_frame_from_rgba64_pil_image(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 4
img = PIL.Image.fromarray(
np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16),
'RGBA')
image_frame = mp.ImageFrame(
image_format=mp.ImageFormat.SRGBA64,
data=np.asarray(img).astype(np.uint16))
self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view()))
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image_frame[1000, 1000, 1000])
  def test_image_frame_numpy_view(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
cv2.COLOR_RGB2BGR)
image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat)
output_ndarray = image_frame.numpy_view()
self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
# The output of numpy_view() is a reference to the internal data and it's
# unwritable after creation.
with self.assertRaisesRegex(ValueError,
'assignment destination is read-only'):
output_ndarray[0, 0, 0] = 0
copied_ndarray = np.copy(output_ndarray)
copied_ndarray[0, 0, 0] = 0
def test_cropped_gray8_image(self):
w, h = random.randrange(20, 100), random.randrange(20, 100)
channels, offset = 3, 10
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
cv2.COLOR_RGB2GRAY)
image_frame = mp.ImageFrame(
image_format=mp.ImageFormat.GRAY8,
data=np.ascontiguousarray(mat[offset:-offset, offset:-offset]))
self.assertTrue(
np.array_equal(mat[offset:-offset, offset:-offset],
image_frame.numpy_view()))
def test_cropped_rgb_image(self):
w, h = random.randrange(20, 100), random.randrange(20, 100)
channels, offset = 3, 10
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
cv2.COLOR_RGB2BGR)
image_frame = mp.ImageFrame(
image_format=mp.ImageFormat.SRGB,
data=np.ascontiguousarray(mat[offset:-offset, offset:-offset, :]))
self.assertTrue(
np.array_equal(mat[offset:-offset, offset:-offset, :],
image_frame.numpy_view()))
  # For image frames that store contiguous data, the output of numpy_view()
  # points to the pixel data of the original image frame object. The life cycle
  # of the data array is tied to the image frame object.
def test_image_frame_numpy_view_with_contiguous_data(self):
w, h = 640, 480
mat = np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8)
image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat)
self.assertTrue(image_frame.is_contiguous())
initial_ref_count = sys.getrefcount(image_frame)
self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
# Get 2 data array objects and verify that the image frame's ref count is
# increased by 2.
np_view = image_frame.numpy_view()
self.assertEqual(sys.getrefcount(image_frame), initial_ref_count + 1)
np_view2 = image_frame.numpy_view()
self.assertEqual(sys.getrefcount(image_frame), initial_ref_count + 2)
del np_view
del np_view2
gc.collect()
    # After the two data array objects are destroyed, the ref count should
    # equal the initial ref count.
self.assertEqual(sys.getrefcount(image_frame), initial_ref_count)
  # For image frames that store non-contiguous data, the output of numpy_view()
  # stores a copy of the pixel data of the image frame object. The life cycle of
  # the data array isn't tied to the image frame object.
def test_image_frame_numpy_view_with_non_contiguous_data(self):
w, h = 641, 481
mat = np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8)
image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat)
self.assertFalse(image_frame.is_contiguous())
initial_ref_count = sys.getrefcount(image_frame)
self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
np_view = image_frame.numpy_view()
self.assertEqual(sys.getrefcount(image_frame), initial_ref_count)
del np_view
gc.collect()
self.assertEqual(sys.getrefcount(image_frame), initial_ref_count)
if __name__ == '__main__':
absltest.main()

@@ -0,0 +1,183 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python._framework_bindings.image."""
import gc
import random
import sys
from absl.testing import absltest
import cv2
import mediapipe as mp
import numpy as np
import PIL.Image
# TODO: Add unit tests specifically for memory management.
class ImageTest(absltest.TestCase):
def test_create_image_from_gray_cv_mat(self):
w, h = random.randrange(3, 100), random.randrange(3, 100)
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8),
cv2.COLOR_RGB2GRAY)
mat[2, 2] = 42
image = mp.Image(image_format=mp.ImageFormat.GRAY8, data=mat)
self.assertTrue(np.array_equal(mat, image.numpy_view()))
with self.assertRaisesRegex(IndexError, 'index dimension mismatch'):
print(image[w, h, 1])
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image[w, h])
self.assertEqual(42, image[2, 2])
def test_create_image_from_rgb_cv_mat(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
cv2.COLOR_RGB2BGR)
mat[2, 2, 1] = 42
image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mat)
self.assertTrue(np.array_equal(mat, image.numpy_view()))
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image[w, h, channels])
self.assertEqual(42, image[2, 2, 1])
def test_create_image_from_rgb48_cv_mat(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
mat = cv2.cvtColor(
np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16),
cv2.COLOR_RGB2BGR)
mat[2, 2, 1] = 42
image = mp.Image(image_format=mp.ImageFormat.SRGB48, data=mat)
self.assertTrue(np.array_equal(mat, image.numpy_view()))
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image[w, h, channels])
self.assertEqual(42, image[2, 2, 1])
def test_create_image_from_gray_pil_image(self):
w, h = random.randrange(3, 100), random.randrange(3, 100)
img = PIL.Image.fromarray(
np.random.randint(2**8 - 1, size=(h, w), dtype=np.uint8), 'L')
image = mp.Image(image_format=mp.ImageFormat.GRAY8, data=np.asarray(img))
self.assertTrue(np.array_equal(np.asarray(img), image.numpy_view()))
with self.assertRaisesRegex(IndexError, 'index dimension mismatch'):
print(image[w, h, 1])
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image[w, h])
def test_create_image_from_rgb_pil_image(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
img = PIL.Image.fromarray(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
'RGB')
image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(img))
self.assertTrue(np.array_equal(np.asarray(img), image.numpy_view()))
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image[w, h, channels])
def test_create_image_from_rgba64_pil_image(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 4
img = PIL.Image.fromarray(
np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16),
'RGBA')
image = mp.Image(
image_format=mp.ImageFormat.SRGBA64,
data=np.asarray(img).astype(np.uint16))
self.assertTrue(np.array_equal(np.asarray(img), image.numpy_view()))
with self.assertRaisesRegex(IndexError, 'out of bounds'):
print(image[1000, 1000, 1000])
  def test_image_numpy_view(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
cv2.COLOR_RGB2BGR)
image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mat)
output_ndarray = image.numpy_view()
self.assertTrue(np.array_equal(mat, image.numpy_view()))
# The output of numpy_view() is a reference to the internal data and it's
# unwritable after creation.
with self.assertRaisesRegex(ValueError,
'assignment destination is read-only'):
output_ndarray[0, 0, 0] = 0
copied_ndarray = np.copy(output_ndarray)
copied_ndarray[0, 0, 0] = 0
def test_cropped_gray8_image(self):
w, h = random.randrange(20, 100), random.randrange(20, 100)
channels, offset = 3, 10
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
cv2.COLOR_RGB2GRAY)
image = mp.Image(
image_format=mp.ImageFormat.GRAY8,
data=np.ascontiguousarray(mat[offset:-offset, offset:-offset]))
self.assertTrue(
np.array_equal(mat[offset:-offset, offset:-offset], image.numpy_view()))
def test_cropped_rgb_image(self):
w, h = random.randrange(20, 100), random.randrange(20, 100)
channels, offset = 3, 10
mat = cv2.cvtColor(
np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
cv2.COLOR_RGB2BGR)
image = mp.Image(
image_format=mp.ImageFormat.SRGB,
data=np.ascontiguousarray(mat[offset:-offset, offset:-offset, :]))
self.assertTrue(
np.array_equal(mat[offset:-offset, offset:-offset, :],
image.numpy_view()))
  # For images that store contiguous data, the output of numpy_view() points to
  # the pixel data of the original image object. The life cycle of the data
  # array is tied to the image object.
def test_image_numpy_view_with_contiguous_data(self):
w, h = 640, 480
mat = np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8)
image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mat)
self.assertTrue(image.is_contiguous())
initial_ref_count = sys.getrefcount(image)
self.assertTrue(np.array_equal(mat, image.numpy_view()))
    # Get 2 data array objects and verify that the image's ref count is
    # increased by 2.
np_view = image.numpy_view()
self.assertEqual(sys.getrefcount(image), initial_ref_count + 1)
np_view2 = image.numpy_view()
self.assertEqual(sys.getrefcount(image), initial_ref_count + 2)
del np_view
del np_view2
gc.collect()
    # After the two data array objects are destroyed, the ref count should
    # equal the initial ref count.
self.assertEqual(sys.getrefcount(image), initial_ref_count)
  # For images that store non-contiguous data, the output of numpy_view()
  # stores a copy of the pixel data of the image object. The life cycle of the
  # data array isn't tied to the image object.
def test_image_numpy_view_with_non_contiguous_data(self):
w, h = 641, 481
mat = np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8)
image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mat)
self.assertFalse(image.is_contiguous())
initial_ref_count = sys.getrefcount(image)
self.assertTrue(np.array_equal(mat, image.numpy_view()))
np_view = image.numpy_view()
self.assertEqual(sys.getrefcount(image), initial_ref_count)
del np_view
gc.collect()
self.assertEqual(sys.getrefcount(image), initial_ref_count)
if __name__ == '__main__':
absltest.main()

@@ -0,0 +1,274 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The public facing packet creator APIs."""
from typing import List, Union
import warnings
import numpy as np
from google.protobuf import message
from mediapipe.python._framework_bindings import _packet_creator
from mediapipe.python._framework_bindings import image
from mediapipe.python._framework_bindings import image_frame
from mediapipe.python._framework_bindings import packet
create_string = _packet_creator.create_string
create_bool = _packet_creator.create_bool
create_int = _packet_creator.create_int
create_int8 = _packet_creator.create_int8
create_int16 = _packet_creator.create_int16
create_int32 = _packet_creator.create_int32
create_int64 = _packet_creator.create_int64
create_uint8 = _packet_creator.create_uint8
create_uint16 = _packet_creator.create_uint16
create_uint32 = _packet_creator.create_uint32
create_uint64 = _packet_creator.create_uint64
create_float = _packet_creator.create_float
create_double = _packet_creator.create_double
create_int_array = _packet_creator.create_int_array
create_float_array = _packet_creator.create_float_array
create_int_vector = _packet_creator.create_int_vector
create_bool_vector = _packet_creator.create_bool_vector
create_float_vector = _packet_creator.create_float_vector
create_string_vector = _packet_creator.create_string_vector
create_packet_vector = _packet_creator.create_packet_vector
create_string_to_packet_map = _packet_creator.create_string_to_packet_map
create_matrix = _packet_creator.create_matrix
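# A short usage sketch for the creators re-exported above (values are
# illustrative; these round trips are exercised in packet_test.py in this
# commit):
#
#   p = create_string('hello')                # std::string packet
#   q = create_int(42).at(100)                # int packet stamped at timestamp 100
#   r = create_float_vector([0.1, 0.2, 0.3])  # std::vector<float> packet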
def create_image_frame(data: Union[image_frame.ImageFrame, np.ndarray],
*,
image_format: image_frame.ImageFormat = None,
copy: bool = None) -> packet.Packet:
"""Create a MediaPipe ImageFrame packet.
A MediaPipe ImageFrame packet can be created from an existing MediaPipe
ImageFrame object and the data will be realigned and copied into a new
ImageFrame object inside of the packet.
A MediaPipe ImageFrame packet can also be created from the raw pixel data
represented as a numpy array with one of the uint8, uint16, and float data
types. There are three data ownership modes depending on how the 'copy' arg
is set.
  i) Default mode
  If copy is not set, mutable data is always copied while immutable data is
  shared by reference.
  ii) Copy mode (safe)
  If copy is set to True, the data will be realigned and copied into an
  ImageFrame object inside of the packet regardless of the immutability of the
  original data.
  iii) Reference mode (dangerous)
  If copy is set to False, the data will be forced to be shared. If the data is
  mutable (data.flags.writeable is True), a warning will be raised.
Args:
    data: A MediaPipe ImageFrame object or the raw pixel data that is
      represented as a numpy ndarray.
    image_format: One of the image_frame.ImageFormat enum types.
    copy: Indicates whether the packet should copy the data from the numpy
      ndarray.
Returns:
A MediaPipe ImageFrame Packet.
Raises:
ValueError:
i) When "data" is a numpy ndarray, "image_format" is not provided or
the "data" array is not c_contiguous in the reference mode.
ii) When "data" is an ImageFrame object, the "image_format" arg doesn't
match the image format of the "data" ImageFrame object or "copy" is
explicitly set to False.
TypeError: If "image format" doesn't match "data" array's data type.
Examples:
np_array = np.random.randint(255, size=(321, 123, 3), dtype=np.uint8)
# Copy mode by default if the data array is writable.
image_frame_packet = mp.packet_creator.create_image_frame(
image_format=mp.ImageFormat.SRGB, data=np_array)
# Make the array unwriteable to trigger the reference mode.
np_array.flags.writeable = False
image_frame_packet = mp.packet_creator.create_image_frame(
image_format=mp.ImageFormat.SRGB, data=np_array)
image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=np_array)
image_frame_packet = mp.packet_creator.create_image_frame(image_frame)
"""
if isinstance(data, image_frame.ImageFrame):
if image_format is not None and data.image_format != image_format:
raise ValueError(
'The provided image_format doesn\'t match the one from the data arg.')
if copy is not None and not copy:
# Taking a reference will make the created packet be mutable since the
# ImageFrame object can still be manipulated in Python, which voids packet
# immutability.
raise ValueError(
'Creating ImageFrame packet by taking a reference of another ImageFrame object is not supported yet.'
)
# pylint:disable=protected-access
return _packet_creator._create_image_frame_from_image_frame(data)
# pylint:enable=protected-access
else:
if image_format is None:
raise ValueError('Please provide \'image_format\' with \'data\'.')
    # If the copy arg is not set, copy the data if it's mutable. Otherwise,
    # take a reference of the immutable data to avoid a data copy.
    if copy is None:
      copy = data.flags.writeable
if not copy:
# TODO: Investigate why the first 2 bytes of the data has data
# corruption when "data" is not c_contiguous.
if not data.flags.c_contiguous:
raise ValueError(
'Reference mode is unavailable if \'data\' is not c_contiguous.')
if data.flags.writeable:
warnings.warn(
'\'data\' is still writeable. Taking a reference of the data to create ImageFrame packet is dangerous.',
RuntimeWarning, 2)
# pylint:disable=protected-access
return _packet_creator._create_image_frame_from_pixel_data(
image_format, data, copy)
# pylint:enable=protected-access
def create_image(data: Union[image.Image, np.ndarray],
*,
image_format: image_frame.ImageFormat = None,
copy: bool = None) -> packet.Packet:
"""Create a MediaPipe Image packet.
A MediaPipe Image packet can be created from an existing MediaPipe
Image object and the data will be realigned and copied into a new
Image object inside of the packet.
A MediaPipe Image packet can also be created from the raw pixel data
represented as a numpy array with one of the uint8, uint16, and float data
types. There are three data ownership modes depending on how the 'copy' arg
is set.
  i) Default mode
  If copy is not set, mutable data is always copied while immutable data is
  shared by reference.
  ii) Copy mode (safe)
  If copy is set to True, the data will be realigned and copied into an
  Image object inside of the packet regardless of the immutability of the
  original data.
  iii) Reference mode (dangerous)
  If copy is set to False, the data will be forced to be shared. If the data is
  mutable (data.flags.writeable is True), a warning will be raised.
Args:
    data: A MediaPipe Image object or the raw pixel data that is represented
      as a numpy ndarray.
    image_format: One of the mp.ImageFormat enum types.
    copy: Indicates whether the packet should copy the data from the numpy
      ndarray.
Returns:
A MediaPipe Image Packet.
Raises:
ValueError:
i) When "data" is a numpy ndarray, "image_format" is not provided or
the "data" array is not c_contiguous in the reference mode.
ii) When "data" is an Image object, the "image_format" arg doesn't
match the image format of the "data" Image object or "copy" is
explicitly set to False.
TypeError: If "image format" doesn't match "data" array's data type.
Examples:
np_array = np.random.randint(255, size=(321, 123, 3), dtype=np.uint8)
# Copy mode by default if the data array is writable.
image_packet = mp.packet_creator.create_image(
image_format=mp.ImageFormat.SRGB, data=np_array)
# Make the array unwriteable to trigger the reference mode.
np_array.flags.writeable = False
image_packet = mp.packet_creator.create_image(
image_format=mp.ImageFormat.SRGB, data=np_array)
image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np_array)
image_packet = mp.packet_creator.create_image(image)
"""
if isinstance(data, image.Image):
if image_format is not None and data.image_format != image_format:
raise ValueError(
'The provided image_format doesn\'t match the one from the data arg.')
if copy is not None and not copy:
# Taking a reference will make the created packet be mutable since the
# Image object can still be manipulated in Python, which voids packet
# immutability.
raise ValueError(
'Creating Image packet by taking a reference of another Image object is not supported yet.'
)
# pylint:disable=protected-access
return _packet_creator._create_image_from_image(data)
# pylint:enable=protected-access
else:
if image_format is None:
raise ValueError('Please provide \'image_format\' with \'data\'.')
    # If the copy arg is not set, copy the data if it's mutable. Otherwise,
    # take a reference of the immutable data to avoid a data copy.
    if copy is None:
      copy = data.flags.writeable
if not copy:
# TODO: Investigate why the first 2 bytes of the data has data
# corruption when "data" is not c_contiguous.
if not data.flags.c_contiguous:
raise ValueError(
'Reference mode is unavailable if \'data\' is not c_contiguous.')
if data.flags.writeable:
warnings.warn(
'\'data\' is still writeable. Taking a reference of the data to create Image packet is dangerous.',
RuntimeWarning, 2)
# pylint:disable=protected-access
return _packet_creator._create_image_from_pixel_data(
image_format, data, copy)
# pylint:enable=protected-access
def create_proto(proto_message: message.Message) -> packet.Packet:
"""Create a MediaPipe protobuf message packet.
Args:
proto_message: A Python protobuf message.
Returns:
A MediaPipe protobuf message Packet.
Raises:
RuntimeError: If the protobuf message type is not registered in MediaPipe.
Examples:
detection = detection_pb2.Detection()
text_format.Parse('score: 0.5', detection)
packet = mp.packet_creator.create_proto(detection)
output_detection = mp.packet_getter.get_proto(packet)
"""
# pylint:disable=protected-access
return _packet_creator._create_proto(proto_message.DESCRIPTOR.full_name,
proto_message.SerializeToString())
# pylint:enable=protected-access
def create_proto_vector(message_list: List[message.Message]) -> packet.Packet:
raise NotImplementedError('create_proto_vector is not implemented.')

@@ -0,0 +1,118 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The public facing packet getter APIs."""
from typing import List, Type
from google.protobuf import message
from google.protobuf import symbol_database
from mediapipe.python._framework_bindings import _packet_getter
from mediapipe.python._framework_bindings import packet as mp_packet
get_str = _packet_getter.get_str
get_bytes = _packet_getter.get_bytes
get_bool = _packet_getter.get_bool
get_int = _packet_getter.get_int
get_uint = _packet_getter.get_uint
get_float = _packet_getter.get_float
get_int_list = _packet_getter.get_int_list
get_bool_list = _packet_getter.get_bool_list
get_float_list = _packet_getter.get_float_list
get_str_list = _packet_getter.get_str_list
get_packet_list = _packet_getter.get_packet_list
get_str_to_packet_dict = _packet_getter.get_str_to_packet_dict
get_image = _packet_getter.get_image
get_image_frame = _packet_getter.get_image_frame
get_matrix = _packet_getter.get_matrix
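# A short usage sketch pairing the getters above with packet_creator (values
# are illustrative; see packet_test.py in this commit for the full round-trip
# behavior):
#
#   from mediapipe.python import packet_creator
#   p = packet_creator.create_string('hello')
#   assert get_str(p) == 'hello'
#   q = packet_creator.create_int_vector([1, 2, 3])
#   assert get_int_list(q) == [1, 2, 3]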
def get_proto(packet: mp_packet.Packet) -> Type[message.Message]:
"""Get the content of a MediaPipe proto Packet as a proto message.
Args:
packet: A MediaPipe proto Packet.
Returns:
A proto message.
Raises:
TypeError: If the message descriptor can't be found by type name.
Examples:
detection = detection_pb2.Detection()
text_format.Parse('score: 0.5', detection)
proto_packet = mp.packet_creator.create_proto(detection)
output_proto = mp.packet_getter.get_proto(proto_packet)
"""
# pylint:disable=protected-access
proto_type_name = _packet_getter._get_proto_type_name(packet)
# pylint:enable=protected-access
try:
descriptor = symbol_database.Default().pool.FindMessageTypeByName(
proto_type_name)
except KeyError:
raise TypeError('Can not find message descriptor by type name: %s' %
proto_type_name)
message_class = symbol_database.Default().GetPrototype(descriptor)
# pylint:disable=protected-access
serialized_proto = _packet_getter._get_serialized_proto(packet)
# pylint:enable=protected-access
proto_message = message_class()
proto_message.ParseFromString(serialized_proto)
return proto_message
def get_proto_list(packet: mp_packet.Packet) -> List[message.Message]:
"""Get the content of a MediaPipe proto vector Packet as a proto message list.
Args:
packet: A MediaPipe proto vector Packet.
Returns:
A proto message list.
Raises:
TypeError: If the message descriptor can't be found by type name.
Examples:
proto_list = mp.packet_getter.get_proto_list(protos_packet)
"""
# pylint:disable=protected-access
vector_size = _packet_getter._get_proto_vector_size(packet)
# pylint:enable=protected-access
# Return empty list if the proto vector is empty.
if vector_size == 0:
return []
# pylint:disable=protected-access
proto_type_name = _packet_getter._get_proto_vector_element_type_name(packet)
# pylint:enable=protected-access
try:
descriptor = symbol_database.Default().pool.FindMessageTypeByName(
proto_type_name)
except KeyError:
raise TypeError('Can not find message descriptor by type name: %s' %
proto_type_name)
message_class = symbol_database.Default().GetPrototype(descriptor)
# pylint:disable=protected-access
serialized_protos = _packet_getter._get_serialized_proto_list(packet)
# pylint:enable=protected-access
proto_message_list = []
for serialized_proto in serialized_protos:
proto_message = message_class()
proto_message.ParseFromString(serialized_proto)
proto_message_list.append(proto_message)
return proto_message_list

@@ -0,0 +1,506 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python._framework_bindings.packet."""
import gc
import random
import sys
from absl.testing import absltest
import mediapipe as mp
import numpy as np
from google.protobuf import text_format
from mediapipe.framework.formats import detection_pb2
class PacketTest(absltest.TestCase):
def test_empty_packet(self):
p = mp.Packet()
self.assertTrue(p.is_empty())
def test_boolean_packet(self):
p = mp.packet_creator.create_bool(True)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_bool(p), True)
self.assertEqual(p.timestamp, 0)
def test_int_packet(self):
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
p = mp.packet_creator.create_int(2**32)
p = mp.packet_creator.create_int(42)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p), 42)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_int(np.intc(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_int8_packet(self):
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
p = mp.packet_creator.create_int8(2**7)
p = mp.packet_creator.create_int8(2**7 - 1)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p), 2**7 - 1)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_int8(np.int8(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_int16_packet(self):
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
p = mp.packet_creator.create_int16(2**15)
p = mp.packet_creator.create_int16(2**15 - 1)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p), 2**15 - 1)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_int16(np.int16(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_int32_packet(self):
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
p = mp.packet_creator.create_int32(2**31)
p = mp.packet_creator.create_int32(2**31 - 1)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p), 2**31 - 1)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_int32(np.int32(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_int64_packet(self):
p = mp.packet_creator.create_int64(2**63 - 1)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p), 2**63 - 1)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_int64(np.int64(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_int(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_uint8_packet(self):
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
p = mp.packet_creator.create_uint8(2**8)
p = mp.packet_creator.create_uint8(2**8 - 1)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_uint(p), 2**8 - 1)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_uint8(np.uint8(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_uint(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_uint16_packet(self):
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
p = mp.packet_creator.create_uint16(2**16)
p = mp.packet_creator.create_uint16(2**16 - 1)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_uint(p), 2**16 - 1)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_uint16(np.uint16(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_uint(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_uint32_packet(self):
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
p = mp.packet_creator.create_uint32(2**32)
p = mp.packet_creator.create_uint32(2**32 - 1)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_uint(p), 2**32 - 1)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_uint32(np.uint32(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_uint(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_uint64_packet(self):
p = mp.packet_creator.create_uint64(2**64 - 1)
p.timestamp = 0
self.assertEqual(mp.packet_getter.get_uint(p), 2**64 - 1)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_uint64(np.uint64(1))
p2.timestamp = 0
self.assertEqual(mp.packet_getter.get_uint(p2), 1)
self.assertEqual(p2.timestamp, 0)
def test_float_packet(self):
p = mp.packet_creator.create_float(0.42)
p.timestamp = 0
self.assertAlmostEqual(mp.packet_getter.get_float(p), 0.42)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_float(np.float(0.42))
p2.timestamp = 0
self.assertAlmostEqual(mp.packet_getter.get_float(p2), 0.42)
self.assertEqual(p2.timestamp, 0)
def test_double_packet(self):
p = mp.packet_creator.create_double(0.42)
p.timestamp = 0
self.assertAlmostEqual(mp.packet_getter.get_float(p), 0.42)
self.assertEqual(p.timestamp, 0)
p2 = mp.packet_creator.create_double(np.double(0.42))
p2.timestamp = 0
self.assertAlmostEqual(mp.packet_getter.get_float(p2), 0.42)
self.assertEqual(p2.timestamp, 0)
def test_detection_proto_packet(self):
detection = detection_pb2.Detection()
text_format.Parse('score: 0.5', detection)
p = mp.packet_creator.create_proto(detection).at(100)
def test_string_packet(self):
p = mp.packet_creator.create_string('abc').at(100)
self.assertEqual(mp.packet_getter.get_str(p), 'abc')
self.assertEqual(p.timestamp, 100)
p.timestamp = 200
self.assertEqual(p.timestamp, 200)
def test_bytes_packet(self):
    p = mp.packet_creator.create_string(b'\xd0\xba\xd0').at(300)
    self.assertEqual(mp.packet_getter.get_bytes(p), b'\xd0\xba\xd0')
self.assertEqual(p.timestamp, 300)
def test_int_array_packet(self):
p = mp.packet_creator.create_int_array([1, 2, 3]).at(100)
self.assertEqual(p.timestamp, 100)
def test_float_array_packet(self):
p = mp.packet_creator.create_float_array([0.1, 0.2, 0.3]).at(100)
self.assertEqual(p.timestamp, 100)
def test_int_vector_packet(self):
p = mp.packet_creator.create_int_vector([1, 2, 3]).at(100)
self.assertEqual(mp.packet_getter.get_int_list(p), [1, 2, 3])
self.assertEqual(p.timestamp, 100)
def test_float_vector_packet(self):
p = mp.packet_creator.create_float_vector([0.1, 0.2, 0.3]).at(100)
output_list = mp.packet_getter.get_float_list(p)
self.assertAlmostEqual(output_list[0], 0.1)
self.assertAlmostEqual(output_list[1], 0.2)
self.assertAlmostEqual(output_list[2], 0.3)
self.assertEqual(p.timestamp, 100)
def test_string_vector_packet(self):
p = mp.packet_creator.create_string_vector(['a', 'b', 'c']).at(100)
output_list = mp.packet_getter.get_str_list(p)
self.assertEqual(output_list[0], 'a')
self.assertEqual(output_list[1], 'b')
self.assertEqual(output_list[2], 'c')
self.assertEqual(p.timestamp, 100)
def test_packet_vector_packet(self):
p = mp.packet_creator.create_packet_vector([
mp.packet_creator.create_float(0.42),
mp.packet_creator.create_int(42),
mp.packet_creator.create_string('42')
]).at(100)
output_list = mp.packet_getter.get_packet_list(p)
self.assertAlmostEqual(mp.packet_getter.get_float(output_list[0]), 0.42)
self.assertEqual(mp.packet_getter.get_int(output_list[1]), 42)
self.assertEqual(mp.packet_getter.get_str(output_list[2]), '42')
self.assertEqual(p.timestamp, 100)
def test_string_to_packet_map_packet(self):
p = mp.packet_creator.create_string_to_packet_map({
'float': mp.packet_creator.create_float(0.42),
'int': mp.packet_creator.create_int(42),
'string': mp.packet_creator.create_string('42')
}).at(100)
output_list = mp.packet_getter.get_str_to_packet_dict(p)
self.assertAlmostEqual(
mp.packet_getter.get_float(output_list['float']), 0.42)
self.assertEqual(mp.packet_getter.get_int(output_list['int']), 42)
self.assertEqual(mp.packet_getter.get_str(output_list['string']), '42')
self.assertEqual(p.timestamp, 100)
def test_uint8_image_packet(self):
uint8_img = np.random.randint(
2**8 - 1,
size=(random.randrange(3, 100), random.randrange(3, 100), 3),
dtype=np.uint8)
image_frame_packet = mp.packet_creator.create_image_frame(
mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=uint8_img))
output_image_frame = mp.packet_getter.get_image_frame(image_frame_packet)
self.assertTrue(np.array_equal(output_image_frame.numpy_view(), uint8_img))
image_packet = mp.packet_creator.create_image(
mp.Image(image_format=mp.ImageFormat.SRGB, data=uint8_img))
output_image = mp.packet_getter.get_image(image_packet)
self.assertTrue(np.array_equal(output_image.numpy_view(), uint8_img))
def test_uint16_image_packet(self):
uint16_img = np.random.randint(
2**16 - 1,
size=(random.randrange(3, 100), random.randrange(3, 100), 4),
dtype=np.uint16)
image_frame_packet = mp.packet_creator.create_image_frame(
mp.ImageFrame(image_format=mp.ImageFormat.SRGBA64, data=uint16_img))
output_image_frame = mp.packet_getter.get_image_frame(image_frame_packet)
self.assertTrue(np.array_equal(output_image_frame.numpy_view(), uint16_img))
image_packet = mp.packet_creator.create_image(
mp.Image(image_format=mp.ImageFormat.SRGBA64, data=uint16_img))
output_image = mp.packet_getter.get_image(image_packet)
self.assertTrue(np.array_equal(output_image.numpy_view(), uint16_img))
def test_float_image_frame_packet(self):
float_img = np.float32(
np.random.random_sample(
(random.randrange(3, 100), random.randrange(3, 100), 2)))
image_frame_packet = mp.packet_creator.create_image_frame(
mp.ImageFrame(image_format=mp.ImageFormat.VEC32F2, data=float_img))
output_image_frame = mp.packet_getter.get_image_frame(image_frame_packet)
self.assertTrue(np.allclose(output_image_frame.numpy_view(), float_img))
image_packet = mp.packet_creator.create_image(
mp.Image(image_format=mp.ImageFormat.VEC32F2, data=float_img))
output_image = mp.packet_getter.get_image(image_packet)
self.assertTrue(np.array_equal(output_image.numpy_view(), float_img))
def test_image_frame_packet_creation_copy_mode(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
# rgb_data is c_contiguous.
self.assertTrue(rgb_data.flags.c_contiguous)
initial_ref_count = sys.getrefcount(rgb_data)
p = mp.packet_creator.create_image_frame(
image_format=mp.ImageFormat.SRGB, data=rgb_data)
# copy mode doesn't increase the ref count of the data.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
rgb_data = rgb_data[:, :, ::-1]
# rgb_data is now not c_contiguous. But, copy mode shouldn't be affected.
self.assertFalse(rgb_data.flags.c_contiguous)
initial_ref_count = sys.getrefcount(rgb_data)
p = mp.packet_creator.create_image_frame(
image_format=mp.ImageFormat.SRGB, data=rgb_data)
# copy mode doesn't increase the ref count of the data.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
output_frame = mp.packet_getter.get_image_frame(p)
self.assertEqual(output_frame.height, h)
self.assertEqual(output_frame.width, w)
self.assertEqual(output_frame.channels, channels)
self.assertTrue(np.array_equal(output_frame.numpy_view(), rgb_data))
del p
del output_frame
gc.collect()
    # Destroying the packet also doesn't affect the ref count because of the
    # copy mode.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
def test_image_frame_packet_creation_reference_mode(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
rgb_data.flags.writeable = False
initial_ref_count = sys.getrefcount(rgb_data)
image_frame_packet = mp.packet_creator.create_image_frame(
image_format=mp.ImageFormat.SRGB, data=rgb_data)
    # Reference mode increases the ref count of rgb_data by 1.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
del image_frame_packet
gc.collect()
    # Deleting image_frame_packet should decrease the ref count of rgb_data
    # by 1.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
rgb_data_copy = np.copy(rgb_data)
# rgb_data_copy is a copy of rgb_data and should not increase the ref count.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
text_config = """
node {
calculator: 'PassThroughCalculator'
input_side_packet: "in"
output_side_packet: "out"
}
"""
graph = mp.CalculatorGraph(graph_config=text_config)
graph.start_run(
input_side_packets={
'in':
mp.packet_creator.create_image_frame(
image_format=mp.ImageFormat.SRGB, data=rgb_data)
})
    # Reference mode increases the ref count of rgb_data by 1.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
graph.wait_until_done()
output_packet = graph.get_output_side_packet('out')
del rgb_data
del graph
gc.collect()
    # The pixel data of the output image frame packet should still be valid
    # after the graph and the original rgb_data are deleted.
self.assertTrue(
np.array_equal(
mp.packet_getter.get_image_frame(output_packet).numpy_view(),
rgb_data_copy))
def test_image_frame_packet_copy_creation_with_cropping(self):
    w, h = random.randrange(40, 100), random.randrange(40, 100)
    channels, offset = 3, 10
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
initial_ref_count = sys.getrefcount(rgb_data)
p = mp.packet_creator.create_image_frame(
image_format=mp.ImageFormat.SRGB,
data=rgb_data[offset:-offset, offset:-offset, :])
# copy mode doesn't increase the ref count of the data.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
output_frame = mp.packet_getter.get_image_frame(p)
self.assertEqual(output_frame.height, h - 2 * offset)
self.assertEqual(output_frame.width, w - 2 * offset)
self.assertEqual(output_frame.channels, channels)
self.assertTrue(
np.array_equal(rgb_data[offset:-offset, offset:-offset, :],
output_frame.numpy_view()))
del p
del output_frame
gc.collect()
    # Destroying the packet also doesn't affect the ref count because of the
    # copy mode.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
def test_image_packet_creation_copy_mode(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
# rgb_data is c_contiguous.
self.assertTrue(rgb_data.flags.c_contiguous)
initial_ref_count = sys.getrefcount(rgb_data)
p = mp.packet_creator.create_image(
image_format=mp.ImageFormat.SRGB, data=rgb_data)
# copy mode doesn't increase the ref count of the data.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
rgb_data = rgb_data[:, :, ::-1]
# rgb_data is now not c_contiguous. But, copy mode shouldn't be affected.
self.assertFalse(rgb_data.flags.c_contiguous)
initial_ref_count = sys.getrefcount(rgb_data)
p = mp.packet_creator.create_image(
image_format=mp.ImageFormat.SRGB, data=rgb_data)
# copy mode doesn't increase the ref count of the data.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
output_image = mp.packet_getter.get_image(p)
self.assertEqual(output_image.height, h)
self.assertEqual(output_image.width, w)
self.assertEqual(output_image.channels, channels)
self.assertTrue(np.array_equal(output_image.numpy_view(), rgb_data))
del p
del output_image
gc.collect()
    # Destroying the packet also doesn't affect the ref count because of the
    # copy mode.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
def test_image_packet_creation_reference_mode(self):
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
rgb_data.flags.writeable = False
initial_ref_count = sys.getrefcount(rgb_data)
image_packet = mp.packet_creator.create_image(
image_format=mp.ImageFormat.SRGB, data=rgb_data)
    # Reference mode increases the ref count of rgb_data by 1.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
del image_packet
gc.collect()
    # Deleting image_packet should decrease the ref count of rgb_data by 1.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
rgb_data_copy = np.copy(rgb_data)
# rgb_data_copy is a copy of rgb_data and should not increase the ref count.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
text_config = """
node {
calculator: 'PassThroughCalculator'
input_side_packet: "in"
output_side_packet: "out"
}
"""
graph = mp.CalculatorGraph(graph_config=text_config)
graph.start_run(
input_side_packets={
'in':
mp.packet_creator.create_image(
image_format=mp.ImageFormat.SRGB, data=rgb_data)
})
    # Reference mode increases the ref count of rgb_data by 1.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
graph.wait_until_done()
output_packet = graph.get_output_side_packet('out')
del rgb_data
del graph
gc.collect()
    # The pixel data of the output image packet should still be valid
    # after the graph and the original rgb_data are deleted.
self.assertTrue(
np.array_equal(
mp.packet_getter.get_image(output_packet).numpy_view(),
rgb_data_copy))
def test_image_packet_copy_creation_with_cropping(self):
    w, h = random.randrange(40, 100), random.randrange(40, 100)
    channels, offset = 3, 10
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
initial_ref_count = sys.getrefcount(rgb_data)
p = mp.packet_creator.create_image(
image_format=mp.ImageFormat.SRGB,
data=rgb_data[offset:-offset, offset:-offset, :])
# copy mode doesn't increase the ref count of the data.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
output_image = mp.packet_getter.get_image(p)
self.assertEqual(output_image.height, h - 2 * offset)
self.assertEqual(output_image.width, w - 2 * offset)
self.assertEqual(output_image.channels, channels)
self.assertTrue(
np.array_equal(rgb_data[offset:-offset, offset:-offset, :],
output_image.numpy_view()))
del p
del output_image
gc.collect()
    # Destroying the packet also doesn't affect the ref count because of the
    # copy mode.
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
def test_matrix_packet(self):
np_matrix = np.array([[.1, .2, .3], [.4, .5, .6]])
initial_ref_count = sys.getrefcount(np_matrix)
p = mp.packet_creator.create_matrix(np_matrix)
# Copy mode should not increase the ref count of np_matrix.
self.assertEqual(initial_ref_count, sys.getrefcount(np_matrix))
output_matrix = mp.packet_getter.get_matrix(p)
del np_matrix
gc.collect()
self.assertTrue(
np.allclose(output_matrix, np.array([[.1, .2, .3], [.4, .5, .6]])))
def test_matrix_packet_with_non_c_contiguous_data(self):
np_matrix = np.array([[.1, .2, .3], [.4, .5, .6]])[:, ::-1]
# np_matrix is not c_contiguous.
self.assertFalse(np_matrix.flags.c_contiguous)
    initial_ref_count = sys.getrefcount(np_matrix)
    p = mp.packet_creator.create_matrix(np_matrix)
# Copy mode should not increase the ref count of np_matrix.
self.assertEqual(initial_ref_count, sys.getrefcount(np_matrix))
output_matrix = mp.packet_getter.get_matrix(p)
del np_matrix
gc.collect()
self.assertTrue(
np.allclose(output_matrix,
np.array([[.1, .2, .3], [.4, .5, .6]])[:, ::-1]))
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,543 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe SolutionBase module.
MediaPipe SolutionBase is the common base class for the high-level MediaPipe
Solution APIs such as BlazeFace, hand tracking, and BlazePose. The SolutionBase
class contains the shared logic among the high-level Solution APIs including
graph initialization, processing image/audio data, and graph shutdown. Thus,
users can easily create new MediaPipe Solution APIs on top of the SolutionBase
class.
"""
import collections
import enum
import os
from typing import Any, Iterable, List, Mapping, NamedTuple, Optional, Union
import numpy as np
from google.protobuf import descriptor
from google.protobuf import message
# resources dependency
# pylint: disable=unused-import
# pylint: enable=unused-import
from mediapipe.framework import calculator_pb2
# pylint: disable=unused-import
from mediapipe.framework.formats import detection_pb2
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
from mediapipe.calculators.image import image_transformation_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.util import landmarks_smoothing_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
from mediapipe.framework.formats import classification_pb2
from mediapipe.framework.formats import landmark_pb2
from mediapipe.framework.formats import rect_pb2
from mediapipe.modules.objectron.calculators import annotation_data_pb2
from mediapipe.modules.objectron.calculators import lift_2d_frame_annotation_to_3d_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python._framework_bindings import calculator_graph
from mediapipe.python._framework_bindings import image_frame
from mediapipe.python._framework_bindings import packet
from mediapipe.python._framework_bindings import resource_util
from mediapipe.python._framework_bindings import validated_graph_config
import mediapipe.python.packet_creator as packet_creator
import mediapipe.python.packet_getter as packet_getter
RGB_CHANNELS = 3
# TODO: Enable calculator options modification for more calculators.
CALCULATOR_TO_OPTIONS = {
'ConstantSidePacketCalculator':
constant_side_packet_calculator_pb2.ConstantSidePacketCalculatorOptions,
'ImageTransformationCalculator':
image_transformation_calculator_pb2
.ImageTransformationCalculatorOptions,
'LandmarksSmoothingCalculator':
landmarks_smoothing_calculator_pb2.LandmarksSmoothingCalculatorOptions,
'LogicCalculator':
logic_calculator_pb2.LogicCalculatorOptions,
'ThresholdingCalculator':
thresholding_calculator_pb2.ThresholdingCalculatorOptions,
'TensorsToDetectionsCalculator':
tensors_to_detections_calculator_pb2
.TensorsToDetectionsCalculatorOptions,
'Lift2DFrameAnnotationTo3DCalculator':
lift_2d_frame_annotation_to_3d_calculator_pb2
.Lift2DFrameAnnotationTo3DCalculatorOptions,
}
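# The options of these calculators can be overridden through calculator_params,
# keyed as '{calculator_name}.{options_field_name}', e.g.
# 'ImageTransformation.output_width'.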
# TODO: Support more packet data types, such as "Any" type.
@enum.unique
class _PacketDataType(enum.Enum):
"""The packet data types supported by the SolutionBase class."""
STRING = 'string'
BOOL = 'bool'
BOOL_LIST = 'bool_list'
INT = 'int'
FLOAT = 'float'
FLOAT_LIST = 'float_list'
AUDIO = 'matrix'
IMAGE = 'image'
IMAGE_FRAME = 'image_frame'
PROTO = 'proto'
PROTO_LIST = 'proto_list'
@staticmethod
def from_registered_name(registered_name: str) -> '_PacketDataType':
return NAME_TO_TYPE[registered_name]
NAME_TO_TYPE: Mapping[str, '_PacketDataType'] = {
'string':
_PacketDataType.STRING,
'bool':
_PacketDataType.BOOL,
'::std::vector<bool>':
_PacketDataType.BOOL_LIST,
'int':
_PacketDataType.INT,
'float':
_PacketDataType.FLOAT,
'::std::vector<float>':
_PacketDataType.FLOAT_LIST,
'::mediapipe::Matrix':
_PacketDataType.AUDIO,
'::mediapipe::ImageFrame':
_PacketDataType.IMAGE_FRAME,
'::mediapipe::Classification':
_PacketDataType.PROTO,
'::mediapipe::ClassificationList':
_PacketDataType.PROTO,
'::mediapipe::ClassificationListCollection':
_PacketDataType.PROTO,
'::mediapipe::Detection':
_PacketDataType.PROTO,
'::mediapipe::DetectionList':
_PacketDataType.PROTO,
'::mediapipe::Landmark':
_PacketDataType.PROTO,
'::mediapipe::LandmarkList':
_PacketDataType.PROTO,
'::mediapipe::LandmarkListCollection':
_PacketDataType.PROTO,
'::mediapipe::NormalizedLandmark':
_PacketDataType.PROTO,
'::mediapipe::FrameAnnotation':
_PacketDataType.PROTO,
'::mediapipe::Trigger':
_PacketDataType.PROTO,
'::mediapipe::Rect':
_PacketDataType.PROTO,
'::mediapipe::NormalizedRect':
_PacketDataType.PROTO,
'::mediapipe::NormalizedLandmarkList':
_PacketDataType.PROTO,
'::mediapipe::NormalizedLandmarkListCollection':
_PacketDataType.PROTO,
'::mediapipe::Image':
_PacketDataType.IMAGE,
'::std::vector<::mediapipe::Classification>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::ClassificationList>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::Detection>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::DetectionList>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::Landmark>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::LandmarkList>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::NormalizedLandmark>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::NormalizedLandmarkList>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::Rect>':
_PacketDataType.PROTO_LIST,
'::std::vector<::mediapipe::NormalizedRect>':
_PacketDataType.PROTO_LIST,
}
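# For example, a stream whose registered type name is
# '::mediapipe::NormalizedLandmarkList' is handled as a PROTO packet, while
# '::std::vector<::mediapipe::NormalizedLandmarkList>' maps to PROTO_LIST.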
class SolutionBase:
"""The common base class for the high-level MediaPipe Solution APIs.
The SolutionBase class contains the shared logic among the high-level solution
APIs including graph initialization, processing image/audio data, and graph
shutdown.
Example usage:
with solution_base.SolutionBase(
binary_graph_path='mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb',
side_inputs={'num_hands': 2}) as hand_tracker:
# Read an image and convert the BGR image to RGB.
      input_image = cv2.cvtColor(cv2.imread('/tmp/hand.png'), cv2.COLOR_BGR2RGB)
results = hand_tracker.process(input_image)
print(results.palm_detections)
print(results.multi_hand_landmarks)
"""
def __init__(
self,
binary_graph_path: Optional[str] = None,
graph_config: Optional[calculator_pb2.CalculatorGraphConfig] = None,
calculator_params: Optional[Mapping[str, Any]] = None,
side_inputs: Optional[Mapping[str, Any]] = None,
outputs: Optional[List[str]] = None):
"""Initializes the SolutionBase object.
Args:
binary_graph_path: The path to a binary mediapipe graph file (.binarypb).
graph_config: A CalculatorGraphConfig proto message or its text proto
format.
calculator_params: A mapping from the
{calculator_name}.{options_field_name} str to the field value.
side_inputs: A mapping from the side packet name to the packet raw data.
outputs: A list of the graph output stream names to observe. If the list
is empty, all the output streams listed in the graph config will be
automatically observed by default.
Raises:
FileNotFoundError: If the binary graph file can't be found.
RuntimeError: If the underlying calculator graph can't be successfully
initialized or started.
      ValueError: If any of the following:
        a) Not exactly one of 'binary_graph_path' or 'graph_config' is
          provided.
        b) The graph validation process contains errors.
        c) The registered type name of the streams and side packets can't be
          found.
        d) The calculator options of a calculator listed in calculator_params
          are not allowed to be modified.
        e) A calculator options field is a repeated field but the field value
          to be set is not iterable.
"""
if bool(binary_graph_path) == bool(graph_config):
raise ValueError(
"Must provide exactly one of 'binary_graph_path' or 'graph_config'.")
# MediaPipe package root path
root_path = os.sep.join(os.path.abspath(__file__).split(os.sep)[:-3])
resource_util.set_resource_dir(root_path)
validated_graph = validated_graph_config.ValidatedGraphConfig()
if binary_graph_path:
validated_graph.initialize(
binary_graph_path=os.path.join(root_path, binary_graph_path))
else:
validated_graph.initialize(graph_config=graph_config)
canonical_graph_config_proto = self._initialize_graph_interface(
validated_graph, side_inputs, outputs)
if calculator_params:
self._modify_calculator_options(canonical_graph_config_proto,
calculator_params)
self._graph = calculator_graph.CalculatorGraph(
graph_config=canonical_graph_config_proto)
self._simulated_timestamp = 0
self._graph_outputs = {}
def callback(stream_name: str, output_packet: packet.Packet) -> None:
self._graph_outputs[stream_name] = output_packet
for stream_name in self._output_stream_type_info.keys():
self._graph.observe_output_stream(stream_name, callback, True)
self._input_side_packets = {
name: self._make_packet(self._side_input_type_info[name], data)
for name, data in (side_inputs or {}).items()
}
self._graph.start_run(self._input_side_packets)
# TODO: Use "inspect.Parameter" to fetch the input argument names and
# types from "_input_stream_type_info" and then auto generate the process
# method signature by "inspect.Signature" in __init__.
def process(
self, input_data: Union[np.ndarray, Mapping[str, Union[np.ndarray,
message.Message]]]
  ) -> NamedTuple:
    """Processes a set of RGB image data and outputs SolutionOutputs.
Args:
      input_data: Either a single numpy ndarray object representing the solo
        image input of a graph or a mapping from the stream name to the image
        or proto data that represents the input streams of a graph.
Raises:
NotImplementedError: If input_data contains audio data or a list of proto
objects.
      RuntimeError: If the underlying graph encounters an error.
ValueError: If the input image data is not three channel RGB.
Returns:
A NamedTuple object that contains the output data of a graph run.
      The field names in the NamedTuple object map to the graph output stream
      names.
Examples:
solution = solution_base.SolutionBase(graph_config=hand_landmark_graph)
results = solution.process(cv2.imread('/tmp/hand0.png')[:, :, ::-1])
print(results.detection)
results = solution.process(
{'video_in' : cv2.imread('/tmp/hand1.png')[:, :, ::-1]})
print(results.hand_landmarks)
"""
self._graph_outputs.clear()
if isinstance(input_data, np.ndarray):
if len(self._input_stream_type_info.keys()) != 1:
        raise ValueError(
            "Can't process single image input since the graph has more than "
            "one input stream.")
input_dict = {next(iter(self._input_stream_type_info)): input_data}
else:
input_dict = input_data
# Set the timestamp increment to 33333 us to simulate the 30 fps video
# input.
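    # (1,000,000 us / 30 fps ≈ 33,333 us per frame.)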
self._simulated_timestamp += 33333
for stream_name, data in input_dict.items():
input_stream_type = self._input_stream_type_info[stream_name]
if (input_stream_type == _PacketDataType.PROTO_LIST or
input_stream_type == _PacketDataType.AUDIO):
# TODO: Support audio data.
raise NotImplementedError(
f'SolutionBase can only process non-audio and non-proto-list data. '
f'{self._input_stream_type_info[stream_name].name} '
f'type is not supported yet.')
elif (input_stream_type == _PacketDataType.IMAGE_FRAME or
input_stream_type == _PacketDataType.IMAGE):
if data.shape[2] != RGB_CHANNELS:
raise ValueError('Input image must contain three channel rgb data.')
self._graph.add_packet_to_input_stream(
stream=stream_name,
packet=self._make_packet(input_stream_type,
data).at(self._simulated_timestamp))
else:
self._graph.add_packet_to_input_stream(
stream=stream_name,
packet=self._make_packet(input_stream_type,
data).at(self._simulated_timestamp))
self._graph.wait_until_idle()
# Create a NamedTuple object where the field names are mapping to the graph
# output stream names.
solution_outputs = collections.namedtuple(
'SolutionOutputs', self._output_stream_type_info.keys())
for stream_name in self._output_stream_type_info.keys():
if stream_name in self._graph_outputs:
setattr(
solution_outputs, stream_name,
self._get_packet_content(self._output_stream_type_info[stream_name],
self._graph_outputs[stream_name]))
else:
setattr(solution_outputs, stream_name, None)
return solution_outputs
def close(self) -> None:
"""Closes all the input sources and the graph."""
self._graph.close()
self._graph = None
self._input_stream_type_info = None
self._output_stream_type_info = None
def reset(self) -> None:
"""Resets the graph for another run."""
if self._graph:
self._graph.close()
self._graph.start_run(self._input_side_packets)
def _initialize_graph_interface(
self,
validated_graph: validated_graph_config.ValidatedGraphConfig,
side_inputs: Optional[Mapping[str, Any]] = None,
outputs: Optional[List[str]] = None):
"""Gets graph interface type information and returns the canonical graph config proto."""
canonical_graph_config_proto = calculator_pb2.CalculatorGraphConfig()
canonical_graph_config_proto.ParseFromString(validated_graph.binary_config)
# Gets name from a 'TAG:index:name' str.
def get_name(tag_index_name):
return tag_index_name.split(':')[-1]
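    # For example, get_name('IMAGE:0:image_in') and get_name('image_in') both
    # return 'image_in'.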
    # Gets the packet type information of the input streams and output streams
    # from the validated calculator graph. The mappings from the stream names
    # to the packet data types are for deciding which packet creator and getter
    # methods to call in the process() method.
def get_stream_packet_type(packet_tag_index_name):
return _PacketDataType.from_registered_name(
validated_graph.registered_stream_type_name(
get_name(packet_tag_index_name)))
self._input_stream_type_info = {
get_name(tag_index_name): get_stream_packet_type(tag_index_name)
for tag_index_name in canonical_graph_config_proto.input_stream
}
if not outputs:
output_streams = canonical_graph_config_proto.output_stream
else:
output_streams = outputs
self._output_stream_type_info = {
get_name(tag_index_name): get_stream_packet_type(tag_index_name)
for tag_index_name in output_streams
}
    # Gets the packet type information of the input side packets from the
    # validated calculator graph. The mappings from the side packet names to
    # the packet data types are for making the input_side_packets dict for
    # graph start_run().
def get_side_packet_type(packet_tag_index_name):
return _PacketDataType.from_registered_name(
validated_graph.registered_side_packet_type_name(
get_name(packet_tag_index_name)))
self._side_input_type_info = {
get_name(tag_index_name): get_side_packet_type(tag_index_name)
for tag_index_name, _ in (side_inputs or {}).items()
}
return canonical_graph_config_proto
def _modify_calculator_options(
self, calculator_graph_config: calculator_pb2.CalculatorGraphConfig,
calculator_params: Mapping[str, Any]) -> None:
"""Modifies the CalculatorOptions of the calculators listed in calculator_params."""
# Reorganizes the calculator options field data by calculator name and puts
# all the field data of the same calculator in a list.
def generate_nested_calculator_params(flat_map):
nested_map = {}
for compound_name, field_value in flat_map.items():
calculator_and_field_name = compound_name.split('.')
if len(calculator_and_field_name) != 2:
raise ValueError(
f'The key "{compound_name}" in the calculator_params is invalid.')
calculator_name = calculator_and_field_name[0]
field_name = calculator_and_field_name[1]
if calculator_name in nested_map:
nested_map[calculator_name].append((field_name, field_value))
else:
nested_map[calculator_name] = [(field_name, field_value)]
return nested_map
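    # For example, {'ImageTransformation.output_width': 0,
    #  'ImageTransformation.output_height': 0} becomes
    # {'ImageTransformation': [('output_width', 0), ('output_height', 0)]}.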
def modify_options_fields(calculator_options, options_field_list):
for field_name, field_value in options_field_list:
if field_value is None:
calculator_options.ClearField(field_name)
else:
field_label = calculator_options.DESCRIPTOR.fields_by_name[
field_name].label
if field_label is descriptor.FieldDescriptor.LABEL_REPEATED:
if not isinstance(field_value, Iterable):
raise ValueError(
f'{field_name} is a repeated proto field but the value '
f'to be set is {type(field_value)}, which is not iterable.')
# TODO: Support resetting the entire repeated field
# (array-option) and changing the individual values in the repeated
# field (array-element-option).
calculator_options.ClearField(field_name)
for elem in field_value:
getattr(calculator_options, field_name).append(elem)
else:
setattr(calculator_options, field_name, field_value)
nested_calculator_params = generate_nested_calculator_params(
calculator_params)
num_modified = 0
for node in calculator_graph_config.node:
if node.name not in nested_calculator_params:
continue
options_type = CALCULATOR_TO_OPTIONS.get(node.calculator)
if options_type is None:
raise ValueError(
f'Modifying the calculator options of {node.name} is not supported.'
)
options_field_list = nested_calculator_params[node.name]
if node.HasField('options') and node.node_options:
raise ValueError(
f'Cannot modify the calculator options of {node.name} because it '
f'has both options and node_options fields.')
if node.node_options:
# The "node_options" case for the proto3 syntax.
node_options_modified = False
for elem in node.node_options:
type_name = elem.type_url.split('/')[-1]
if type_name == options_type.DESCRIPTOR.full_name:
calculator_options = options_type.FromString(elem.value)
modify_options_fields(calculator_options, options_field_list)
elem.value = calculator_options.SerializeToString()
node_options_modified = True
break
        # If no existing node_options was modified, add a new node_options
        # instead.
if not node_options_modified:
calculator_options = options_type()
modify_options_fields(calculator_options, options_field_list)
node.node_options.add().Pack(calculator_options)
else:
# The "options" case for the proto2 syntax as well as the fallback
# when the calculator doesn't have either "options" or "node_options".
modify_options_fields(node.options.Extensions[options_type.ext],
options_field_list)
num_modified += 1
      # Exits the loop early when every element in nested_calculator_params
      # has been visited.
if num_modified == len(nested_calculator_params):
break
def _make_packet(self, packet_data_type: _PacketDataType,
data: Any) -> packet.Packet:
if (packet_data_type == _PacketDataType.IMAGE_FRAME or
packet_data_type == _PacketDataType.IMAGE):
return getattr(packet_creator, 'create_' + packet_data_type.value)(
data, image_format=image_frame.ImageFormat.SRGB)
else:
return getattr(packet_creator, 'create_' + packet_data_type.value)(data)
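  # In the generic branch above, e.g., _PacketDataType.PROTO ('proto')
  # dispatches to packet_creator.create_proto and _PacketDataType.FLOAT
  # ('float') to packet_creator.create_float.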
def _get_packet_content(self, packet_data_type: _PacketDataType,
output_packet: packet.Packet) -> Any:
"""Gets packet content from a packet by type.
Args:
packet_data_type: The supported packet data type.
output_packet: The packet to get content from.
Returns:
Packet content by packet data type. None to indicate "no output".
"""
if output_packet.is_empty():
return None
if packet_data_type == _PacketDataType.STRING:
return packet_getter.get_str(output_packet)
elif (packet_data_type == _PacketDataType.IMAGE_FRAME or
packet_data_type == _PacketDataType.IMAGE):
return getattr(packet_getter, 'get_' +
packet_data_type.value)(output_packet).numpy_view()
else:
return getattr(packet_getter, 'get_' + packet_data_type.value)(
output_packet)
def __enter__(self):
"""A "with" statement support."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Closes all the input sources and the graph."""
self.close()

View File

@@ -0,0 +1,367 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solution_base."""
from absl.testing import absltest
from absl.testing import parameterized
import numpy as np
from google.protobuf import text_format
from mediapipe.framework import calculator_pb2
from mediapipe.framework.formats import detection_pb2
from mediapipe.python import solution_base
CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG = """
input_stream: 'image_in'
output_stream: 'image_out'
node {
name: 'ImageTransformation'
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_in'
output_stream: 'IMAGE:image_out'
options: {
[mediapipe.ImageTransformationCalculatorOptions.ext] {
output_width: 10
output_height: 10
}
}
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 10
output_height: 10
}
}
}
"""
class SolutionBaseTest(parameterized.TestCase):
def test_invalid_initialization_arguments(self):
with self.assertRaisesRegex(
ValueError,
'Must provide exactly one of \'binary_graph_path\' or \'graph_config\'.'
):
solution_base.SolutionBase()
with self.assertRaisesRegex(
ValueError,
'Must provide exactly one of \'binary_graph_path\' or \'graph_config\'.'
):
solution_base.SolutionBase(
graph_config=calculator_pb2.CalculatorGraphConfig(),
binary_graph_path='/tmp/no_such.binarypb')
@parameterized.named_parameters(('no_graph_input_output_stream', """
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
output_stream: 'out'
}
""", RuntimeError, 'does not have a corresponding output stream.'),
                                  ('calculator_io_mismatch', """
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
input_stream: 'in2'
output_stream: 'out'
}
""", ValueError, 'must use matching tags and indexes.'),
                                  ('unknown_registered_stream_type_name', """
input_stream: 'in'
output_stream: 'out'
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
output_stream: 'out'
}
""", RuntimeError, 'Unable to find the type for stream \"in\".'))
def test_invalid_config(self, text_config, error_type, error_message):
config_proto = text_format.Parse(text_config,
calculator_pb2.CalculatorGraphConfig())
with self.assertRaisesRegex(error_type, error_message):
solution_base.SolutionBase(graph_config=config_proto)
def test_valid_input_data_type_proto(self):
text_config = """
input_stream: 'input_detections'
output_stream: 'output_detections'
node {
calculator: 'DetectionUniqueIdCalculator'
input_stream: 'DETECTION_LIST:input_detections'
output_stream: 'DETECTION_LIST:output_detections'
}
"""
config_proto = text_format.Parse(text_config,
calculator_pb2.CalculatorGraphConfig())
with solution_base.SolutionBase(graph_config=config_proto) as solution:
input_detections = detection_pb2.DetectionList()
detection_1 = input_detections.detection.add()
text_format.Parse('score: 0.5', detection_1)
detection_2 = input_detections.detection.add()
text_format.Parse('score: 0.8', detection_2)
results = solution.process({'input_detections': input_detections})
self.assertTrue(hasattr(results, 'output_detections'))
self.assertLen(results.output_detections.detection, 2)
expected_detection_1 = detection_pb2.Detection()
text_format.Parse('score: 0.5, detection_id: 1', expected_detection_1)
expected_detection_2 = detection_pb2.Detection()
text_format.Parse('score: 0.8, detection_id: 2', expected_detection_2)
self.assertEqual(results.output_detections.detection[0],
expected_detection_1)
self.assertEqual(results.output_detections.detection[1],
expected_detection_2)
def test_invalid_input_data_type_proto_vector(self):
text_config = """
input_stream: 'input_detections'
output_stream: 'output_detections'
node {
calculator: 'DetectionUniqueIdCalculator'
input_stream: 'DETECTIONS:input_detections'
output_stream: 'DETECTIONS:output_detections'
}
"""
config_proto = text_format.Parse(text_config,
calculator_pb2.CalculatorGraphConfig())
with solution_base.SolutionBase(graph_config=config_proto) as solution:
detection = detection_pb2.Detection()
text_format.Parse('score: 0.5', detection)
with self.assertRaisesRegex(
NotImplementedError,
'SolutionBase can only process non-audio and non-proto-list data. '
          + 'PROTO_LIST type is not supported yet.'
):
solution.process({'input_detections': detection})
def test_invalid_input_image_data(self):
text_config = """
input_stream: 'image_in'
output_stream: 'image_out'
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_in'
output_stream: 'IMAGE:transformed_image_in'
}
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:transformed_image_in'
output_stream: 'IMAGE:image_out'
}
"""
config_proto = text_format.Parse(text_config,
calculator_pb2.CalculatorGraphConfig())
with solution_base.SolutionBase(graph_config=config_proto) as solution:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
solution.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
@parameterized.named_parameters(('graph_without_side_packets', """
input_stream: 'image_in'
output_stream: 'image_out'
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_in'
output_stream: 'IMAGE:transformed_image_in'
}
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:transformed_image_in'
output_stream: 'IMAGE:image_out'
}
""", None), ('graph_with_side_packets', """
input_stream: 'image_in'
input_side_packet: 'allow_signal'
input_side_packet: 'rotation_degrees'
output_stream: 'image_out'
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_in'
input_side_packet: 'ROTATION_DEGREES:rotation_degrees'
output_stream: 'IMAGE:transformed_image_in'
}
node {
calculator: 'GateCalculator'
input_stream: 'transformed_image_in'
input_side_packet: 'ALLOW:allow_signal'
output_stream: 'image_out_to_transform'
}
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_out_to_transform'
input_side_packet: 'ROTATION_DEGREES:rotation_degrees'
output_stream: 'IMAGE:image_out'
}""", {
'allow_signal': True,
'rotation_degrees': 0
}))
def test_solution_process(self, text_config, side_inputs):
self._process_and_verify(
config_proto=text_format.Parse(text_config,
calculator_pb2.CalculatorGraphConfig()),
side_inputs=side_inputs)
def test_invalid_calculator_options(self):
text_config = """
input_stream: 'image_in'
output_stream: 'image_out'
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_in'
output_stream: 'IMAGE:transformed_image_in'
}
node {
name: 'SignalGate'
calculator: 'GateCalculator'
input_stream: 'transformed_image_in'
input_side_packet: 'ALLOW:allow_signal'
output_stream: 'image_out_to_transform'
}
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_out_to_transform'
output_stream: 'IMAGE:image_out'
}
"""
config_proto = text_format.Parse(text_config,
calculator_pb2.CalculatorGraphConfig())
with self.assertRaisesRegex(
ValueError,
'Modifying the calculator options of SignalGate is not supported.'):
solution_base.SolutionBase(
graph_config=config_proto,
calculator_params={'SignalGate.invalid_field': 'I am invalid'})
def test_calculator_has_both_options_and_node_options(self):
config_proto = text_format.Parse(CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG,
calculator_pb2.CalculatorGraphConfig())
with self.assertRaisesRegex(ValueError,
'has both options and node_options fields.'):
solution_base.SolutionBase(
graph_config=config_proto,
calculator_params={
'ImageTransformation.output_width': 0,
'ImageTransformation.output_height': 0
})
def test_modifying_calculator_proto2_options(self):
config_proto = text_format.Parse(CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG,
calculator_pb2.CalculatorGraphConfig())
# To test proto2 options only, remove the proto3 node_options field from the
# graph config.
self.assertEqual('ImageTransformation', config_proto.node[0].name)
config_proto.node[0].ClearField('node_options')
self._process_and_verify(
config_proto=config_proto,
calculator_params={
'ImageTransformation.output_width': 0,
'ImageTransformation.output_height': 0
})
def test_modifying_calculator_proto3_node_options(self):
config_proto = text_format.Parse(CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG,
calculator_pb2.CalculatorGraphConfig())
# To test proto3 node options only, remove the proto2 options field from the
# graph config.
self.assertEqual('ImageTransformation', config_proto.node[0].name)
config_proto.node[0].ClearField('options')
self._process_and_verify(
config_proto=config_proto,
calculator_params={
'ImageTransformation.output_width': 0,
'ImageTransformation.output_height': 0
})
def test_adding_calculator_options(self):
config_proto = text_format.Parse(CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG,
calculator_pb2.CalculatorGraphConfig())
# To test a calculator with no options field, remove both proto2 options and
# proto3 node_options fields from the graph config.
self.assertEqual('ImageTransformation', config_proto.node[0].name)
config_proto.node[0].ClearField('options')
config_proto.node[0].ClearField('node_options')
self._process_and_verify(
config_proto=config_proto,
calculator_params={
'ImageTransformation.output_width': 0,
'ImageTransformation.output_height': 0
})
@parameterized.named_parameters(('graph_without_side_packets', """
input_stream: 'image_in'
output_stream: 'image_out'
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_in'
output_stream: 'IMAGE:transformed_image_in'
}
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:transformed_image_in'
output_stream: 'IMAGE:image_out'
}
""", None), ('graph_with_side_packets', """
input_stream: 'image_in'
input_side_packet: 'allow_signal'
input_side_packet: 'rotation_degrees'
output_stream: 'image_out'
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_in'
input_side_packet: 'ROTATION_DEGREES:rotation_degrees'
output_stream: 'IMAGE:transformed_image_in'
}
node {
calculator: 'GateCalculator'
input_stream: 'transformed_image_in'
input_side_packet: 'ALLOW:allow_signal'
output_stream: 'image_out_to_transform'
}
node {
calculator: 'ImageTransformationCalculator'
input_stream: 'IMAGE:image_out_to_transform'
input_side_packet: 'ROTATION_DEGREES:rotation_degrees'
output_stream: 'IMAGE:image_out'
}""", {
'allow_signal': True,
'rotation_degrees': 0
}))
def test_solution_reset(self, text_config, side_inputs):
config_proto = text_format.Parse(text_config,
calculator_pb2.CalculatorGraphConfig())
input_image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
with solution_base.SolutionBase(
graph_config=config_proto, side_inputs=side_inputs) as solution:
for _ in range(20):
outputs = solution.process(input_image)
self.assertTrue(np.array_equal(input_image, outputs.image_out))
solution.reset()
def _process_and_verify(self,
config_proto,
side_inputs=None,
calculator_params=None):
input_image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
with solution_base.SolutionBase(
graph_config=config_proto,
side_inputs=side_inputs,
calculator_params=calculator_params) as solution:
outputs = solution.process(input_image)
outputs2 = solution.process({'image_in': input_image})
self.assertTrue(np.array_equal(input_image, outputs.image_out))
self.assertTrue(np.array_equal(input_image, outputs2.image_out))
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,25 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Solutions Python API."""
import mediapipe.python.solutions.drawing_styles
import mediapipe.python.solutions.drawing_utils
import mediapipe.python.solutions.face_detection
import mediapipe.python.solutions.face_mesh
import mediapipe.python.solutions.hands
import mediapipe.python.solutions.holistic
import mediapipe.python.solutions.objectron
import mediapipe.python.solutions.pose
import mediapipe.python.solutions.selfie_segmentation

View File

@@ -0,0 +1,37 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Downloading utils."""
import os
import shutil
import urllib.request
_OSS_URL_PREFIX = 'https://github.com/google/mediapipe/raw/master/'
def download_oss_model(model_path: str):
"""Downloads the oss model from the MediaPipe GitHub repo if it doesn't exist in the package."""
mp_root_path = os.sep.join(os.path.abspath(__file__).split(os.sep)[:-4])
model_abspath = os.path.join(mp_root_path, model_path)
if os.path.exists(model_abspath):
return
model_url = _OSS_URL_PREFIX + model_path
print('Downloading model to ' + model_abspath)
with urllib.request.urlopen(model_url) as response, open(model_abspath,
'wb') as out_file:
if response.code != 200:
      raise ConnectionError('Cannot download ' + model_path +
                            ' from the MediaPipe GitHub repo.')
shutil.copyfileobj(response, out_file)
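# Example usage (a sketch; the model path below is illustrative and must point
# to a model file hosted in the MediaPipe GitHub repo):
#   download_oss_model('mediapipe/modules/pose_landmark/pose_landmark_full.tflite')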

View File

@@ -0,0 +1,146 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe solution drawing styles."""
from typing import Mapping, Tuple
from mediapipe.python.solutions.drawing_utils import DrawingSpec
from mediapipe.python.solutions.hands import HandLandmark
_RADIUS = 5
_RED = (54, 67, 244)
_GREEN = (118, 230, 0)
_BLUE = (192, 101, 21)
_YELLOW = (0, 204, 255)
_GRAY = (174, 164, 144)
_PURPLE = (128, 64, 128)
_PEACH = (180, 229, 255)
# Hands
_THICKNESS_WRIST_MCP = 3
_THICKNESS_FINGER = 2
_THICKNESS_DOT = -1
# Hand landmarks
_PALM_LANDMARKS = (HandLandmark.WRIST, HandLandmark.THUMB_CMC,
                   HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP,
                   HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP)
_THUMB_LANDMARKS = (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP,
                    HandLandmark.THUMB_TIP)
_INDEX_FINGER_LANDMARKS = (HandLandmark.INDEX_FINGER_PIP,
HandLandmark.INDEX_FINGER_DIP,
HandLandmark.INDEX_FINGER_TIP)
_MIDDLE_FINGER_LANDMARKS = (HandLandmark.MIDDLE_FINGER_PIP,
HandLandmark.MIDDLE_FINGER_DIP,
HandLandmark.MIDDLE_FINGER_TIP)
_RING_FINGER_LANDMARKS = (HandLandmark.RING_FINGER_PIP,
HandLandmark.RING_FINGER_DIP,
HandLandmark.RING_FINGER_TIP)
_PINKY_FINGER_LANDMARKS = (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP,
HandLandmark.PINKY_TIP)
_HAND_LANDMARK_STYLE = {
    _PALM_LANDMARKS:
DrawingSpec(
color=_RED, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _THUMB_LANDMARKS:
DrawingSpec(
color=_PEACH, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
_INDEX_FINGER_LANDMARKS:
DrawingSpec(
color=_PURPLE, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
_MIDDLE_FINGER_LANDMARKS:
DrawingSpec(
color=_YELLOW, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
_RING_FINGER_LANDMARKS:
DrawingSpec(
color=_GREEN, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
_PINKY_FINGER_LANDMARKS:
DrawingSpec(
color=_BLUE, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
}
# Hand connections
_PALM_CONNECTIONS = ((HandLandmark.WRIST, HandLandmark.THUMB_CMC),
(HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
(HandLandmark.MIDDLE_FINGER_MCP,
HandLandmark.RING_FINGER_MCP),
(HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
(HandLandmark.INDEX_FINGER_MCP,
HandLandmark.MIDDLE_FINGER_MCP), (HandLandmark.WRIST,
HandLandmark.PINKY_MCP))
_THUMB_CONNECTIONS = ((HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
(HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
(HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP))
_INDEX_FINGER_CONNECTIONS = ((HandLandmark.INDEX_FINGER_MCP,
HandLandmark.INDEX_FINGER_PIP),
(HandLandmark.INDEX_FINGER_PIP,
HandLandmark.INDEX_FINGER_DIP),
(HandLandmark.INDEX_FINGER_DIP,
HandLandmark.INDEX_FINGER_TIP))
_MIDDLE_FINGER_CONNECTIONS = ((HandLandmark.MIDDLE_FINGER_MCP,
HandLandmark.MIDDLE_FINGER_PIP),
(HandLandmark.MIDDLE_FINGER_PIP,
HandLandmark.MIDDLE_FINGER_DIP),
(HandLandmark.MIDDLE_FINGER_DIP,
HandLandmark.MIDDLE_FINGER_TIP))
_RING_FINGER_CONNECTIONS = ((HandLandmark.RING_FINGER_MCP,
HandLandmark.RING_FINGER_PIP),
(HandLandmark.RING_FINGER_PIP,
HandLandmark.RING_FINGER_DIP),
(HandLandmark.RING_FINGER_DIP,
HandLandmark.RING_FINGER_TIP))
_PINKY_FINGER_CONNECTIONS = ((HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
(HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
(HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP))
_HAND_CONNECTION_STYLE = {
_PALM_CONNECTIONS:
DrawingSpec(color=_GRAY, thickness=_THICKNESS_WRIST_MCP),
_THUMB_CONNECTIONS:
DrawingSpec(color=_PEACH, thickness=_THICKNESS_FINGER),
_INDEX_FINGER_CONNECTIONS:
DrawingSpec(color=_PURPLE, thickness=_THICKNESS_FINGER),
_MIDDLE_FINGER_CONNECTIONS:
DrawingSpec(color=_YELLOW, thickness=_THICKNESS_FINGER),
_RING_FINGER_CONNECTIONS:
DrawingSpec(color=_GREEN, thickness=_THICKNESS_FINGER),
_PINKY_FINGER_CONNECTIONS:
DrawingSpec(color=_BLUE, thickness=_THICKNESS_FINGER)
}
def get_default_hand_landmark_style() -> Mapping[int, DrawingSpec]:
"""Returns the default hand landmark drawing style.
Returns:
A mapping from each hand landmark to the default drawing spec.
"""
hand_landmark_style = {}
for k, v in _HAND_LANDMARK_STYLE.items():
for landmark in k:
hand_landmark_style[landmark] = v
return hand_landmark_style
def get_default_hand_connection_style(
) -> Mapping[Tuple[int, int], DrawingSpec]:
"""Returns the default hand connection drawing style.
Returns:
A mapping from each hand connection to the default drawing spec.
"""
hand_connection_style = {}
for k, v in _HAND_CONNECTION_STYLE.items():
for connection in k:
hand_connection_style[connection] = v
return hand_connection_style
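# Example usage (a sketch; assumes the drawing_utils and hands modules from
# this package and a NormalizedLandmarkList of detected hand landmarks):
#   from mediapipe.python.solutions import drawing_utils, hands
#   drawing_utils.draw_landmarks(
#       image, hand_landmarks, hands.HAND_CONNECTIONS,
#       get_default_hand_landmark_style(),
#       get_default_hand_connection_style())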

View File

@@ -0,0 +1,307 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe solution drawing utils."""
import math
from typing import List, Mapping, Optional, Tuple, Union
import cv2
import dataclasses
import matplotlib.pyplot as plt
import numpy as np
from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import location_data_pb2
from mediapipe.framework.formats import landmark_pb2
PRESENCE_THRESHOLD = 0.5
RGB_CHANNELS = 3
BLACK_COLOR = (0, 0, 0)
RED_COLOR = (0, 0, 255)
GREEN_COLOR = (0, 128, 0)
BLUE_COLOR = (255, 0, 0)
VISIBILITY_THRESHOLD = 0.5
@dataclasses.dataclass
class DrawingSpec:
# Color for drawing the annotation. Default to the green color.
color: Tuple[int, int, int] = (0, 255, 0)
# Thickness for drawing the annotation. Default to 2 pixels.
thickness: int = 2
# Circle radius. Default to 2 pixels.
circle_radius: int = 2
def _normalized_to_pixel_coordinates(
normalized_x: float, normalized_y: float, image_width: int,
image_height: int) -> Union[None, Tuple[int, int]]:
"""Converts normalized value pair to pixel coordinates."""
# Checks if the float value is between 0 and 1.
def is_valid_normalized_value(value: float) -> bool:
return (value > 0 or math.isclose(0, value)) and (value < 1 or
math.isclose(1, value))
if not (is_valid_normalized_value(normalized_x) and
is_valid_normalized_value(normalized_y)):
# TODO: Draw coordinates even if it's outside of the image bounds.
return None
x_px = min(math.floor(normalized_x * image_width), image_width - 1)
y_px = min(math.floor(normalized_y * image_height), image_height - 1)
return x_px, y_px
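# For example, (normalized_x=0.5, normalized_y=0.5) on a 640x480 image maps to
# the pixel coordinates (320, 240).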
def draw_detection(
image: np.ndarray,
detection: detection_pb2.Detection,
keypoint_drawing_spec: DrawingSpec = DrawingSpec(color=RED_COLOR),
    bbox_drawing_spec: DrawingSpec = DrawingSpec()):
  """Draws the detection bounding box and keypoints on the image.
Args:
image: A three channel RGB image represented as numpy ndarray.
detection: A detection proto message to be annotated on the image.
keypoint_drawing_spec: A DrawingSpec object that specifies the keypoints'
drawing settings such as color, line thickness, and circle radius.
bbox_drawing_spec: A DrawingSpec object that specifies the bounding box's
drawing settings such as color and line thickness.
Raises:
    ValueError: If one of the following:
      a) The input image is not three channel RGB.
      b) The location data is not relative data.
"""
if not detection.location_data:
return
if image.shape[2] != RGB_CHANNELS:
raise ValueError('Input image must contain three channel rgb data.')
image_rows, image_cols, _ = image.shape
location = detection.location_data
if location.format != location_data_pb2.LocationData.RELATIVE_BOUNDING_BOX:
    raise ValueError(
        'LocationData must be relative for this drawing function to work.')
# Draws keypoints.
for keypoint in location.relative_keypoints:
keypoint_px = _normalized_to_pixel_coordinates(keypoint.x, keypoint.y,
image_cols, image_rows)
cv2.circle(image, keypoint_px, keypoint_drawing_spec.circle_radius,
keypoint_drawing_spec.color, keypoint_drawing_spec.thickness)
  # Draws the bounding box if it exists.
if not location.HasField('relative_bounding_box'):
return
relative_bounding_box = location.relative_bounding_box
rect_start_point = _normalized_to_pixel_coordinates(
relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols,
image_rows)
rect_end_point = _normalized_to_pixel_coordinates(
relative_bounding_box.xmin + relative_bounding_box.width,
      relative_bounding_box.ymin + relative_bounding_box.height, image_cols,
image_rows)
cv2.rectangle(image, rect_start_point, rect_end_point,
bbox_drawing_spec.color, bbox_drawing_spec.thickness)
def draw_landmarks(
image: np.ndarray,
landmark_list: landmark_pb2.NormalizedLandmarkList,
connections: Optional[List[Tuple[int, int]]] = None,
landmark_drawing_spec: Union[DrawingSpec,
Mapping[int, DrawingSpec]] = DrawingSpec(
color=RED_COLOR),
connection_drawing_spec: Union[DrawingSpec,
Mapping[Tuple[int, int],
DrawingSpec]] = DrawingSpec()):
"""Draws the landmarks and the connections on the image.
Args:
image: A three channel RGB image represented as numpy ndarray.
landmark_list: A normalized landmark list proto message to be annotated on
the image.
    connections: A list of landmark index tuples that specifies which
      landmarks are to be connected in the drawing.
landmark_drawing_spec: Either a DrawingSpec object or a mapping from
hand landmarks to the DrawingSpecs that specifies the landmarks' drawing
settings such as color, line thickness, and circle radius.
connection_drawing_spec: Either a DrawingSpec object or a mapping from
hand connections to the DrawingSpecs that specifies the
connections' drawing settings such as color and line thickness.
Raises:
    ValueError: If one of the following:
      a) The input image is not three channel RGB.
      b) Any connection contains an invalid landmark index.
"""
if not landmark_list:
return
if image.shape[2] != RGB_CHANNELS:
raise ValueError('Input image must contain three channel rgb data.')
image_rows, image_cols, _ = image.shape
idx_to_coordinates = {}
for idx, landmark in enumerate(landmark_list.landmark):
if ((landmark.HasField('visibility') and
landmark.visibility < VISIBILITY_THRESHOLD) or
(landmark.HasField('presence') and
landmark.presence < PRESENCE_THRESHOLD)):
continue
landmark_px = _normalized_to_pixel_coordinates(landmark.x, landmark.y,
image_cols, image_rows)
if landmark_px:
idx_to_coordinates[idx] = landmark_px
if connections:
num_landmarks = len(landmark_list.landmark)
# Draws the connections if the start and end landmarks are both visible.
for connection in connections:
start_idx = connection[0]
end_idx = connection[1]
if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks):
raise ValueError(f'Landmark index is out of range. Invalid connection '
f'from landmark #{start_idx} to landmark #{end_idx}.')
if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates:
drawing_spec = connection_drawing_spec[connection] if isinstance(
connection_drawing_spec, Mapping) else connection_drawing_spec
cv2.line(image, idx_to_coordinates[start_idx],
idx_to_coordinates[end_idx], drawing_spec.color,
drawing_spec.thickness)
# Draws landmark points after finishing the connection lines, which is
# aesthetically better.
for idx, landmark_px in idx_to_coordinates.items():
drawing_spec = landmark_drawing_spec[idx] if isinstance(
landmark_drawing_spec, Mapping) else landmark_drawing_spec
cv2.circle(image, landmark_px, drawing_spec.circle_radius,
drawing_spec.color, drawing_spec.thickness)
def draw_axis(
image: np.ndarray,
rotation: np.ndarray,
translation: np.ndarray,
focal_length: Tuple[float, float] = (1.0, 1.0),
principal_point: Tuple[float, float] = (0.0, 0.0),
axis_length: float = 0.1,
axis_drawing_spec: DrawingSpec = DrawingSpec()):
"""Draws the 3D axis on the image.
Args:
image: A three channel RGB image represented as numpy ndarray.
rotation: Rotation matrix from object to camera coordinate frame.
translation: Translation vector from object to camera coordinate frame.
    focal_length: Camera focal length along x and y directions.
    principal_point: Camera principal point in x and y.
    axis_length: Length of the axis in the drawing.
axis_drawing_spec: A DrawingSpec object that specifies the xyz axis
drawing settings such as line thickness.
Raises:
    ValueError: If the input image is not three channel RGB.
"""
if image.shape[2] != RGB_CHANNELS:
raise ValueError('Input image must contain three channel rgb data.')
image_rows, image_cols, _ = image.shape
# Create axis points in camera coordinate frame.
axis_world = np.float32([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
axis_cam = np.matmul(rotation, axis_length*axis_world.T).T + translation
x = axis_cam[..., 0]
y = axis_cam[..., 1]
z = axis_cam[..., 2]
# Project 3D points to NDC space.
fx, fy = focal_length
px, py = principal_point
x_ndc = np.clip(-fx * x / (z + 1e-5) + px, -1., 1.)
y_ndc = np.clip(-fy * y / (z + 1e-5) + py, -1., 1.)
# Convert from NDC space to image space.
x_im = np.int32((1 + x_ndc) * 0.5 * image_cols)
y_im = np.int32((1 - y_ndc) * 0.5 * image_rows)
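  # For example, x_ndc = -1 maps to column 0 and x_ndc = 0 maps to the
  # horizontal center of the image.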
# Draw xyz axis on the image.
origin = (x_im[0], y_im[0])
x_axis = (x_im[1], y_im[1])
y_axis = (x_im[2], y_im[2])
z_axis = (x_im[3], y_im[3])
cv2.arrowedLine(image, origin, x_axis, RED_COLOR,
axis_drawing_spec.thickness)
cv2.arrowedLine(image, origin, y_axis, GREEN_COLOR,
axis_drawing_spec.thickness)
cv2.arrowedLine(image, origin, z_axis, BLUE_COLOR,
axis_drawing_spec.thickness)
def _normalize_color(color):
return tuple(v / 255. for v in color)
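# For example, _normalize_color((255, 0, 0)) returns (1.0, 0.0, 0.0).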
def plot_landmarks(landmark_list: landmark_pb2.NormalizedLandmarkList,
connections: Optional[List[Tuple[int, int]]] = None,
landmark_drawing_spec: DrawingSpec = DrawingSpec(
color=RED_COLOR, thickness=5),
connection_drawing_spec: DrawingSpec = DrawingSpec(
color=BLACK_COLOR, thickness=5),
elevation: int = 10,
                   azimuth: int = 10):
  """Plots the landmarks and the connections in matplotlib 3d.
Args:
landmark_list: A normalized landmark list proto message to be plotted.
    connections: A list of landmark index tuples that specifies which
      landmarks are to be connected.
landmark_drawing_spec: A DrawingSpec object that specifies the landmarks'
drawing settings such as color and line thickness.
connection_drawing_spec: A DrawingSpec object that specifies the
connections' drawing settings such as color and line thickness.
    elevation: The elevation from which to view the plot.
    azimuth: The azimuth angle to rotate the plot.
Raises:
    ValueError: If any connection contains an invalid landmark index.
"""
if not landmark_list:
return
plt.figure(figsize=(10, 10))
ax = plt.axes(projection='3d')
ax.view_init(elev=elevation, azim=azimuth)
plotted_landmarks = {}
for idx, landmark in enumerate(landmark_list.landmark):
if ((landmark.HasField('visibility') and
landmark.visibility < VISIBILITY_THRESHOLD) or
(landmark.HasField('presence') and
landmark.presence < PRESENCE_THRESHOLD)):
continue
ax.scatter3D(
xs=[-landmark.z],
ys=[landmark.x],
zs=[-landmark.y],
color=_normalize_color(landmark_drawing_spec.color[::-1]),
linewidth=landmark_drawing_spec.thickness)
plotted_landmarks[idx] = (-landmark.z, landmark.x, -landmark.y)
if connections:
num_landmarks = len(landmark_list.landmark)
# Draws the connections if the start and end landmarks are both visible.
for connection in connections:
start_idx = connection[0]
end_idx = connection[1]
if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks):
raise ValueError(f'Landmark index is out of range. Invalid connection '
f'from landmark #{start_idx} to landmark #{end_idx}.')
if start_idx in plotted_landmarks and end_idx in plotted_landmarks:
landmark_pair = [
plotted_landmarks[start_idx], plotted_landmarks[end_idx]
]
ax.plot3D(
xs=[landmark_pair[0][0], landmark_pair[1][0]],
ys=[landmark_pair[0][1], landmark_pair[1][1]],
zs=[landmark_pair[0][2], landmark_pair[1][2]],
color=_normalize_color(connection_drawing_spec.color[::-1]),
linewidth=connection_drawing_spec.thickness)
plt.show()

View File

@@ -0,0 +1,231 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.drawing_utils."""
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
from google.protobuf import text_format
from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import landmark_pb2
from mediapipe.python.solutions import drawing_utils
DEFAULT_BBOX_DRAWING_SPEC = drawing_utils.DrawingSpec()
DEFAULT_CONNECTION_DRAWING_SPEC = drawing_utils.DrawingSpec()
DEFAULT_CIRCLE_DRAWING_SPEC = drawing_utils.DrawingSpec(color=(0, 0, 255))
DEFAULT_AXIS_DRAWING_SPEC = drawing_utils.DrawingSpec()
class DrawingUtilTest(parameterized.TestCase):
def test_invalid_input_image(self):
image = np.arange(18, dtype=np.uint8).reshape(3, 3, 2)
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
drawing_utils.draw_landmarks(image, landmark_pb2.NormalizedLandmarkList())
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
drawing_utils.draw_detection(image, detection_pb2.Detection())
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
rotation = np.eye(3, dtype=np.float32)
translation = np.array([0., 0., 1.])
drawing_utils.draw_axis(image, rotation, translation)
def test_invalid_connection(self):
landmark_list = text_format.Parse(
'landmark {x: 0.5 y: 0.5} landmark {x: 0.2 y: 0.2}',
landmark_pb2.NormalizedLandmarkList())
image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
with self.assertRaisesRegex(ValueError, 'Landmark index is out of range.'):
drawing_utils.draw_landmarks(image, landmark_list, [(0, 2)])
def test_unqualified_detection(self):
detection = text_format.Parse('location_data {format: GLOBAL}',
detection_pb2.Detection())
image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
with self.assertRaisesRegex(ValueError, 'LocationData must be relative'):
drawing_utils.draw_detection(image, detection)
def test_draw_keypoints_only(self):
detection = text_format.Parse(
'location_data {'
' format: RELATIVE_BOUNDING_BOX'
' relative_keypoints {x: 0 y: 1}'
' relative_keypoints {x: 1 y: 0}}', detection_pb2.Detection())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
cv2.circle(expected_result, (0, 99),
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
cv2.circle(expected_result, (99, 0),
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
drawing_utils.draw_detection(image, detection)
np.testing.assert_array_equal(image, expected_result)
def test_draw_bboxs_only(self):
detection = text_format.Parse(
'location_data {'
' format: RELATIVE_BOUNDING_BOX'
' relative_bounding_box {xmin: 0 ymin: 0 width: 1 height: 1}}',
detection_pb2.Detection())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
cv2.rectangle(expected_result, (0, 0), (99, 99),
DEFAULT_BBOX_DRAWING_SPEC.color,
DEFAULT_BBOX_DRAWING_SPEC.thickness)
drawing_utils.draw_detection(image, detection)
np.testing.assert_array_equal(image, expected_result)
@parameterized.named_parameters(
('landmark_list_has_only_one_element', 'landmark {x: 0.1 y: 0.1}'),
('second_landmark_is_invisible',
'landmark {x: 0.1 y: 0.1} landmark {x: 0.5 y: 0.5 visibility: 0.0}'))
def test_draw_single_landmark_point(self, landmark_list_text):
landmark_list = text_format.Parse(landmark_list_text,
landmark_pb2.NormalizedLandmarkList())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
cv2.circle(expected_result, (10, 10),
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
drawing_utils.draw_landmarks(image, landmark_list)
np.testing.assert_array_equal(image, expected_result)
@parameterized.named_parameters(
('landmarks_have_x_and_y_only',
'landmark {x: 0.1 y: 0.5} landmark {x: 0.5 y: 0.1}'),
('landmark_zero_visibility_and_presence',
'landmark {x: 0.1 y: 0.5 presence: 0.5}'
'landmark {x: 0.5 y: 0.1 visibility: 0.5}'))
def test_draw_landmarks_and_connections(self, landmark_list_text):
landmark_list = text_format.Parse(landmark_list_text,
landmark_pb2.NormalizedLandmarkList())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
start_point = (10, 50)
end_point = (50, 10)
cv2.line(expected_result, start_point, end_point,
DEFAULT_CONNECTION_DRAWING_SPEC.color,
DEFAULT_CONNECTION_DRAWING_SPEC.thickness)
cv2.circle(expected_result, start_point,
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
cv2.circle(expected_result, end_point,
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
drawing_utils.draw_landmarks(
image=image, landmark_list=landmark_list, connections=[(0, 1)])
np.testing.assert_array_equal(image, expected_result)
def test_draw_axis(self):
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
origin = (50, 50)
x_axis = (75, 50)
y_axis = (50, 22)
z_axis = (50, 77)
cv2.arrowedLine(expected_result, origin, x_axis, drawing_utils.RED_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
cv2.arrowedLine(expected_result, origin, y_axis, drawing_utils.GREEN_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
cv2.arrowedLine(expected_result, origin, z_axis, drawing_utils.BLUE_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
r = np.sqrt(2.) / 2.
rotation = np.array([[1., 0., 0.], [0., r, -r], [0., r, r]])
translation = np.array([0, 0, -0.2])
drawing_utils.draw_axis(image, rotation, translation)
np.testing.assert_array_equal(image, expected_result)
def test_draw_axis_zero_translation(self):
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
origin = (50, 50)
x_axis = (0, 50)
y_axis = (50, 100)
z_axis = (50, 50)
cv2.arrowedLine(expected_result, origin, x_axis, drawing_utils.RED_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
cv2.arrowedLine(expected_result, origin, y_axis, drawing_utils.GREEN_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
cv2.arrowedLine(expected_result, origin, z_axis, drawing_utils.BLUE_COLOR,
DEFAULT_AXIS_DRAWING_SPEC.thickness)
rotation = np.eye(3, dtype=np.float32)
translation = np.zeros((3,), dtype=np.float32)
drawing_utils.draw_axis(image, rotation, translation)
np.testing.assert_array_equal(image, expected_result)
def test_min_and_max_coordinate_values(self):
landmark_list = text_format.Parse(
'landmark {x: 0.0 y: 1.0}'
'landmark {x: 1.0 y: 0.0}', landmark_pb2.NormalizedLandmarkList())
image = np.zeros((100, 100, 3), np.uint8)
expected_result = np.copy(image)
start_point = (0, 99)
end_point = (99, 0)
cv2.line(expected_result, start_point, end_point,
DEFAULT_CONNECTION_DRAWING_SPEC.color,
DEFAULT_CONNECTION_DRAWING_SPEC.thickness)
cv2.circle(expected_result, start_point,
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
cv2.circle(expected_result, end_point,
DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
DEFAULT_CIRCLE_DRAWING_SPEC.color,
DEFAULT_CIRCLE_DRAWING_SPEC.thickness)
drawing_utils.draw_landmarks(
image=image, landmark_list=landmark_list, connections=[(0, 1)])
np.testing.assert_array_equal(image, expected_result)
def test_drawing_spec(self):
landmark_list = text_format.Parse(
'landmark {x: 0.1 y: 0.1}'
'landmark {x: 0.8 y: 0.8}', landmark_pb2.NormalizedLandmarkList())
image = np.zeros((100, 100, 3), np.uint8)
landmark_drawing_spec = drawing_utils.DrawingSpec(
color=(0, 0, 255), thickness=5)
connection_drawing_spec = drawing_utils.DrawingSpec(
color=(255, 0, 0), thickness=3)
expected_result = np.copy(image)
start_point = (10, 10)
end_point = (80, 80)
cv2.line(expected_result, start_point, end_point,
connection_drawing_spec.color, connection_drawing_spec.thickness)
cv2.circle(expected_result, start_point,
landmark_drawing_spec.circle_radius, landmark_drawing_spec.color,
landmark_drawing_spec.thickness)
cv2.circle(expected_result, end_point, landmark_drawing_spec.circle_radius,
landmark_drawing_spec.color, landmark_drawing_spec.thickness)
drawing_utils.draw_landmarks(
image=image,
landmark_list=landmark_list,
connections=[(0, 1)],
landmark_drawing_spec=landmark_drawing_spec,
connection_drawing_spec=connection_drawing_spec)
np.testing.assert_array_equal(image, expected_result)
if __name__ == '__main__':
absltest.main()
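# A minimal sketch of the drawing API exercised by the tests above: build a
# NormalizedLandmarkList by hand and render it with custom DrawingSpecs onto a
# blank frame. The landmark coordinates and colors are arbitrary placeholders.
#
#   import numpy as np
#   from google.protobuf import text_format
#   from mediapipe.framework.formats import landmark_pb2
#   from mediapipe.python.solutions import drawing_utils
#
#   landmarks = text_format.Parse(
#       'landmark {x: 0.25 y: 0.25} landmark {x: 0.75 y: 0.75}',
#       landmark_pb2.NormalizedLandmarkList())
#   frame = np.zeros((200, 200, 3), np.uint8)
#   drawing_utils.draw_landmarks(
#       frame, landmarks, connections=[(0, 1)],
#       landmark_drawing_spec=drawing_utils.DrawingSpec(color=(0, 0, 255)),
#       connection_drawing_spec=drawing_utils.DrawingSpec(color=(255, 0, 0)))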

View File

@@ -0,0 +1,112 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Face Detection."""
import enum
from typing import NamedTuple, Union
import numpy as np
from mediapipe.framework.formats import detection_pb2
from mediapipe.framework.formats import location_data_pb2
# pylint: disable=unused-import
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
SHORT_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb'
FULL_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb'
def get_key_point(
detection: detection_pb2.Detection, key_point_enum: 'FaceKeyPoint'
) -> Union[None, location_data_pb2.LocationData.RelativeKeypoint]:
"""A convenience method to return a face key point by the FaceKeyPoint type.
Args:
detection: A detection proto message that contains face key points.
key_point_enum: A FaceKeyPoint type.
Returns:
    A RelativeKeypoint proto message, or None if the detection has no
    location data.
"""
if not detection or not detection.location_data:
return None
return detection.location_data.relative_keypoints[key_point_enum]
class FaceKeyPoint(enum.IntEnum):
"""The enum type of the six face detection key points."""
RIGHT_EYE = 0
LEFT_EYE = 1
NOSE_TIP = 2
MOUTH_CENTER = 3
RIGHT_EAR_TRAGION = 4
LEFT_EAR_TRAGION = 5
class FaceDetection(SolutionBase):
"""MediaPipe Face Detection.
MediaPipe Face Detection processes an RGB image and returns a list of the
detected face location data.
Please refer to
https://solutions.mediapipe.dev/face_detection#python-solution-api
for usage examples.
"""
def __init__(self, min_detection_confidence=0.5, model_selection=0):
"""Initializes a MediaPipe Face Detection object.
Args:
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for face
detection to be considered successful. See details in
https://solutions.mediapipe.dev/face_detection#min_detection_confidence.
model_selection: 0 or 1. 0 to select a short-range model that works
best for faces within 2 meters from the camera, and 1 for a full-range
model best for faces within 5 meters. See details in
https://solutions.mediapipe.dev/face_detection#model_selection.
"""
binary_graph_path = FULL_RANGE_GRAPH_FILE_PATH if model_selection == 1 else SHORT_RANGE_GRAPH_FILE_PATH
subgraph_name = 'facedetectionfullrangecommon' if model_selection == 1 else 'facedetectionshortrangecommon'
super().__init__(
binary_graph_path=binary_graph_path,
calculator_params={
subgraph_name + '__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
},
outputs=['detections'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns a list of the detected face location data.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with a "detections" field that contains a list of the
detected face location data.
"""
return super().process(input_data={'image': image})
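# A minimal usage sketch: 'portrait.jpg' is a placeholder path, and OpenCV
# (cv2) is assumed to be available for image I/O and color conversion.
#
#   import cv2
#   import mediapipe as mp
#
#   mp_face_detection = mp.solutions.face_detection
#   with mp_face_detection.FaceDetection(model_selection=0) as face_detection:
#     image = cv2.imread('portrait.jpg')
#     results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     if results.detections:
#       nose_tip = mp_face_detection.get_key_point(
#           results.detections[0], mp_face_detection.FaceKeyPoint.NOSE_TIP)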

View File

@@ -0,0 +1,92 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.face_detection."""
import os
import tempfile # pylint: disable=unused-import
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import face_detection as mp_faces
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
SHORT_RANGE_EXPECTED_FACE_KEY_POINTS = [[363, 182], [460, 186], [420, 241],
[417, 284], [295, 199], [502, 198]]
FULL_RANGE_EXPECTED_FACE_KEY_POINTS = [[363, 181], [455, 181], [413, 233],
[411, 278], [306, 204], [499, 207]]
DIFF_THRESHOLD = 5 # pixels
class FaceDetectionTest(parameterized.TestCase):
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
for detection in results.detections:
mp_drawing.draw_detection(frame, detection)
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_faces.FaceDetection() as faces:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
faces.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
with mp_faces.FaceDetection(min_detection_confidence=0.5) as faces:
results = faces.process(image)
self.assertIsNone(results.detections)
@parameterized.named_parameters(('short_range_model', 0),
('full_range_model', 1))
def test_face(self, model_selection):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/portrait.jpg')
image = cv2.imread(image_path)
rows, cols, _ = image.shape
with mp_faces.FaceDetection(
min_detection_confidence=0.5, model_selection=model_selection) as faces:
for idx in range(5):
results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
location_data = results.detections[0].location_data
x = [keypoint.x * cols for keypoint in location_data.relative_keypoints]
y = [keypoint.y * rows for keypoint in location_data.relative_keypoints]
face_keypoints = np.column_stack((x, y))
if model_selection == 0:
prediction_error = np.abs(
np.asarray(face_keypoints) -
np.asarray(SHORT_RANGE_EXPECTED_FACE_KEY_POINTS))
else:
prediction_error = np.abs(
np.asarray(face_keypoints) -
np.asarray(FULL_RANGE_EXPECTED_FACE_KEY_POINTS))
self.assertLen(results.detections, 1)
self.assertLen(location_data.relative_keypoints, 6)
npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,238 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe FaceMesh."""
from typing import NamedTuple
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
BINARYPB_FILE_PATH = 'mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb'
FACE_CONNECTIONS = frozenset([
# Lips.
(61, 146),
(146, 91),
(91, 181),
(181, 84),
(84, 17),
(17, 314),
(314, 405),
(405, 321),
(321, 375),
(375, 291),
(61, 185),
(185, 40),
(40, 39),
(39, 37),
(37, 0),
(0, 267),
(267, 269),
(269, 270),
(270, 409),
(409, 291),
(78, 95),
(95, 88),
(88, 178),
(178, 87),
(87, 14),
(14, 317),
(317, 402),
(402, 318),
(318, 324),
(324, 308),
(78, 191),
(191, 80),
(80, 81),
(81, 82),
(82, 13),
(13, 312),
(312, 311),
(311, 310),
(310, 415),
(415, 308),
# Left eye.
(263, 249),
(249, 390),
(390, 373),
(373, 374),
(374, 380),
(380, 381),
(381, 382),
(382, 362),
(263, 466),
(466, 388),
(388, 387),
(387, 386),
(386, 385),
(385, 384),
(384, 398),
(398, 362),
# Left eyebrow.
(276, 283),
(283, 282),
(282, 295),
(295, 285),
(300, 293),
(293, 334),
(334, 296),
(296, 336),
# Right eye.
(33, 7),
(7, 163),
(163, 144),
(144, 145),
(145, 153),
(153, 154),
(154, 155),
(155, 133),
(33, 246),
(246, 161),
(161, 160),
(160, 159),
(159, 158),
(158, 157),
(157, 173),
(173, 133),
# Right eyebrow.
(46, 53),
(53, 52),
(52, 65),
(65, 55),
(70, 63),
(63, 105),
(105, 66),
(66, 107),
# Face oval.
(10, 338),
(338, 297),
(297, 332),
(332, 284),
(284, 251),
(251, 389),
(389, 356),
(356, 454),
(454, 323),
(323, 361),
(361, 288),
(288, 397),
(397, 365),
(365, 379),
(379, 378),
(378, 400),
(400, 377),
(377, 152),
(152, 148),
(148, 176),
(176, 149),
(149, 150),
(150, 136),
(136, 172),
(172, 58),
(58, 132),
(132, 93),
(93, 234),
(234, 127),
(127, 162),
(162, 21),
(21, 54),
(54, 103),
(103, 67),
(67, 109),
(109, 10)
])
class FaceMesh(SolutionBase):
"""MediaPipe FaceMesh.
MediaPipe FaceMesh processes an RGB image and returns the face landmarks on
each detected face.
Please refer to https://solutions.mediapipe.dev/face_mesh#python-solution-api
for usage examples.
"""
def __init__(self,
static_image_mode=False,
max_num_faces=1,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe FaceMesh object.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream. See details in
https://solutions.mediapipe.dev/face_mesh#static_image_mode.
max_num_faces: Maximum number of faces to detect. See details in
https://solutions.mediapipe.dev/face_mesh#max_num_faces.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for face
detection to be considered successful. See details in
https://solutions.mediapipe.dev/face_mesh#min_detection_confidence.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
face landmarks to be considered tracked successfully. See details in
https://solutions.mediapipe.dev/face_mesh#min_tracking_confidence.
"""
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'num_faces': max_num_faces,
},
calculator_params={
'ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
'facedetectionshortrangecpu__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
'facelandmarkcpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=['multi_face_landmarks'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the face landmarks on each detected face.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with a "multi_face_landmarks" field that contains the
face landmarks on each detected face.
"""
return super().process(input_data={'image': image})
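# A minimal usage sketch: 'portrait.jpg' is a placeholder path, and OpenCV
# (cv2) is assumed to be available.
#
#   import cv2
#   import mediapipe as mp
#
#   mp_face_mesh = mp.solutions.face_mesh
#   with mp_face_mesh.FaceMesh(static_image_mode=True,
#                              max_num_faces=2) as face_mesh:
#     image = cv2.imread('portrait.jpg')
#     results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     if results.multi_face_landmarks:
#       for face_landmarks in results.multi_face_landmarks:
#         print(len(face_landmarks.landmark))  # 468 landmarks per face.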

View File

@@ -0,0 +1,125 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.face_mesh."""
import os
import tempfile # pylint: disable=unused-import
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import face_mesh as mp_faces
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 5 # pixels
EYE_INDICES_TO_LANDMARKS = {
33: [345, 178],
7: [348, 179],
163: [352, 178],
144: [357, 179],
145: [365, 179],
153: [371, 179],
154: [378, 178],
155: [381, 177],
133: [383, 177],
246: [347, 175],
161: [350, 174],
160: [355, 172],
159: [362, 170],
158: [368, 171],
157: [375, 172],
173: [380, 175],
263: [467, 176],
249: [464, 177],
390: [460, 177],
373: [455, 178],
374: [448, 179],
380: [441, 179],
381: [435, 178],
382: [432, 177],
362: [430, 177],
466: [465, 175],
388: [462, 173],
387: [457, 171],
386: [450, 170],
385: [444, 171],
384: [437, 172],
398: [432, 175]
}
class FaceMeshTest(parameterized.TestCase):
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
for face_landmarks in results.multi_face_landmarks:
mp_drawing.draw_landmarks(
image=frame,
landmark_list=face_landmarks,
landmark_drawing_spec=drawing_spec)
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_faces.FaceMesh() as faces:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
faces.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_faces.FaceMesh() as faces:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = faces.process(image)
self.assertIsNone(results.multi_face_landmarks)
@parameterized.named_parameters(('static_image_mode', True, 1),
('video_mode', False, 5))
def test_face(self, static_image_mode: bool, num_frames: int):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/portrait.jpg')
image = cv2.imread(image_path)
rows, cols, _ = image.shape
with mp_faces.FaceMesh(
static_image_mode=static_image_mode,
min_detection_confidence=0.5) as faces:
for idx in range(num_frames):
results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
multi_face_landmarks = []
for landmarks in results.multi_face_landmarks:
self.assertLen(landmarks.landmark, 468)
x = [landmark.x * cols for landmark in landmarks.landmark]
y = [landmark.y * rows for landmark in landmarks.landmark]
face_landmarks = np.column_stack((x, y))
multi_face_landmarks.append(face_landmarks)
self.assertLen(multi_face_landmarks, 1)
# Verify the eye landmarks are correct as sanity check.
for eye_idx, gt_lds in EYE_INDICES_TO_LANDMARKS.items():
prediction_error = np.abs(
np.asarray(multi_face_landmarks[0][eye_idx]) - np.asarray(gt_lds))
npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,164 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Hands."""
import enum
from typing import NamedTuple
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
class HandLandmark(enum.IntEnum):
"""The 21 hand landmarks."""
WRIST = 0
THUMB_CMC = 1
THUMB_MCP = 2
THUMB_IP = 3
THUMB_TIP = 4
INDEX_FINGER_MCP = 5
INDEX_FINGER_PIP = 6
INDEX_FINGER_DIP = 7
INDEX_FINGER_TIP = 8
MIDDLE_FINGER_MCP = 9
MIDDLE_FINGER_PIP = 10
MIDDLE_FINGER_DIP = 11
MIDDLE_FINGER_TIP = 12
RING_FINGER_MCP = 13
RING_FINGER_PIP = 14
RING_FINGER_DIP = 15
RING_FINGER_TIP = 16
PINKY_MCP = 17
PINKY_PIP = 18
PINKY_DIP = 19
PINKY_TIP = 20
BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb'
HAND_CONNECTIONS = frozenset([
(HandLandmark.WRIST, HandLandmark.THUMB_CMC),
(HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
(HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
(HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP),
(HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
(HandLandmark.INDEX_FINGER_MCP, HandLandmark.INDEX_FINGER_PIP),
(HandLandmark.INDEX_FINGER_PIP, HandLandmark.INDEX_FINGER_DIP),
(HandLandmark.INDEX_FINGER_DIP, HandLandmark.INDEX_FINGER_TIP),
(HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP),
(HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.MIDDLE_FINGER_PIP),
(HandLandmark.MIDDLE_FINGER_PIP, HandLandmark.MIDDLE_FINGER_DIP),
(HandLandmark.MIDDLE_FINGER_DIP, HandLandmark.MIDDLE_FINGER_TIP),
(HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.RING_FINGER_MCP),
(HandLandmark.RING_FINGER_MCP, HandLandmark.RING_FINGER_PIP),
(HandLandmark.RING_FINGER_PIP, HandLandmark.RING_FINGER_DIP),
(HandLandmark.RING_FINGER_DIP, HandLandmark.RING_FINGER_TIP),
(HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
(HandLandmark.WRIST, HandLandmark.PINKY_MCP),
(HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
(HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
(HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP)
])
class Hands(SolutionBase):
"""MediaPipe Hands.
MediaPipe Hands processes an RGB image and returns the hand landmarks and
  handedness (left vs. right hand) of each detected hand.
Note that it determines handedness assuming the input image is mirrored,
i.e., taken with a front-facing/selfie camera (
https://en.wikipedia.org/wiki/Front-facing_camera) with images flipped
horizontally. If that is not the case, use, for instance, cv2.flip(image, 1)
to flip the image first for a correct handedness output.
Please refer to https://solutions.mediapipe.dev/hands#python-solution-api for
usage examples.
"""
def __init__(self,
static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe Hand object.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream. See details in
https://solutions.mediapipe.dev/hands#static_image_mode.
max_num_hands: Maximum number of hands to detect. See details in
https://solutions.mediapipe.dev/hands#max_num_hands.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for hand
detection to be considered successful. See details in
https://solutions.mediapipe.dev/hands#min_detection_confidence.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
hand landmarks to be considered tracked successfully. See details in
https://solutions.mediapipe.dev/hands#min_tracking_confidence.
"""
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'num_hands': max_num_hands,
},
calculator_params={
'ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
'palmdetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
'handlandmarkcpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=['multi_hand_landmarks', 'multi_handedness'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the hand landmarks and handedness of each detected hand.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with two fields: a "multi_hand_landmarks" field that
contains the hand landmarks on each detected hand and a "multi_handedness"
      field that contains the handedness (left vs. right hand) of each
      detected hand.
"""
return super().process(input_data={'image': image})
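# A minimal usage sketch: 'hands.jpg' is a placeholder path, and OpenCV (cv2)
# is assumed to be available. The frame is flipped horizontally first so the
# handedness output matches the mirrored (selfie) convention described in the
# class docstring.
#
#   import cv2
#   import mediapipe as mp
#
#   with mp.solutions.hands.Hands(max_num_hands=2) as hands:
#     image = cv2.flip(cv2.imread('hands.jpg'), 1)
#     results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     if results.multi_handedness:
#       for handedness in results.multi_handedness:
#         print(handedness.classification[0].label)  # 'Left' or 'Right'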

View File

@@ -0,0 +1,110 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.hands."""
import os
import tempfile # pylint: disable=unused-import
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_styles
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import hands as mp_hands
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 15 # pixels
EXPECTED_HAND_COORDINATES_PREDICTION = [[[144, 345], [211, 323], [257, 286],
[289, 237], [322, 203], [219, 216],
[238, 138], [249, 90], [253, 51],
[177, 204], [184, 115], [187, 60],
[185, 19], [138, 208], [131, 127],
[124, 77], [117, 36], [106, 222],
[92, 159], [79, 124], [68, 93]],
[[577, 37], [504, 56], [459, 94],
[429, 146], [397, 182], [496, 167],
[479, 245], [469, 292], [464, 330],
[540, 177], [534, 265], [533, 319],
[536, 360], [581, 172], [587, 252],
[593, 304], [599, 346], [615, 157],
[628, 223], [638, 258], [648, 288]]]
class HandsTest(parameterized.TestCase):
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks(
frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
drawing_styles.get_default_hand_landmark_style(),
drawing_styles.get_default_hand_connection_style())
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_hands.Hands() as hands:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
hands.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_hands.Hands() as hands:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = hands.process(image)
self.assertIsNone(results.multi_hand_landmarks)
self.assertIsNone(results.multi_handedness)
@parameterized.named_parameters(('static_image_mode', True, 1),
('video_mode', False, 5))
def test_multi_hands(self, static_image_mode, num_frames):
image_path = os.path.join(os.path.dirname(__file__), 'testdata/hands.jpg')
image = cv2.imread(image_path)
with mp_hands.Hands(
static_image_mode=static_image_mode,
max_num_hands=2,
min_detection_confidence=0.5) as hands:
for idx in range(num_frames):
results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
handedness = [
handedness.classification[0].label
for handedness in results.multi_handedness
]
multi_hand_coordinates = []
rows, cols, _ = image.shape
for landmarks in results.multi_hand_landmarks:
self.assertLen(landmarks.landmark, 21)
x = [landmark.x * cols for landmark in landmarks.landmark]
y = [landmark.y * rows for landmark in landmarks.landmark]
hand_coordinates = np.column_stack((x, y))
multi_hand_coordinates.append(hand_coordinates)
self.assertLen(handedness, 2)
self.assertLen(multi_hand_coordinates, 2)
prediction_error = np.abs(
np.asarray(multi_hand_coordinates) -
np.asarray(EXPECTED_HAND_COORDINATES_PREDICTION))
npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,152 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Holistic."""
from typing import NamedTuple
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# The following imports are needed because python pb2 silently discards
# unknown protobuf fields.
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmark_projection_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.framework.tool import switch_container_pb2
from mediapipe.modules.holistic_landmark.calculators import roi_tracking_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils
# pylint: disable=unused-import
from mediapipe.python.solutions.face_mesh import FACE_CONNECTIONS
from mediapipe.python.solutions.hands import HAND_CONNECTIONS
from mediapipe.python.solutions.hands import HandLandmark
from mediapipe.python.solutions.pose import POSE_CONNECTIONS
from mediapipe.python.solutions.pose import PoseLandmark
# pylint: enable=unused-import
BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'
def _download_oss_pose_landmark_model(model_complexity):
"""Downloads the pose landmark lite/heavy model from the MediaPipe Github repo if it doesn't exist in the package."""
if model_complexity == 0:
download_utils.download_oss_model(
'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite')
elif model_complexity == 2:
download_utils.download_oss_model(
'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite')
class Holistic(SolutionBase):
"""MediaPipe Holistic.
MediaPipe Holistic processes an RGB image and returns pose landmarks, left and
right hand landmarks, and face mesh landmarks on the most prominent person
detected.
Please refer to https://solutions.mediapipe.dev/holistic#python-solution-api
for usage examples.
"""
def __init__(self,
static_image_mode=False,
model_complexity=1,
smooth_landmarks=True,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe Holistic object.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream. See details in
https://solutions.mediapipe.dev/holistic#static_image_mode.
model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
details in https://solutions.mediapipe.dev/holistic#model_complexity.
smooth_landmarks: Whether to filter landmarks across different input
images to reduce jitter. See details in
https://solutions.mediapipe.dev/holistic#smooth_landmarks.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
detection to be considered successful. See details in
https://solutions.mediapipe.dev/holistic#min_detection_confidence.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
pose landmarks to be considered tracked successfully. See details in
https://solutions.mediapipe.dev/holistic#min_tracking_confidence.
"""
_download_oss_pose_landmark_model(model_complexity)
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'model_complexity': model_complexity,
'smooth_landmarks': smooth_landmarks and not static_image_mode,
},
calculator_params={
'poselandmarkcpu__ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=[
'pose_landmarks', 'pose_world_landmarks', 'left_hand_landmarks',
'right_hand_landmarks', 'face_landmarks'
])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
      A NamedTuple with five fields describing the landmarks on the most
      prominent person detected:
1) "pose_landmarks" field that contains the pose landmarks.
2) "pose_world_landmarks" field that contains the pose landmarks in
real-world 3D coordinates that are in meters with the origin at the
center between hips.
3) "left_hand_landmarks" field that contains the left-hand landmarks.
4) "right_hand_landmarks" field that contains the right-hand landmarks.
5) "face_landmarks" field that contains the face landmarks.
"""
results = super().process(input_data={'image': image})
if results.pose_landmarks:
for landmark in results.pose_landmarks.landmark:
landmark.ClearField('presence')
if results.pose_world_landmarks:
for landmark in results.pose_world_landmarks.landmark:
landmark.ClearField('presence')
return results
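# A minimal usage sketch: 'holistic.jpg' is a placeholder path, and OpenCV
# (cv2) is assumed to be available.
#
#   import cv2
#   import mediapipe as mp
#
#   with mp.solutions.holistic.Holistic(static_image_mode=True,
#                                       model_complexity=1) as holistic:
#     image = cv2.imread('holistic.jpg')
#     results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     if results.pose_world_landmarks:
#       # Real-world 3D coordinates in meters, origin between the hips.
#       hip = results.pose_world_landmarks.landmark[
#           mp.solutions.holistic.PoseLandmark.LEFT_HIP]
#       print(hip.x, hip.y, hip.z)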

View File

@@ -0,0 +1,134 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.pose."""
import os
import tempfile # pylint: disable=unused-import
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import holistic as mp_holistic
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
POSE_DIFF_THRESHOLD = 30 # pixels
HAND_DIFF_THRESHOLD = 30 # pixels
EXPECTED_POSE_LANDMARKS = np.array([[782, 243], [791, 232], [796, 233],
[801, 233], [773, 231], [766, 231],
[759, 232], [802, 242], [751, 239],
[791, 258], [766, 258], [830, 301],
[708, 298], [910, 248], [635, 234],
[954, 161], [593, 136], [961, 137],
[583, 110], [952, 132], [592, 106],
[950, 141], [596, 115], [793, 500],
[724, 502], [874, 626], [640, 629],
[965, 756], [542, 760], [962, 779],
[533, 781], [1025, 797], [487, 803]])
EXPECTED_LEFT_HAND_LANDMARKS = np.array([[958, 167], [950, 161], [945, 151],
[945, 141], [947, 134], [945, 136],
[939, 122], [935, 113], [931, 106],
[951, 134], [946, 118], [942, 108],
[938, 100], [957, 135], [954, 120],
[951, 111], [948, 103], [964, 138],
[964, 128], [965, 122], [965, 117]])
EXPECTED_RIGHT_HAND_LANDMARKS = np.array([[590, 135], [602, 125], [609, 114],
[613, 103], [617, 96], [596, 100],
[595, 84], [594, 74], [593, 68],
[588, 100], [586, 84], [585, 73],
[584, 65], [581, 103], [579, 89],
[579, 79], [579, 72], [575, 109],
[571, 99], [570, 93], [569, 87]])
class HolisticTest(parameterized.TestCase):
def _landmarks_list_to_array(self, landmark_list, image_shape):
rows, cols, _ = image_shape
return np.asarray([(lmk.x * cols, lmk.y * rows)
for lmk in landmark_list.landmark])
def _assert_diff_less(self, array1, array2, threshold):
npt.assert_array_less(np.abs(array1 - array2), threshold)
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
mp_drawing.draw_landmarks(
image=frame,
landmark_list=results.face_landmarks,
landmark_drawing_spec=drawing_spec)
mp_drawing.draw_landmarks(frame, results.left_hand_landmarks,
mp_holistic.HAND_CONNECTIONS)
mp_drawing.draw_landmarks(frame, results.right_hand_landmarks,
mp_holistic.HAND_CONNECTIONS)
mp_drawing.draw_landmarks(frame, results.pose_landmarks,
mp_holistic.POSE_CONNECTIONS)
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_holistic.Holistic() as holistic:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
holistic.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_holistic.Holistic() as holistic:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = holistic.process(image)
self.assertIsNone(results.pose_landmarks)
@parameterized.named_parameters(('static_lite', True, 0, 3),
('static_full', True, 1, 3),
('static_heavy', True, 2, 3),
('video_lite', False, 0, 3),
('video_full', False, 1, 3),
('video_heavy', False, 2, 3))
def test_on_image(self, static_image_mode, model_complexity, num_frames):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/holistic.jpg')
image = cv2.imread(image_path)
with mp_holistic.Holistic(static_image_mode=static_image_mode,
model_complexity=model_complexity) as holistic:
for idx in range(num_frames):
results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
self._assert_diff_less(
self._landmarks_list_to_array(results.pose_landmarks, image.shape),
EXPECTED_POSE_LANDMARKS,
POSE_DIFF_THRESHOLD)
self._assert_diff_less(
self._landmarks_list_to_array(results.left_hand_landmarks,
image.shape),
EXPECTED_LEFT_HAND_LANDMARKS,
HAND_DIFF_THRESHOLD)
self._assert_diff_less(
self._landmarks_list_to_array(results.right_hand_landmarks,
image.shape),
EXPECTED_RIGHT_HAND_LANDMARKS,
HAND_DIFF_THRESHOLD)
# TODO: Verify the correctness of the face landmarks.
self.assertLen(results.face_landmarks.landmark, 468)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,292 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Objectron."""
import enum
from typing import List, Tuple, NamedTuple, Optional
import attr
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import association_calculator_pb2
from mediapipe.calculators.util import collection_has_min_size_calculator_pb2
from mediapipe.calculators.util import detection_label_id_to_text_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmark_projection_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
from mediapipe.framework.formats import landmark_pb2
from mediapipe.modules.objectron.calculators import annotation_data_pb2
from mediapipe.modules.objectron.calculators import frame_annotation_to_rect_calculator_pb2
from mediapipe.modules.objectron.calculators import lift_2d_frame_annotation_to_3d_calculator_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils
class BoxLandmark(enum.IntEnum):
"""The 9 3D box landmarks."""
#
# 3 + + + + + + + + 7
# +\ +\ UP
# + \ + \
# + \ + \ |
# + 4 + + + + + + + + 8 | y
# + + + + |
# + + + + |
# + + (0) + + .------- x
# + + + + \
# 1 + + + + + + + + 5 + \
# \ + \ + \ z
# \ + \ + \
# \+ \+
# 2 + + + + + + + + 6
CENTER = 0
BACK_BOTTOM_LEFT = 1
FRONT_BOTTOM_LEFT = 2
BACK_TOP_LEFT = 3
FRONT_TOP_LEFT = 4
BACK_BOTTOM_RIGHT = 5
FRONT_BOTTOM_RIGHT = 6
BACK_TOP_RIGHT = 7
FRONT_TOP_RIGHT = 8
BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'
BOX_CONNECTIONS = frozenset([
(BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_LEFT),
(BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_TOP_LEFT),
(BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_BOTTOM_RIGHT),
(BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_TOP_LEFT),
(BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_RIGHT),
(BoxLandmark.BACK_TOP_LEFT, BoxLandmark.FRONT_TOP_LEFT),
(BoxLandmark.BACK_TOP_LEFT, BoxLandmark.BACK_TOP_RIGHT),
(BoxLandmark.FRONT_TOP_LEFT, BoxLandmark.FRONT_TOP_RIGHT),
(BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.FRONT_BOTTOM_RIGHT),
(BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.BACK_TOP_RIGHT),
(BoxLandmark.FRONT_BOTTOM_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
(BoxLandmark.BACK_TOP_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
])
@attr.s(auto_attribs=True)
class ObjectronModel(object):
model_path: str
label_name: str
@attr.s(auto_attribs=True, frozen=True)
class ShoeModel(ObjectronModel):
model_path: str = ('mediapipe/modules/objectron/'
'object_detection_3d_sneakers.tflite')
label_name: str = 'Footwear'
@attr.s(auto_attribs=True, frozen=True)
class ChairModel(ObjectronModel):
model_path: str = ('mediapipe/modules/objectron/'
'object_detection_3d_chair.tflite')
label_name: str = 'Chair'
@attr.s(auto_attribs=True, frozen=True)
class CameraModel(ObjectronModel):
model_path: str = ('mediapipe/modules/objectron/'
'object_detection_3d_camera.tflite')
label_name: str = 'Camera'
@attr.s(auto_attribs=True, frozen=True)
class CupModel(ObjectronModel):
model_path: str = ('mediapipe/modules/objectron/'
'object_detection_3d_cup.tflite')
label_name: str = 'Coffee cup, Mug'
_MODEL_DICT = {
'Shoe': ShoeModel(),
'Chair': ChairModel(),
'Cup': CupModel(),
'Camera': CameraModel()
}
def _download_oss_objectron_models(objectron_model: str):
"""Downloads the objectron models from the MediaPipe Github repo if they don't exist in the package."""
download_utils.download_oss_model(
'mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite'
)
download_utils.download_oss_model(objectron_model)
def get_model_by_name(name: str) -> ObjectronModel:
if name not in _MODEL_DICT:
raise ValueError(f'{name} is not a valid model name for Objectron.')
_download_oss_objectron_models(_MODEL_DICT[name].model_path)
return _MODEL_DICT[name]
@attr.s(auto_attribs=True)
class ObjectronOutputs(object):
landmarks_2d: landmark_pb2.NormalizedLandmarkList
landmarks_3d: landmark_pb2.LandmarkList
rotation: np.ndarray
translation: np.ndarray
scale: np.ndarray
class Objectron(SolutionBase):
"""MediaPipe Objectron.
MediaPipe Objectron processes an RGB image and returns the 3D box landmarks
and 2D rectangular bounding box of each detected object.
"""
def __init__(self,
static_image_mode: bool = False,
max_num_objects: int = 5,
min_detection_confidence: float = 0.5,
min_tracking_confidence: float = 0.99,
model_name: str = 'Shoe',
focal_length: Tuple[float, float] = (1.0, 1.0),
principal_point: Tuple[float, float] = (0.0, 0.0),
image_size: Optional[Tuple[int, int]] = None,
):
"""Initializes a MediaPipe Objectron class.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream.
max_num_objects: Maximum number of objects to detect.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for object
detection to be considered successful.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
box landmarks to be considered tracked successfully.
      model_name: Name of the model to use for predicting box landmarks;
        currently supported: {'Shoe', 'Chair', 'Cup', 'Camera'}.
      focal_length: Camera focal length (fx, fy), defined by default in NDC
        space. To use a focal length (fx_pixel, fy_pixel) in pixel space,
        provide image_size = (image_width, image_height) to enable the
        conversions inside the API.
      principal_point: Camera principal point (px, py), defined by default in
        NDC space. To use a principal point (px_pixel, py_pixel) in pixel
        space, provide image_size = (image_width, image_height) to enable the
        conversions inside the API.
      image_size: Optional size (image_width, image_height) of the input
        image, needed ONLY when focal_length and principal_point are given in
        pixel space.
Raises:
ConnectionError: If the objectron open source model can't be downloaded
from the MediaPipe Github repo.
"""
# Get Camera parameters.
fx, fy = focal_length
px, py = principal_point
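    # For example, for a 640x480 image with fx = fy = 500 pixels and the
    # principal point at the image center (320, 240), the conversion below
    # yields fx = 500/320, fy = 500/240 and px = py = 0 in NDC space.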
if image_size is not None:
half_width = image_size[0] / 2.0
half_height = image_size[1] / 2.0
fx = fx / half_width
fy = fy / half_height
px = - (px - half_width) / half_width
py = - (py - half_height) / half_height
# Create and init model.
model = get_model_by_name(model_name)
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'box_landmark_model_path': model.model_path,
'allowed_labels': model.label_name,
'max_num_objects': max_num_objects,
},
calculator_params={
'ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
('objectdetectionoidv4subgraph'
'__TensorsToDetectionsCalculator.min_score_thresh'):
min_detection_confidence,
('boxlandmarksubgraph__ThresholdingCalculator'
'.threshold'):
min_tracking_confidence,
('Lift2DFrameAnnotationTo3DCalculator'
'.normalized_focal_x'): fx,
('Lift2DFrameAnnotationTo3DCalculator'
'.normalized_focal_y'): fy,
('Lift2DFrameAnnotationTo3DCalculator'
'.normalized_principal_point_x'): px,
('Lift2DFrameAnnotationTo3DCalculator'
'.normalized_principal_point_y'): py,
},
outputs=['detected_objects'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the box landmarks and rectangular bounding box of each detected object.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with a "detected_objects" field that contains a list
of detected 3D bounding boxes. Each detected box is represented as an
"ObjectronOutputs" instance.
"""
results = super().process(input_data={'image': image})
if results.detected_objects:
results.detected_objects = self._convert_format(results.detected_objects)
else:
results.detected_objects = None
return results
def _convert_format(
self,
inputs: annotation_data_pb2.FrameAnnotation) -> List[ObjectronOutputs]:
new_outputs = list()
for annotation in inputs.annotations:
# Get 3d object pose.
rotation = np.reshape(np.array(annotation.rotation), (3, 3))
translation = np.array(annotation.translation)
scale = np.array(annotation.scale)
      # Get 2d/3d landmarks.
landmarks_2d = landmark_pb2.NormalizedLandmarkList()
landmarks_3d = landmark_pb2.LandmarkList()
for keypoint in annotation.keypoints:
point_2d = keypoint.point_2d
landmarks_2d.landmark.add(x=point_2d.x, y=point_2d.y)
point_3d = keypoint.point_3d
landmarks_3d.landmark.add(x=point_3d.x, y=point_3d.y, z=point_3d.z)
# Add to objectron outputs.
new_outputs.append(ObjectronOutputs(landmarks_2d, landmarks_3d,
rotation, translation, scale=scale))
return new_outputs
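# A minimal usage sketch: 'shoes.jpg' is a placeholder path, and OpenCV (cv2)
# is assumed to be available. The camera intrinsics here are illustrative
# pixel-space values; image_size must then be provided so __init__ can convert
# them to NDC space.
#
#   import cv2
#   import mediapipe as mp
#
#   image = cv2.imread('shoes.jpg')
#   h, w = image.shape[:2]
#   with mp.solutions.objectron.Objectron(
#       static_image_mode=True,
#       model_name='Shoe',
#       focal_length=(w, w),
#       principal_point=(w / 2, h / 2),
#       image_size=(w, h)) as objectron:
#     results = objectron.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     if results.detected_objects:
#       print(results.detected_objects[0].rotation)  # 3x3 rotation matrix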

View File

@@ -0,0 +1,81 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.objectron."""
import os
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
from mediapipe.python.solutions import objectron as mp_objectron
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 30 # pixels
EXPECTED_BOX_COORDINATES_PREDICTION = [[[236, 413], [408, 474], [135, 457],
[383, 505], [80, 478], [408, 345],
[130, 347], [384, 355], [72, 353]],
[[241, 206], [411, 279], [131, 280],
[392, 249], [78, 252], [412, 155],
[140, 178], [396, 105], [89, 137]]]
class ObjectronTest(parameterized.TestCase):
def test_invalid_image_shape(self):
with mp_objectron.Objectron() as objectron:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
objectron.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_objectron.Objectron() as objectron:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = objectron.process(image)
self.assertIsNone(results.detected_objects)
@parameterized.named_parameters(('static_image_mode', True, 1),
('video_mode', False, 5))
def test_multi_objects(self, static_image_mode, num_frames):
image_path = os.path.join(os.path.dirname(__file__), 'testdata/shoes.jpg')
image = cv2.imread(image_path)
with mp_objectron.Objectron(
static_image_mode=static_image_mode,
max_num_objects=2,
min_detection_confidence=0.5) as objectron:
for _ in range(num_frames):
results = objectron.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
multi_box_coordinates = []
for detected_object in results.detected_objects:
landmarks = detected_object.landmarks_2d
self.assertLen(landmarks.landmark, 9)
x = [landmark.x for landmark in landmarks.landmark]
y = [landmark.y for landmark in landmarks.landmark]
box_coordinates = np.transpose(np.stack((y, x))) * image.shape[0:2]
multi_box_coordinates.append(box_coordinates)
self.assertLen(multi_box_coordinates, 2)
prediction_error = np.abs(
np.asarray(multi_box_coordinates) -
np.asarray(EXPECTED_BOX_COORDINATES_PREDICTION))
npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,216 @@
# Copyright 2020-2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Pose."""
import enum
from typing import NamedTuple
import numpy as np
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
# The following imports are needed because python pb2 silently discards
# unknown protobuf fields.
# pylint: disable=unused-import
from mediapipe.calculators.core import gate_calculator_pb2
from mediapipe.calculators.core import split_vector_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
from mediapipe.calculators.util import landmarks_smoothing_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.calculators.util import logic_calculator_pb2
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
from mediapipe.calculators.util import rect_transformation_calculator_pb2
from mediapipe.calculators.util import thresholding_calculator_pb2
from mediapipe.calculators.util import visibility_smoothing_calculator_pb2
from mediapipe.framework.tool import switch_container_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
from mediapipe.python.solutions import download_utils
class PoseLandmark(enum.IntEnum):
"""The 33 pose landmarks."""
NOSE = 0
LEFT_EYE_INNER = 1
LEFT_EYE = 2
LEFT_EYE_OUTER = 3
RIGHT_EYE_INNER = 4
RIGHT_EYE = 5
RIGHT_EYE_OUTER = 6
LEFT_EAR = 7
RIGHT_EAR = 8
MOUTH_LEFT = 9
MOUTH_RIGHT = 10
LEFT_SHOULDER = 11
RIGHT_SHOULDER = 12
LEFT_ELBOW = 13
RIGHT_ELBOW = 14
LEFT_WRIST = 15
RIGHT_WRIST = 16
LEFT_PINKY = 17
RIGHT_PINKY = 18
LEFT_INDEX = 19
RIGHT_INDEX = 20
LEFT_THUMB = 21
RIGHT_THUMB = 22
LEFT_HIP = 23
RIGHT_HIP = 24
LEFT_KNEE = 25
RIGHT_KNEE = 26
LEFT_ANKLE = 27
RIGHT_ANKLE = 28
LEFT_HEEL = 29
RIGHT_HEEL = 30
LEFT_FOOT_INDEX = 31
RIGHT_FOOT_INDEX = 32
BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
POSE_CONNECTIONS = frozenset([
(PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
(PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
(PoseLandmark.RIGHT_EYE, PoseLandmark.RIGHT_EYE_OUTER),
(PoseLandmark.RIGHT_EYE_OUTER, PoseLandmark.RIGHT_EAR),
(PoseLandmark.NOSE, PoseLandmark.LEFT_EYE_INNER),
(PoseLandmark.LEFT_EYE_INNER, PoseLandmark.LEFT_EYE),
(PoseLandmark.LEFT_EYE, PoseLandmark.LEFT_EYE_OUTER),
(PoseLandmark.LEFT_EYE_OUTER, PoseLandmark.LEFT_EAR),
(PoseLandmark.MOUTH_RIGHT, PoseLandmark.MOUTH_LEFT),
(PoseLandmark.RIGHT_SHOULDER, PoseLandmark.LEFT_SHOULDER),
(PoseLandmark.RIGHT_SHOULDER, PoseLandmark.RIGHT_ELBOW),
(PoseLandmark.RIGHT_ELBOW, PoseLandmark.RIGHT_WRIST),
(PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_PINKY),
(PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_INDEX),
(PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_THUMB),
(PoseLandmark.RIGHT_PINKY, PoseLandmark.RIGHT_INDEX),
(PoseLandmark.LEFT_SHOULDER, PoseLandmark.LEFT_ELBOW),
(PoseLandmark.LEFT_ELBOW, PoseLandmark.LEFT_WRIST),
(PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_PINKY),
(PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_INDEX),
(PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_THUMB),
(PoseLandmark.LEFT_PINKY, PoseLandmark.LEFT_INDEX),
(PoseLandmark.RIGHT_SHOULDER, PoseLandmark.RIGHT_HIP),
(PoseLandmark.LEFT_SHOULDER, PoseLandmark.LEFT_HIP),
(PoseLandmark.RIGHT_HIP, PoseLandmark.LEFT_HIP),
(PoseLandmark.RIGHT_HIP, PoseLandmark.RIGHT_KNEE),
(PoseLandmark.LEFT_HIP, PoseLandmark.LEFT_KNEE),
(PoseLandmark.RIGHT_KNEE, PoseLandmark.RIGHT_ANKLE),
(PoseLandmark.LEFT_KNEE, PoseLandmark.LEFT_ANKLE),
(PoseLandmark.RIGHT_ANKLE, PoseLandmark.RIGHT_HEEL),
(PoseLandmark.LEFT_ANKLE, PoseLandmark.LEFT_HEEL),
(PoseLandmark.RIGHT_HEEL, PoseLandmark.RIGHT_FOOT_INDEX),
(PoseLandmark.LEFT_HEEL, PoseLandmark.LEFT_FOOT_INDEX),
(PoseLandmark.RIGHT_ANKLE, PoseLandmark.RIGHT_FOOT_INDEX),
(PoseLandmark.LEFT_ANKLE, PoseLandmark.LEFT_FOOT_INDEX),
])
def _download_oss_pose_landmark_model(model_complexity):
"""Downloads the pose landmark lite/heavy model from the MediaPipe Github repo if it doesn't exist in the package."""
if model_complexity == 0:
download_utils.download_oss_model(
'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite')
elif model_complexity == 2:
download_utils.download_oss_model(
'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite')
class Pose(SolutionBase):
"""MediaPipe Pose.
MediaPipe Pose processes an RGB image and returns pose landmarks on the most
prominent person detected.
Please refer to https://solutions.mediapipe.dev/pose#python-solution-api for
usage examples.
"""
def __init__(self,
static_image_mode=False,
model_complexity=1,
smooth_landmarks=True,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe Pose object.
Args:
static_image_mode: Whether to treat the input images as a batch of static
and possibly unrelated images, or a video stream. See details in
https://solutions.mediapipe.dev/pose#static_image_mode.
model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
details in https://solutions.mediapipe.dev/pose#model_complexity.
smooth_landmarks: Whether to filter landmarks across different input
images to reduce jitter. See details in
https://solutions.mediapipe.dev/pose#smooth_landmarks.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
detection to be considered successful. See details in
https://solutions.mediapipe.dev/pose#min_detection_confidence.
min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
pose landmarks to be considered tracked successfully. See details in
https://solutions.mediapipe.dev/pose#min_tracking_confidence.
"""
_download_oss_pose_landmark_model(model_complexity)
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'model_complexity': model_complexity,
'smooth_landmarks': smooth_landmarks and not static_image_mode,
},
calculator_params={
'ConstantSidePacketCalculator.packet': [
constant_side_packet_calculator_pb2
.ConstantSidePacketCalculatorOptions.ConstantSidePacket(
bool_value=not static_image_mode)
],
'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
min_detection_confidence,
'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=['pose_landmarks', 'pose_world_landmarks'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the pose landmarks on the most prominent person detected.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple that has two fields describing the landmarks on the most
      prominent person detected:
1) "pose_landmarks" field that contains the pose landmarks.
2) "pose_world_landmarks" field that contains the pose landmarks in
real-world 3D coordinates that are in meters with the origin at the
center between hips.
"""
results = super().process(input_data={'image': image})
if results.pose_landmarks:
for landmark in results.pose_landmarks.landmark:
landmark.ClearField('presence')
if results.pose_world_landmarks:
for landmark in results.pose_world_landmarks.landmark:
landmark.ClearField('presence')
return results
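
A minimal usage sketch of the Pose solution defined above, assuming a local image file 'image.jpg' (the file name is hypothetical):

import cv2
import mediapipe as mp

mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

image = cv2.imread('image.jpg')  # hypothetical local file
with mp_pose.Pose(static_image_mode=True, model_complexity=1) as pose:
  # The solution expects RGB input; OpenCV loads BGR.
  results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
if results.pose_landmarks:
  # Landmarks are indexable by the PoseLandmark enum defined above.
  nose = results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE]
  print(nose.x, nose.y, nose.z)  # normalized coordinates
  # POSE_CONNECTIONS pairs with the drawing utilities for visualization.
  mp_drawing.draw_landmarks(image, results.pose_landmarks,
                            mp_pose.POSE_CONNECTIONS)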

View File

@@ -0,0 +1,197 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.pose."""
import json
import os
import tempfile
from typing import NamedTuple
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
import numpy.testing as npt
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import drawing_utils as mp_drawing
from mediapipe.python.solutions import pose as mp_pose
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
DIFF_THRESHOLD = 15 # pixels
EXPECTED_POSE_LANDMARKS = np.array([[460, 283], [467, 273], [471, 273],
[474, 273], [465, 273], [465, 273],
[466, 273], [491, 277], [480, 277],
[470, 294], [465, 294], [545, 319],
[453, 329], [622, 323], [375, 316],
[696, 316], [299, 307], [719, 316],
[278, 306], [721, 311], [274, 304],
[713, 313], [283, 306], [520, 476],
[467, 471], [612, 550], [358, 490],
[701, 613], [349, 611], [709, 624],
[363, 630], [730, 633], [303, 628]])
WORLD_DIFF_THRESHOLD = 0.2 # meters
EXPECTED_POSE_WORLD_LANDMARKS = np.array([
[-0.11, -0.59, -0.15], [-0.09, -0.64, -0.16], [-0.09, -0.64, -0.16],
[-0.09, -0.64, -0.16], [-0.11, -0.64, -0.14], [-0.11, -0.64, -0.14],
[-0.11, -0.64, -0.14], [0.01, -0.65, -0.15], [-0.06, -0.64, -0.05],
[-0.07, -0.57, -0.15], [-0.09, -0.57, -0.12], [0.18, -0.49, -0.09],
[-0.14, -0.5, -0.03], [0.41, -0.48, -0.11], [-0.42, -0.5, -0.02],
[0.64, -0.49, -0.17], [-0.63, -0.51, -0.13], [0.7, -0.5, -0.19],
[-0.71, -0.53, -0.15], [0.72, -0.51, -0.23], [-0.69, -0.54, -0.19],
[0.66, -0.49, -0.19], [-0.64, -0.52, -0.15], [0.09, 0., -0.04],
[-0.09, -0., 0.03], [0.41, 0.23, -0.09], [-0.43, 0.1, -0.11],
[0.69, 0.49, -0.04], [-0.48, 0.47, -0.02], [0.72, 0.52, -0.04],
[-0.48, 0.51, -0.02], [0.8, 0.5, -0.14], [-0.59, 0.52, -0.11],
])
class PoseTest(parameterized.TestCase):
def _landmarks_list_to_array(self, landmark_list, image_shape):
rows, cols, _ = image_shape
return np.asarray([(lmk.x * cols, lmk.y * rows, lmk.z * cols)
for lmk in landmark_list.landmark])
def _world_landmarks_list_to_array(self, landmark_list):
return np.asarray([(lmk.x, lmk.y, lmk.z)
for lmk in landmark_list.landmark])
def _assert_diff_less(self, array1, array2, threshold):
npt.assert_array_less(np.abs(array1 - array2), threshold)
def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
mp_drawing.draw_landmarks(frame, results.pose_landmarks,
mp_pose.POSE_CONNECTIONS)
path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
'_frame_{}.png'.format(idx))
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_pose.Pose() as pose:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
pose.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_pose.Pose() as pose:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = pose.process(image)
self.assertIsNone(results.pose_landmarks)
@parameterized.named_parameters(('static_lite', True, 0, 3),
('static_full', True, 1, 3),
('static_heavy', True, 2, 3),
('video_lite', False, 0, 3),
('video_full', False, 1, 3),
('video_heavy', False, 2, 3))
def test_on_image(self, static_image_mode, model_complexity, num_frames):
image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
image = cv2.imread(image_path)
with mp_pose.Pose(static_image_mode=static_image_mode,
model_complexity=model_complexity) as pose:
for idx in range(num_frames):
results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# TODO: Add rendering of world 3D when supported.
self._annotate(image.copy(), results, idx)
self._assert_diff_less(
self._landmarks_list_to_array(results.pose_landmarks,
image.shape)[:, :2],
EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
self._assert_diff_less(
self._world_landmarks_list_to_array(results.pose_world_landmarks),
EXPECTED_POSE_WORLD_LANDMARKS, WORLD_DIFF_THRESHOLD)
@parameterized.named_parameters(
('full', 1, 'pose_squats.full.npz'))
def test_on_video(self, model_complexity, expected_name):
"""Tests pose models on a video."""
# If set to `True` will dump actual predictions to .npz and JSON files.
dump_predictions = False
# Set threshold for comparing actual and expected predictions in pixels.
diff_threshold = 15
world_diff_threshold = 0.1
video_path = os.path.join(os.path.dirname(__file__),
'testdata/pose_squats.mp4')
expected_path = os.path.join(os.path.dirname(__file__),
'testdata/{}'.format(expected_name))
# Predict pose landmarks for each frame.
video_cap = cv2.VideoCapture(video_path)
actual_per_frame = []
actual_world_per_frame = []
frame_idx = 0
with mp_pose.Pose(static_image_mode=False,
model_complexity=model_complexity) as pose:
while True:
# Get next frame of the video.
success, input_frame = video_cap.read()
if not success:
break
# Run pose tracker.
input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
result = pose.process(image=input_frame)
pose_landmarks = self._landmarks_list_to_array(result.pose_landmarks,
input_frame.shape)
pose_world_landmarks = self._world_landmarks_list_to_array(
result.pose_world_landmarks)
actual_per_frame.append(pose_landmarks)
actual_world_per_frame.append(pose_world_landmarks)
input_frame = cv2.cvtColor(input_frame, cv2.COLOR_RGB2BGR)
self._annotate(input_frame, result, frame_idx)
frame_idx += 1
actual = np.array(actual_per_frame)
actual_world = np.array(actual_world_per_frame)
if dump_predictions:
# Dump .npz
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
np.savez(tmp_file, predictions=actual, predictions_world=actual_world)
print('Predictions saved as .npz to {}'.format(tmp_file.name))
# Dump JSON
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
with open(tmp_file.name, 'w') as fl:
dump_data = {
'predictions': np.around(actual, 3).tolist(),
'predictions_world': np.around(actual_world, 3).tolist()
}
fl.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))
print('Predictions saved as JSON to {}'.format(tmp_file.name))
# Validate actual vs. expected landmarks.
expected = np.load(expected_path)['predictions']
assert actual.shape == expected.shape, (
'Unexpected shape of predictions: {} instead of {}'.format(
actual.shape, expected.shape))
self._assert_diff_less(
actual[..., :2], expected[..., :2], threshold=diff_threshold)
# Validate actual vs. expected world landmarks.
expected_world = np.load(expected_path)['predictions_world']
assert actual_world.shape == expected_world.shape, (
'Unexpected shape of world predictions: {} instead of {}'.format(
actual_world.shape, expected_world.shape))
self._assert_diff_less(
actual_world, expected_world, threshold=world_diff_threshold)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,76 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe Selfie Segmentation."""
from typing import NamedTuple
import numpy as np
# The following imports are needed because python pb2 silently discards
# unknown protobuf fields.
# pylint: disable=unused-import
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
from mediapipe.calculators.tensor import inference_calculator_pb2
from mediapipe.calculators.tensor import tensors_to_segmentation_calculator_pb2
from mediapipe.calculators.util import local_file_contents_calculator_pb2
from mediapipe.framework.tool import switch_container_pb2
# pylint: enable=unused-import
from mediapipe.python.solution_base import SolutionBase
BINARYPB_FILE_PATH = 'mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb'
class SelfieSegmentation(SolutionBase):
"""MediaPipe Selfie Segmentation.
MediaPipe Selfie Segmentation processes an RGB image and returns a
segmentation mask.
Please refer to
https://solutions.mediapipe.dev/selfie_segmentation#python-solution-api for
usage examples.
"""
def __init__(self, model_selection=0):
"""Initializes a MediaPipe Selfie Segmentation object.
Args:
model_selection: 0 or 1. 0 to select a general-purpose model, and 1 to
select a model more optimized for landscape images. See details in
https://solutions.mediapipe.dev/selfie_segmentation#model_selection.
"""
super().__init__(
binary_graph_path=BINARYPB_FILE_PATH,
side_inputs={
'model_selection': model_selection,
},
outputs=['segmentation_mask'])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns a segmentation mask.
Args:
image: An RGB image represented as a numpy ndarray.
Raises:
RuntimeError: If the underlying graph throws any error.
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with a "segmentation_mask" field that contains a float
type 2d np array representing the mask.
"""
return super().process(input_data={'image': image})
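
A minimal usage sketch for the SelfieSegmentation solution above, compositing the segmented person over a solid background; the input file name is hypothetical and the 0.5 threshold is a common choice, not prescribed by this file:

import cv2
import numpy as np
import mediapipe as mp

mp_selfie_segmentation = mp.solutions.selfie_segmentation

image = cv2.imread('portrait.jpg')  # hypothetical local file
with mp_selfie_segmentation.SelfieSegmentation(
    model_selection=0) as selfie_segmentation:
  results = selfie_segmentation.process(
      cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# The mask is a float 2d array; threshold it and composite the person
# over a solid gray background.
condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.5
background = np.full(image.shape, 192, dtype=np.uint8)
output = np.where(condition, image, background)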

View File

@@ -0,0 +1,68 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python.solutions.selfie_segmentation."""
import os
import tempfile
from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np
# resources dependency
# undeclared dependency
from mediapipe.python.solutions import selfie_segmentation as mp_selfie_segmentation
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
class SelfieSegmentationTest(parameterized.TestCase):
def _draw(self, frame: np.ndarray, mask: np.ndarray):
frame = np.minimum(frame, np.stack((mask,) * 3, axis=-1))
    path = os.path.join(tempfile.gettempdir(),
                        self.id().split('.')[-1] + '.png')
cv2.imwrite(path, frame)
def test_invalid_image_shape(self):
with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
with self.assertRaisesRegex(
ValueError, 'Input image must contain three channel rgb data.'):
selfie_segmentation.process(
np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def test_blank_image(self):
with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
image = np.zeros([100, 100, 3], dtype=np.uint8)
image.fill(255)
results = selfie_segmentation.process(image)
normalized_segmentation_mask = (results.segmentation_mask *
255).astype(int)
self.assertLess(np.amax(normalized_segmentation_mask), 1)
@parameterized.named_parameters(('general', 0), ('landscape', 1))
def test_segmentation(self, model_selection):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/portrait.jpg')
image = cv2.imread(image_path)
with mp_selfie_segmentation.SelfieSegmentation(
model_selection=model_selection) as selfie_segmentation:
results = selfie_segmentation.process(
cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
normalized_segmentation_mask = (results.segmentation_mask *
255).astype(int)
self._draw(image.copy(), normalized_segmentation_mask)
if __name__ == '__main__':
absltest.main()

View File

@@ -0,0 +1,75 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mediapipe.python._framework_bindings.timestamp."""
import time
from absl.testing import absltest
import mediapipe as mp
class TimestampTest(absltest.TestCase):
def test_timestamp(self):
t = mp.Timestamp(100)
self.assertEqual(t.value, 100)
self.assertEqual(t, 100)
self.assertEqual(str(t), '<mediapipe.Timestamp with value: 100>')
def test_timestamp_copy_constructor(self):
ts1 = mp.Timestamp(100)
ts2 = mp.Timestamp(ts1)
self.assertEqual(ts1, ts2)
  def test_timestamp_comparison(self):
ts1 = mp.Timestamp(100)
ts2 = mp.Timestamp(100)
self.assertEqual(ts1, ts2)
ts3 = mp.Timestamp(200)
self.assertNotEqual(ts1, ts3)
def test_timestamp_special_values(self):
t1 = mp.Timestamp.UNSET
self.assertEqual(str(t1), '<mediapipe.Timestamp with value: UNSET>')
t2 = mp.Timestamp.UNSTARTED
self.assertEqual(str(t2), '<mediapipe.Timestamp with value: UNSTARTED>')
t3 = mp.Timestamp.PRESTREAM
self.assertEqual(str(t3), '<mediapipe.Timestamp with value: PRESTREAM>')
t4 = mp.Timestamp.MIN
self.assertEqual(str(t4), '<mediapipe.Timestamp with value: MIN>')
t5 = mp.Timestamp.MAX
self.assertEqual(str(t5), '<mediapipe.Timestamp with value: MAX>')
t6 = mp.Timestamp.POSTSTREAM
self.assertEqual(str(t6), '<mediapipe.Timestamp with value: POSTSTREAM>')
t7 = mp.Timestamp.DONE
self.assertEqual(str(t7), '<mediapipe.Timestamp with value: DONE>')
def test_timestamp_comparisons(self):
ts1 = mp.Timestamp(100)
ts2 = mp.Timestamp(101)
self.assertGreater(ts2, ts1)
self.assertGreaterEqual(ts2, ts1)
self.assertLess(ts1, ts2)
self.assertLessEqual(ts1, ts2)
self.assertNotEqual(ts1, ts2)
def test_from_seconds(self):
now = time.time()
ts = mp.Timestamp.from_seconds(now)
self.assertAlmostEqual(now, ts.seconds(), delta=1)
if __name__ == '__main__':
absltest.main()
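
As a closing note on the timestamp semantics exercised above, a minimal sketch; the microsecond tick unit is an assumption inferred from the from_seconds/seconds round trip, not stated in this file:

import time
import mediapipe as mp

ts = mp.Timestamp.from_seconds(time.time())
print(ts.value)      # integer ticks (microseconds, by assumption)
print(ts.seconds())  # back to float seconds
# Special values participate in ordinary comparisons.
assert mp.Timestamp.MIN < ts < mp.Timestamp.MAX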