Initial commit
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
# Copyright 2020-2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""MediaPipe Python API."""
|
||||
|
||||
from mediapipe.python._framework_bindings import resource_util
|
||||
from mediapipe.python._framework_bindings.calculator_graph import CalculatorGraph
|
||||
from mediapipe.python._framework_bindings.calculator_graph import GraphInputStreamAddMode
|
||||
from mediapipe.python._framework_bindings.image import Image
|
||||
from mediapipe.python._framework_bindings.image_frame import ImageFormat
|
||||
from mediapipe.python._framework_bindings.image_frame import ImageFrame
|
||||
from mediapipe.python._framework_bindings.matrix import Matrix
|
||||
from mediapipe.python._framework_bindings.packet import Packet
|
||||
from mediapipe.python._framework_bindings.timestamp import Timestamp
|
||||
from mediapipe.python._framework_bindings.validated_graph_config import ValidatedGraphConfig
|
||||
import mediapipe.python.packet_creator
|
||||
import mediapipe.python.packet_getter
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,216 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python._framework_bindings.calculator_graph."""
|
||||
|
||||
# Dependency imports
|
||||
|
||||
from absl.testing import absltest
|
||||
import mediapipe as mp
|
||||
from google.protobuf import text_format
|
||||
from mediapipe.framework import calculator_pb2
|
||||
|
||||
|
||||
class GraphTest(absltest.TestCase):
  """Tests for the mp.CalculatorGraph Python binding.

  Covers construction failures (missing file, invalid node, unknown
  calculator), the supported ways of building a graph (proto config, text
  config, validated config), timestamp ordering errors and side packets.
  """

  def test_invalid_binary_graph_file(self):
    """A binary graph path that cannot be opened raises FileNotFoundError."""
    with self.assertRaisesRegex(
        FileNotFoundError,
        '(No such file or directory|The path does not exist)'):
      mp.CalculatorGraph(binary_graph_path='/tmp/abc.binarypb')

  def test_invalid_node_config(self):
    """A node declaring two inputs but one output is rejected with ValueError."""
    text_config = """
      node {
        calculator: 'PassThroughCalculator'
        input_stream: 'in'
        input_stream: 'in'
        output_stream: 'out'
      }
    """
    config_proto = calculator_pb2.CalculatorGraphConfig()
    text_format.Parse(text_config, config_proto)
    with self.assertRaisesRegex(
        ValueError,
        'Input and output streams to PassThroughCalculator must use matching tags and indexes.'
    ):
      mp.CalculatorGraph(graph_config=config_proto)

  def test_invalid_calculator_type(self):
    """A node naming an unregistered calculator raises RuntimeError."""
    text_config = """
      node {
        calculator: 'SomeUnknownCalculator'
        input_stream: 'in'
        output_stream: 'out'
      }
    """
    config_proto = calculator_pb2.CalculatorGraphConfig()
    text_format.Parse(text_config, config_proto)
    with self.assertRaisesRegex(
        RuntimeError, 'Unable to find Calculator \"SomeUnknownCalculator\"'):
      mp.CalculatorGraph(graph_config=config_proto)

  def test_graph_initialized_with_proto_config(self):
    """A graph built from a CalculatorGraphConfig proto passes packets through.

    Two packets are sent, one timestamped via the `timestamp` argument and one
    via `Packet.at()`, and both must arrive on the observed output stream.
    """
    text_config = """
      max_queue_size: 1
      input_stream: 'in'
      output_stream: 'out'
      node {
        calculator: 'PassThroughCalculator'
        input_stream: 'in'
        output_stream: 'out'
      }
    """
    config_proto = calculator_pb2.CalculatorGraphConfig()
    text_format.Parse(text_config, config_proto)

    hello_world_packet = mp.packet_creator.create_string('hello world')
    out = []
    # The graph is constructed exactly once from the parsed proto.
    graph = mp.CalculatorGraph(graph_config=config_proto)
    graph.observe_output_stream('out', lambda _, packet: out.append(packet))
    graph.start_run()
    graph.add_packet_to_input_stream(
        stream='in', packet=hello_world_packet, timestamp=0)
    graph.add_packet_to_input_stream(
        stream='in', packet=hello_world_packet.at(1))
    graph.close()
    self.assertEqual(graph.graph_input_stream_add_mode,
                     mp.GraphInputStreamAddMode.WAIT_TILL_NOT_FULL)
    self.assertEqual(graph.max_queue_size, 1)
    self.assertFalse(graph.has_error())
    self.assertLen(out, 2)
    self.assertEqual(out[0].timestamp, 0)
    self.assertEqual(out[1].timestamp, 1)
    self.assertEqual(mp.packet_getter.get_str(out[0]), 'hello world')
    self.assertEqual(mp.packet_getter.get_str(out[1]), 'hello world')

  def test_graph_initialized_with_text_config(self):
    """A graph built directly from a text-format config passes packets through."""
    text_config = """
      max_queue_size: 1
      input_stream: 'in'
      output_stream: 'out'
      node {
        calculator: 'PassThroughCalculator'
        input_stream: 'in'
        output_stream: 'out'
      }
    """

    hello_world_packet = mp.packet_creator.create_string('hello world')
    out = []
    graph = mp.CalculatorGraph(graph_config=text_config)
    graph.observe_output_stream('out', lambda _, packet: out.append(packet))
    graph.start_run()
    graph.add_packet_to_input_stream(
        stream='in', packet=hello_world_packet.at(0))
    graph.add_packet_to_input_stream(
        stream='in', packet=hello_world_packet, timestamp=1)
    graph.close()
    self.assertEqual(graph.graph_input_stream_add_mode,
                     mp.GraphInputStreamAddMode.WAIT_TILL_NOT_FULL)
    self.assertEqual(graph.max_queue_size, 1)
    self.assertFalse(graph.has_error())
    self.assertLen(out, 2)
    self.assertEqual(out[0].timestamp, 0)
    self.assertEqual(out[1].timestamp, 1)
    self.assertEqual(mp.packet_getter.get_str(out[0]), 'hello world')
    self.assertEqual(mp.packet_getter.get_str(out[1]), 'hello world')

  def test_graph_validation_and_initialization(self):
    """A graph built from an initialized ValidatedGraphConfig passes packets."""
    text_config = """
      max_queue_size: 1
      input_stream: 'in'
      output_stream: 'out'
      node {
        calculator: 'PassThroughCalculator'
        input_stream: 'in'
        output_stream: 'out'
      }
    """

    hello_world_packet = mp.packet_creator.create_string('hello world')
    out = []
    # initialized() must flip from False to True across initialize().
    validated_graph_config = mp.ValidatedGraphConfig()
    self.assertFalse(validated_graph_config.initialized())
    validated_graph_config.initialize(graph_config=text_config)
    self.assertTrue(validated_graph_config.initialized())

    graph = mp.CalculatorGraph(validated_graph_config=validated_graph_config)
    graph.observe_output_stream('out', lambda _, packet: out.append(packet))
    graph.start_run()
    graph.add_packet_to_input_stream(
        stream='in', packet=hello_world_packet.at(0))
    graph.add_packet_to_input_stream(
        stream='in', packet=hello_world_packet, timestamp=1)
    graph.close()
    self.assertEqual(graph.graph_input_stream_add_mode,
                     mp.GraphInputStreamAddMode.WAIT_TILL_NOT_FULL)
    self.assertEqual(graph.max_queue_size, 1)
    self.assertFalse(graph.has_error())
    self.assertLen(out, 2)
    self.assertEqual(out[0].timestamp, 0)
    self.assertEqual(out[1].timestamp, 1)
    self.assertEqual(mp.packet_getter.get_str(out[0]), 'hello world')
    self.assertEqual(mp.packet_getter.get_str(out[1]), 'hello world')

  def test_insert_packets_with_same_timestamp(self):
    """Re-sending a packet at an already used timestamp surfaces a ValueError.

    The error is expected from the second wait_until_idle() call, not from the
    add_packet_to_input_stream() call itself.
    """
    text_config = """
      max_queue_size: 1
      input_stream: 'in'
      output_stream: 'out'
      node {
        calculator: 'PassThroughCalculator'
        input_stream: 'in'
        output_stream: 'out'
      }
    """
    config_proto = calculator_pb2.CalculatorGraphConfig()
    text_format.Parse(text_config, config_proto)

    hello_world_packet = mp.packet_creator.create_string('hello world')
    out = []
    graph = mp.CalculatorGraph(graph_config=config_proto)
    graph.observe_output_stream('out', lambda _, packet: out.append(packet))
    graph.start_run()
    graph.add_packet_to_input_stream(
        stream='in', packet=hello_world_packet.at(0))
    graph.wait_until_idle()
    graph.add_packet_to_input_stream(
        stream='in', packet=hello_world_packet.at(0))
    with self.assertRaisesRegex(
        ValueError, 'Current minimum expected timestamp is 1 but received 0.'):
      graph.wait_until_idle()

  def test_side_packet_graph(self):
    """An input side packet '42' is exposed as the uint output side packet 42."""
    text_config = """
      node {
        calculator: 'StringToUint64Calculator'
        input_side_packet: "string"
        output_side_packet: "number"
      }
    """
    config_proto = calculator_pb2.CalculatorGraphConfig()
    text_format.Parse(text_config, config_proto)
    graph = mp.CalculatorGraph(graph_config=config_proto)
    graph.start_run(
        input_side_packets={'string': mp.packet_creator.create_string('42')})
    graph.wait_until_done()
    self.assertFalse(graph.has_error())
    self.assertEqual(
        mp.packet_getter.get_uint(graph.get_output_side_packet('number')), 42)
|
||||
|
||||
|
||||
# Run the tests through absl's test runner when executed as a script.
if __name__ == '__main__':
  absltest.main()
|
@@ -0,0 +1,186 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python._framework_bindings.image_frame."""
|
||||
|
||||
import gc
|
||||
import random
|
||||
import sys
|
||||
from absl.testing import absltest
|
||||
import cv2
|
||||
import mediapipe as mp
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
|
||||
|
||||
# TODO: Add unit tests specifically for memory management.
|
||||
class ImageFrameTest(absltest.TestCase):
  """Tests for building mp.ImageFrame from numpy data and reading it back.

  Each creation test checks that numpy_view() equals the source array and
  that invalid indices raise IndexError. The last two tests check how
  numpy_view() affects the Python reference count of the image frame.
  """

  def test_create_image_frame_from_gray_cv_mat(self):
    """GRAY8 frame from a cv2 grayscale matrix of random size."""
    w, h = random.randrange(3, 100), random.randrange(3, 100)
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8),
        cv2.COLOR_RGB2GRAY)
    # Plant a known value so element access can be verified below.
    mat[2, 2] = 42
    image_frame = mp.ImageFrame(image_format=mp.ImageFormat.GRAY8, data=mat)
    self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
    # A third index is not accepted for a single-channel frame.
    with self.assertRaisesRegex(IndexError, 'index dimension mismatch'):
      print(image_frame[w, h, 1])
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image_frame[w, h])
    self.assertEqual(42, image_frame[2, 2])

  def test_create_image_frame_from_rgb_cv_mat(self):
    """SRGB frame from a 3-channel uint8 cv2 matrix of random size."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        cv2.COLOR_RGB2BGR)
    # Plant a known value so element access can be verified below.
    mat[2, 2, 1] = 42
    image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat)
    self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image_frame[w, h, channels])
    self.assertEqual(42, image_frame[2, 2, 1])

  def test_create_image_frame_from_rgb48_cv_mat(self):
    """SRGB48 frame from a 3-channel uint16 cv2 matrix of random size."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
    mat = cv2.cvtColor(
        np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16),
        cv2.COLOR_RGB2BGR)
    # Plant a known value so element access can be verified below.
    mat[2, 2, 1] = 42
    image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB48, data=mat)
    self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image_frame[w, h, channels])
    self.assertEqual(42, image_frame[2, 2, 1])

  def test_create_image_frame_from_gray_pil_image(self):
    """GRAY8 frame from the array form of a PIL 'L' mode image."""
    w, h = random.randrange(3, 100), random.randrange(3, 100)
    img = PIL.Image.fromarray(
        np.random.randint(2**8 - 1, size=(h, w), dtype=np.uint8), 'L')
    image_frame = mp.ImageFrame(
        image_format=mp.ImageFormat.GRAY8, data=np.asarray(img))
    self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view()))
    # A third index is not accepted for a single-channel frame.
    with self.assertRaisesRegex(IndexError, 'index dimension mismatch'):
      print(image_frame[w, h, 1])
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image_frame[w, h])

  def test_create_image_frame_from_rgb_pil_image(self):
    """SRGB frame from the array form of a PIL 'RGB' mode image."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
    img = PIL.Image.fromarray(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        'RGB')
    image_frame = mp.ImageFrame(
        image_format=mp.ImageFormat.SRGB, data=np.asarray(img))
    self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view()))
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image_frame[w, h, channels])

  def test_create_image_frame_from_rgba64_pil_image(self):
    """SRGBA64 frame from a PIL 'RGBA' image whose array is cast to uint16."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 4
    img = PIL.Image.fromarray(
        np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16),
        'RGBA')
    image_frame = mp.ImageFrame(
        image_format=mp.ImageFormat.SRGBA64,
        data=np.asarray(img).astype(np.uint16))
    self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view()))
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image_frame[1000, 1000, 1000])

  def test_image_frame_numby_view(self):
    """numpy_view() output is read-only; a np.copy of it is writable."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        cv2.COLOR_RGB2BGR)
    image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat)
    output_ndarray = image_frame.numpy_view()
    self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
    # The output of numpy_view() is a reference to the internal data and it's
    # unwritable after creation.
    with self.assertRaisesRegex(ValueError,
                                'assignment destination is read-only'):
      output_ndarray[0, 0, 0] = 0
    copied_ndarray = np.copy(output_ndarray)
    copied_ndarray[0, 0, 0] = 0

  def test_cropped_gray8_image(self):
    """GRAY8 frame from a contiguous copy of a cropped grayscale matrix."""
    w, h = random.randrange(20, 100), random.randrange(20, 100)
    channels, offset = 3, 10
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        cv2.COLOR_RGB2GRAY)
    # The crop is passed through np.ascontiguousarray before construction.
    image_frame = mp.ImageFrame(
        image_format=mp.ImageFormat.GRAY8,
        data=np.ascontiguousarray(mat[offset:-offset, offset:-offset]))
    self.assertTrue(
        np.array_equal(mat[offset:-offset, offset:-offset],
                       image_frame.numpy_view()))

  def test_cropped_rgb_image(self):
    """SRGB frame from a contiguous copy of a cropped 3-channel matrix."""
    w, h = random.randrange(20, 100), random.randrange(20, 100)
    channels, offset = 3, 10
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        cv2.COLOR_RGB2BGR)
    # The crop is passed through np.ascontiguousarray before construction.
    image_frame = mp.ImageFrame(
        image_format=mp.ImageFormat.SRGB,
        data=np.ascontiguousarray(mat[offset:-offset, offset:-offset, :]))
    self.assertTrue(
        np.array_equal(mat[offset:-offset, offset:-offset, :],
                       image_frame.numpy_view()))

  # For image frames that store contiguous data, the output of numpy_view()
  # points to the pixel data of the original image frame object. The life cycle
  # of the data array should tie to the image frame object.
  def test_image_frame_numpy_view_with_contiguous_data(self):
    """Each live numpy_view() of a contiguous frame adds one reference to it."""
    w, h = 640, 480
    mat = np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8)
    image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat)
    self.assertTrue(image_frame.is_contiguous())
    initial_ref_count = sys.getrefcount(image_frame)
    self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
    # Get 2 data array objects and verify that the image frame's ref count is
    # increased by 2.
    np_view = image_frame.numpy_view()
    self.assertEqual(sys.getrefcount(image_frame), initial_ref_count + 1)
    np_view2 = image_frame.numpy_view()
    self.assertEqual(sys.getrefcount(image_frame), initial_ref_count + 2)
    del np_view
    del np_view2
    gc.collect()
    # After the two data array objects getting destroyed, the current ref count
    # should equal to the initial ref count.
    self.assertEqual(sys.getrefcount(image_frame), initial_ref_count)

  # For image frames that store non contiguous data, the output of numpy_view()
  # stores a copy of the pixel data of the image frame object. The life cycle of
  # the data array doesn't tie to the image frame object.
  def test_image_frame_numpy_view_with_non_contiguous_data(self):
    """numpy_view() of a non-contiguous frame leaves its ref count unchanged."""
    w, h = 641, 481
    mat = np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8)
    image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat)
    self.assertFalse(image_frame.is_contiguous())
    initial_ref_count = sys.getrefcount(image_frame)
    self.assertTrue(np.array_equal(mat, image_frame.numpy_view()))
    np_view = image_frame.numpy_view()
    self.assertEqual(sys.getrefcount(image_frame), initial_ref_count)
    del np_view
    gc.collect()
    self.assertEqual(sys.getrefcount(image_frame), initial_ref_count)
|
||||
|
||||
|
||||
# Run the tests through absl's test runner when executed as a script.
if __name__ == '__main__':
  absltest.main()
|
183
.venv/lib/python3.9/site-packages/mediapipe/python/image_test.py
Normal file
183
.venv/lib/python3.9/site-packages/mediapipe/python/image_test.py
Normal file
@@ -0,0 +1,183 @@
|
||||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python._framework_bindings.image."""
|
||||
|
||||
import gc
|
||||
import random
|
||||
import sys
|
||||
from absl.testing import absltest
|
||||
import cv2
|
||||
import mediapipe as mp
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
|
||||
|
||||
# TODO: Add unit tests specifically for memory management.
|
||||
class ImageTest(absltest.TestCase):
  """Tests for building mp.Image from numpy data and reading it back.

  Each creation test checks that numpy_view() equals the source array and
  that invalid indices raise IndexError. The last two tests check how
  numpy_view() affects the Python reference count of the image.
  """

  def test_create_image_from_gray_cv_mat(self):
    """GRAY8 image from a cv2 grayscale matrix of random size."""
    w, h = random.randrange(3, 100), random.randrange(3, 100)
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8),
        cv2.COLOR_RGB2GRAY)
    # Plant a known value so element access can be verified below.
    mat[2, 2] = 42
    image = mp.Image(image_format=mp.ImageFormat.GRAY8, data=mat)
    self.assertTrue(np.array_equal(mat, image.numpy_view()))
    # A third index is not accepted for a single-channel image.
    with self.assertRaisesRegex(IndexError, 'index dimension mismatch'):
      print(image[w, h, 1])
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image[w, h])
    self.assertEqual(42, image[2, 2])

  def test_create_image_from_rgb_cv_mat(self):
    """SRGB image from a 3-channel uint8 cv2 matrix of random size."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        cv2.COLOR_RGB2BGR)
    # Plant a known value so element access can be verified below.
    mat[2, 2, 1] = 42
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mat)
    self.assertTrue(np.array_equal(mat, image.numpy_view()))
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image[w, h, channels])
    self.assertEqual(42, image[2, 2, 1])

  def test_create_image_from_rgb48_cv_mat(self):
    """SRGB48 image from a 3-channel uint16 cv2 matrix of random size."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
    mat = cv2.cvtColor(
        np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16),
        cv2.COLOR_RGB2BGR)
    # Plant a known value so element access can be verified below.
    mat[2, 2, 1] = 42
    image = mp.Image(image_format=mp.ImageFormat.SRGB48, data=mat)
    self.assertTrue(np.array_equal(mat, image.numpy_view()))
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image[w, h, channels])
    self.assertEqual(42, image[2, 2, 1])

  def test_create_image_from_gray_pil_image(self):
    """GRAY8 image from the array form of a PIL 'L' mode image."""
    w, h = random.randrange(3, 100), random.randrange(3, 100)
    img = PIL.Image.fromarray(
        np.random.randint(2**8 - 1, size=(h, w), dtype=np.uint8), 'L')
    image = mp.Image(image_format=mp.ImageFormat.GRAY8, data=np.asarray(img))
    self.assertTrue(np.array_equal(np.asarray(img), image.numpy_view()))
    # A third index is not accepted for a single-channel image.
    with self.assertRaisesRegex(IndexError, 'index dimension mismatch'):
      print(image[w, h, 1])
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image[w, h])

  def test_create_image_from_rgb_pil_image(self):
    """SRGB image from the array form of a PIL 'RGB' mode image."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
    img = PIL.Image.fromarray(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        'RGB')
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(img))
    self.assertTrue(np.array_equal(np.asarray(img), image.numpy_view()))
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image[w, h, channels])

  def test_create_image_from_rgba64_pil_image(self):
    """SRGBA64 image from a PIL 'RGBA' image whose array is cast to uint16."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 4
    img = PIL.Image.fromarray(
        np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16),
        'RGBA')
    image = mp.Image(
        image_format=mp.ImageFormat.SRGBA64,
        data=np.asarray(img).astype(np.uint16))
    self.assertTrue(np.array_equal(np.asarray(img), image.numpy_view()))
    with self.assertRaisesRegex(IndexError, 'out of bounds'):
      print(image[1000, 1000, 1000])

  def test_image_numby_view(self):
    """numpy_view() output is read-only; a np.copy of it is writable."""
    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        cv2.COLOR_RGB2BGR)
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mat)
    output_ndarray = image.numpy_view()
    self.assertTrue(np.array_equal(mat, image.numpy_view()))
    # The output of numpy_view() is a reference to the internal data and it's
    # unwritable after creation.
    with self.assertRaisesRegex(ValueError,
                                'assignment destination is read-only'):
      output_ndarray[0, 0, 0] = 0
    copied_ndarray = np.copy(output_ndarray)
    copied_ndarray[0, 0, 0] = 0

  def test_cropped_gray8_image(self):
    """GRAY8 image from a contiguous copy of a cropped grayscale matrix."""
    w, h = random.randrange(20, 100), random.randrange(20, 100)
    channels, offset = 3, 10
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        cv2.COLOR_RGB2GRAY)
    # The crop is passed through np.ascontiguousarray before construction.
    image = mp.Image(
        image_format=mp.ImageFormat.GRAY8,
        data=np.ascontiguousarray(mat[offset:-offset, offset:-offset]))
    self.assertTrue(
        np.array_equal(mat[offset:-offset, offset:-offset], image.numpy_view()))

  def test_cropped_rgb_image(self):
    """SRGB image from a contiguous copy of a cropped 3-channel matrix."""
    w, h = random.randrange(20, 100), random.randrange(20, 100)
    channels, offset = 3, 10
    mat = cv2.cvtColor(
        np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8),
        cv2.COLOR_RGB2BGR)
    # The crop is passed through np.ascontiguousarray before construction.
    image = mp.Image(
        image_format=mp.ImageFormat.SRGB,
        data=np.ascontiguousarray(mat[offset:-offset, offset:-offset, :]))
    self.assertTrue(
        np.array_equal(mat[offset:-offset, offset:-offset, :],
                       image.numpy_view()))

  # For images that store contiguous data, the output of numpy_view() points to
  # the pixel data of the original image object. The life cycle of the data
  # array should tie to the image object.
  def test_image_numpy_view_with_contiguous_data(self):
    """Each live numpy_view() of a contiguous image adds one reference to it."""
    w, h = 640, 480
    mat = np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8)
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mat)
    self.assertTrue(image.is_contiguous())
    initial_ref_count = sys.getrefcount(image)
    self.assertTrue(np.array_equal(mat, image.numpy_view()))
    # Get 2 data array objects and verify that the image's ref count is
    # increased by 2.
    np_view = image.numpy_view()
    self.assertEqual(sys.getrefcount(image), initial_ref_count + 1)
    np_view2 = image.numpy_view()
    self.assertEqual(sys.getrefcount(image), initial_ref_count + 2)
    del np_view
    del np_view2
    gc.collect()
    # After the two data array objects getting destroyed, the current ref count
    # should equal to the initial ref count.
    self.assertEqual(sys.getrefcount(image), initial_ref_count)

  # For images that store non contiguous data, the output of numpy_view()
  # stores a copy of the pixel data of the image object. The life cycle of the
  # data array doesn't tie to the image object.
  def test_image_numpy_view_with_non_contiguous_data(self):
    """numpy_view() of a non-contiguous image leaves its ref count unchanged."""
    w, h = 641, 481
    mat = np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8)
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mat)
    self.assertFalse(image.is_contiguous())
    initial_ref_count = sys.getrefcount(image)
    self.assertTrue(np.array_equal(mat, image.numpy_view()))
    np_view = image.numpy_view()
    self.assertEqual(sys.getrefcount(image), initial_ref_count)
    del np_view
    gc.collect()
    self.assertEqual(sys.getrefcount(image), initial_ref_count)
|
||||
|
||||
|
||||
# Run the tests through absl's test runner when executed as a script.
if __name__ == '__main__':
  absltest.main()
|
@@ -0,0 +1,274 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""The public facing packet creator APIs."""
|
||||
|
||||
from typing import List, Union
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from google.protobuf import message
|
||||
from mediapipe.python._framework_bindings import _packet_creator
|
||||
from mediapipe.python._framework_bindings import image
|
||||
from mediapipe.python._framework_bindings import image_frame
|
||||
from mediapipe.python._framework_bindings import packet
|
||||
|
||||
|
||||
# Re-export the creator functions of the _packet_creator binding module so
# they are reachable directly as attributes of this module.
create_string = _packet_creator.create_string
create_bool = _packet_creator.create_bool
create_int = _packet_creator.create_int
create_int8 = _packet_creator.create_int8
create_int16 = _packet_creator.create_int16
create_int32 = _packet_creator.create_int32
create_int64 = _packet_creator.create_int64
create_uint8 = _packet_creator.create_uint8
create_uint16 = _packet_creator.create_uint16
create_uint32 = _packet_creator.create_uint32
create_uint64 = _packet_creator.create_uint64
create_float = _packet_creator.create_float
create_double = _packet_creator.create_double
create_int_array = _packet_creator.create_int_array
create_float_array = _packet_creator.create_float_array
create_int_vector = _packet_creator.create_int_vector
create_bool_vector = _packet_creator.create_bool_vector
create_float_vector = _packet_creator.create_float_vector
create_string_vector = _packet_creator.create_string_vector
create_packet_vector = _packet_creator.create_packet_vector
create_string_to_packet_map = _packet_creator.create_string_to_packet_map
create_matrix = _packet_creator.create_matrix
|
||||
|
||||
|
||||
def create_image_frame(
    data: Union[image_frame.ImageFrame, np.ndarray],
    *,
    image_format: Union[image_frame.ImageFormat, None] = None,
    copy: Union[bool, None] = None) -> packet.Packet:
  """Create a MediaPipe ImageFrame packet.

  A MediaPipe ImageFrame packet can be created from an existing MediaPipe
  ImageFrame object and the data will be realigned and copied into a new
  ImageFrame object inside of the packet.

  A MediaPipe ImageFrame packet can also be created from the raw pixel data
  represented as a numpy array with one of the uint8, uint16, and float data
  types. There are three data ownership modes depending on how the 'copy' arg
  is set.

  i) Default mode
  If copy is not set, mutable data is always copied while immutable data is
  taken by reference.

  ii) Copy mode (safe)
  If copy is set to True, the data will be realigned and copied into an
  ImageFrame object inside of the packet regardless of the immutability of the
  original data.

  iii) Reference mode (dangerous)
  If copy is set to False, the data will be forced to be shared. If the data is
  mutable (data.flags.writeable is True), a RuntimeWarning will be issued.

  Args:
    data: A MediaPipe ImageFrame object or the raw pixel data that is
      represented as a numpy ndarray.
    image_format: One of the image_frame.ImageFormat enum types. Required when
      "data" is a numpy ndarray; optional (but checked for consistency) when
      "data" is an ImageFrame object.
    copy: Indicate if the packet should copy the data from the numpy ndarray.
      Leave unset (None) to select the default mode described above.

  Returns:
    A MediaPipe ImageFrame Packet.

  Raises:
    ValueError:
      i) When "data" is a numpy ndarray, "image_format" is not provided or
        the "data" array is not c_contiguous in the reference mode.
      ii) When "data" is an ImageFrame object, the "image_format" arg doesn't
        match the image format of the "data" ImageFrame object or "copy" is
        explicitly set to False.
    TypeError: If "image format" doesn't match "data" array's data type.

  Examples:
    np_array = np.random.randint(255, size=(321, 123, 3), dtype=np.uint8)
    # Copy mode by default if the data array is writable.
    image_frame_packet = mp.packet_creator.create_image_frame(
        image_format=mp.ImageFormat.SRGB, data=np_array)

    # Make the array unwriteable to trigger the reference mode.
    np_array.flags.writeable = False
    image_frame_packet = mp.packet_creator.create_image_frame(
        image_format=mp.ImageFormat.SRGB, data=np_array)

    image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=np_array)
    image_frame_packet = mp.packet_creator.create_image_frame(image_frame)

  """
  if isinstance(data, image_frame.ImageFrame):
    if image_format is not None and data.image_format != image_format:
      raise ValueError(
          'The provided image_format doesn\'t match the one from the data arg.')
    if copy is not None and not copy:
      # Taking a reference will make the created packet be mutable since the
      # ImageFrame object can still be manipulated in Python, which voids packet
      # immutability.
      raise ValueError(
          'Creating ImageFrame packet by taking a reference of another '
          'ImageFrame object is not supported yet.')
    # pylint:disable=protected-access
    return _packet_creator._create_image_frame_from_image_frame(data)
    # pylint:enable=protected-access

  # From here on "data" is treated as raw pixel data (a numpy ndarray).
  if image_format is None:
    raise ValueError('Please provide \'image_format\' with \'data\'.')
  # If the copy arg is not set, copy the data when it is mutable (writeable).
  # Otherwise, take a reference of the immutable data to avoid a data copy.
  if copy is None:
    copy = bool(data.flags.writeable)
  if not copy:
    # TODO: Investigate why the first 2 bytes of the data has data
    # corruption when "data" is not c_contiguous.
    if not data.flags.c_contiguous:
      raise ValueError(
          'Reference mode is unavailable if \'data\' is not c_contiguous.')
    if data.flags.writeable:
      # stacklevel=2 attributes the warning to the caller of this function.
      warnings.warn(
          '\'data\' is still writeable. Taking a reference of the data to '
          'create ImageFrame packet is dangerous.', RuntimeWarning, 2)
  # pylint:disable=protected-access
  return _packet_creator._create_image_frame_from_pixel_data(
      image_format, data, copy)
  # pylint:enable=protected-access
|
||||
|
||||
|
||||
def create_image(
    data: Union[image.Image, np.ndarray],
    *,
    image_format: Union[image_frame.ImageFormat, None] = None,
    copy: Union[bool, None] = None) -> packet.Packet:
  """Create a MediaPipe Image packet.

  A MediaPipe Image packet can be created from an existing MediaPipe
  Image object and the data will be realigned and copied into a new
  Image object inside of the packet.

  A MediaPipe Image packet can also be created from the raw pixel data
  represented as a numpy array with one of the uint8, uint16, and float data
  types. There are three data ownership modes depending on how the 'copy' arg
  is set.

  i) Default mode
  If copy is not set, mutable data is always copied while immutable data is
  taken by reference.

  ii) Copy mode (safe)
  If copy is set to True, the data will be realigned and copied into an
  Image object inside of the packet regardless of the immutability of the
  original data.

  iii) Reference mode (dangerous)
  If copy is set to False, the data will be forced to be shared. If the data is
  mutable (data.flags.writeable is True), a RuntimeWarning will be issued.

  Args:
    data: A MediaPipe Image object or the raw pixel data that is represented as
      a numpy ndarray.
    image_format: One of the mp.ImageFormat enum types. Required when "data" is
      a numpy ndarray; optional (but checked for consistency) when "data" is an
      Image object.
    copy: Indicate if the packet should copy the data from the numpy ndarray.
      Leave unset (None) to select the default mode described above.

  Returns:
    A MediaPipe Image Packet.

  Raises:
    ValueError:
      i) When "data" is a numpy ndarray, "image_format" is not provided or
        the "data" array is not c_contiguous in the reference mode.
      ii) When "data" is an Image object, the "image_format" arg doesn't
        match the image format of the "data" Image object or "copy" is
        explicitly set to False.
    TypeError: If "image format" doesn't match "data" array's data type.

  Examples:
    np_array = np.random.randint(255, size=(321, 123, 3), dtype=np.uint8)
    # Copy mode by default if the data array is writable.
    image_packet = mp.packet_creator.create_image(
        image_format=mp.ImageFormat.SRGB, data=np_array)

    # Make the array unwriteable to trigger the reference mode.
    np_array.flags.writeable = False
    image_packet = mp.packet_creator.create_image(
        image_format=mp.ImageFormat.SRGB, data=np_array)

    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np_array)
    image_packet = mp.packet_creator.create_image(image)

  """
  if isinstance(data, image.Image):
    if image_format is not None and data.image_format != image_format:
      raise ValueError(
          'The provided image_format doesn\'t match the one from the data arg.')
    if copy is not None and not copy:
      # Taking a reference will make the created packet be mutable since the
      # Image object can still be manipulated in Python, which voids packet
      # immutability.
      raise ValueError(
          'Creating Image packet by taking a reference of another Image '
          'object is not supported yet.')
    # pylint:disable=protected-access
    return _packet_creator._create_image_from_image(data)
    # pylint:enable=protected-access

  # From here on "data" is treated as raw pixel data (a numpy ndarray).
  if image_format is None:
    raise ValueError('Please provide \'image_format\' with \'data\'.')
  # If the copy arg is not set, copy the data when it is mutable (writeable).
  # Otherwise, take a reference of the immutable data to avoid a data copy.
  if copy is None:
    copy = bool(data.flags.writeable)
  if not copy:
    # TODO: Investigate why the first 2 bytes of the data has data
    # corruption when "data" is not c_contiguous.
    if not data.flags.c_contiguous:
      raise ValueError(
          'Reference mode is unavailable if \'data\' is not c_contiguous.')
    if data.flags.writeable:
      # stacklevel=2 attributes the warning to the caller of this function.
      warnings.warn(
          '\'data\' is still writeable. Taking a reference of the data to '
          'create Image packet is dangerous.', RuntimeWarning, 2)
  # pylint:disable=protected-access
  return _packet_creator._create_image_from_pixel_data(
      image_format, data, copy)
  # pylint:enable=protected-access
|
||||
|
||||
|
||||
def create_proto(proto_message: message.Message) -> packet.Packet:
  """Wrap a Python protobuf message in a MediaPipe packet.

  The message is handed to the bindings as its fully-qualified type name plus
  its serialized bytes.

  Args:
    proto_message: A Python protobuf message.

  Returns:
    A MediaPipe protobuf message Packet.

  Raises:
    RuntimeError: If the protobuf message type is not registered in MediaPipe.

  Examples:
    detection = detection_pb2.Detection()
    text_format.Parse('score: 0.5', detection)
    packet = mp.packet_creator.create_proto(detection)
    output_detection = mp.packet_getter.get_proto(packet)
  """
  type_name = proto_message.DESCRIPTOR.full_name
  payload = proto_message.SerializeToString()
  # pylint:disable=protected-access
  return _packet_creator._create_proto(type_name, payload)
  # pylint:enable=protected-access
|
||||
|
||||
|
||||
def create_proto_vector(message_list: List[message.Message]) -> packet.Packet:
  """Placeholder for creating a packet from a list of protobuf messages.

  Args:
    message_list: A list of Python protobuf messages (currently ignored).

  Raises:
    NotImplementedError: Always; this creator has no implementation yet.
  """
  del message_list  # Unused until an implementation exists.
  raise NotImplementedError('create_proto_vector is not implemented.')
|
@@ -0,0 +1,118 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""The public facing packet getter APIs."""
|
||||
|
||||
from typing import List, Type
|
||||
|
||||
from google.protobuf import message
|
||||
from google.protobuf import symbol_database
|
||||
from mediapipe.python._framework_bindings import _packet_getter
|
||||
from mediapipe.python._framework_bindings import packet as mp_packet
|
||||
|
||||
# Public aliases: each name below re-exports the function of the same name
# defined in the _packet_getter bindings module, so that callers can use
# mp.packet_getter.get_*() without touching the private module directly.
get_str = _packet_getter.get_str
get_bytes = _packet_getter.get_bytes
get_bool = _packet_getter.get_bool
get_int = _packet_getter.get_int
get_uint = _packet_getter.get_uint
get_float = _packet_getter.get_float
get_int_list = _packet_getter.get_int_list
get_bool_list = _packet_getter.get_bool_list
get_float_list = _packet_getter.get_float_list
get_str_list = _packet_getter.get_str_list
get_packet_list = _packet_getter.get_packet_list
get_str_to_packet_dict = _packet_getter.get_str_to_packet_dict
get_image = _packet_getter.get_image
get_image_frame = _packet_getter.get_image_frame
get_matrix = _packet_getter.get_matrix
|
||||
|
||||
|
||||
def get_proto(packet: mp_packet.Packet) -> message.Message:
  """Get the content of a MediaPipe proto Packet as a proto message.

  The packet's proto type name is resolved to a Python message class through
  the default protobuf symbol database, and the packet's serialized bytes are
  parsed into a new instance of that class.

  Args:
    packet: A MediaPipe proto Packet.

  Returns:
    A proto message instance.

  Raises:
    TypeError: If the message descriptor can't be found by type name.

  Examples:
    detection = detection_pb2.Detection()
    text_format.Parse('score: 0.5', detection)
    proto_packet = mp.packet_creator.create_proto(detection)
    output_proto = mp.packet_getter.get_proto(proto_packet)
  """
  # pylint:disable=protected-access
  proto_type_name = _packet_getter._get_proto_type_name(packet)
  # pylint:enable=protected-access
  symbol_db = symbol_database.Default()
  try:
    descriptor = symbol_db.pool.FindMessageTypeByName(proto_type_name)
  except KeyError as err:
    # Keep the original lookup failure attached as the explicit cause.
    raise TypeError('Can not find message descriptor by type name: %s' %
                    proto_type_name) from err

  # NOTE(review): newer protobuf releases reportedly deprecate
  # SymbolDatabase.GetPrototype in favor of message_factory.GetMessageClass;
  # confirm against the protobuf version this package is pinned to.
  message_class = symbol_db.GetPrototype(descriptor)
  # pylint:disable=protected-access
  serialized_proto = _packet_getter._get_serialized_proto(packet)
  # pylint:enable=protected-access
  proto_message = message_class()
  proto_message.ParseFromString(serialized_proto)
  return proto_message
|
||||
|
||||
|
||||
def get_proto_list(packet: mp_packet.Packet) -> List[message.Message]:
  """Get the content of a MediaPipe proto vector Packet as a proto message list.

  The element type name of the vector is resolved to a Python message class
  through the default protobuf symbol database, and every serialized element is
  parsed into its own new instance of that class.

  Args:
    packet: A MediaPipe proto vector Packet.

  Returns:
    A proto message list. The list is empty if the proto vector is empty.

  Raises:
    TypeError: If the message descriptor can't be found by type name.

  Examples:
    proto_list = mp.packet_getter.get_proto_list(protos_packet)
  """
  # pylint:disable=protected-access
  vector_size = _packet_getter._get_proto_vector_size(packet)
  # pylint:enable=protected-access
  # Return empty list if the proto vector is empty. This happens before the
  # element type lookup below (presumably because an empty vector has no
  # element to take the type name from -- TODO confirm in the bindings).
  if vector_size == 0:
    return []

  # pylint:disable=protected-access
  proto_type_name = _packet_getter._get_proto_vector_element_type_name(packet)
  # pylint:enable=protected-access
  symbol_db = symbol_database.Default()
  try:
    descriptor = symbol_db.pool.FindMessageTypeByName(proto_type_name)
  except KeyError as err:
    # Keep the original lookup failure attached as the explicit cause.
    raise TypeError('Can not find message descriptor by type name: %s' %
                    proto_type_name) from err
  # NOTE(review): newer protobuf releases reportedly deprecate
  # SymbolDatabase.GetPrototype in favor of message_factory.GetMessageClass;
  # confirm against the protobuf version this package is pinned to.
  message_class = symbol_db.GetPrototype(descriptor)
  # pylint:disable=protected-access
  serialized_protos = _packet_getter._get_serialized_proto_list(packet)
  # pylint:enable=protected-access
  proto_message_list = []
  for serialized_proto in serialized_protos:
    proto_message = message_class()
    proto_message.ParseFromString(serialized_proto)
    proto_message_list.append(proto_message)
  return proto_message_list
|
@@ -0,0 +1,506 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python._framework_bindings.packet."""
|
||||
|
||||
import gc
|
||||
import random
|
||||
import sys
|
||||
from absl.testing import absltest
|
||||
import mediapipe as mp
|
||||
import numpy as np
|
||||
from google.protobuf import text_format
|
||||
from mediapipe.framework.formats import detection_pb2
|
||||
|
||||
|
||||
class PacketTest(absltest.TestCase):
|
||||
|
||||
def test_empty_packet(self):
|
||||
p = mp.Packet()
|
||||
self.assertTrue(p.is_empty())
|
||||
|
||||
def test_boolean_packet(self):
|
||||
p = mp.packet_creator.create_bool(True)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_bool(p), True)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
|
||||
def test_int_packet(self):
|
||||
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
|
||||
p = mp.packet_creator.create_int(2**32)
|
||||
p = mp.packet_creator.create_int(42)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p), 42)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_int(np.intc(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_int8_packet(self):
|
||||
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
|
||||
p = mp.packet_creator.create_int8(2**7)
|
||||
p = mp.packet_creator.create_int8(2**7 - 1)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p), 2**7 - 1)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_int8(np.int8(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_int16_packet(self):
|
||||
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
|
||||
p = mp.packet_creator.create_int16(2**15)
|
||||
p = mp.packet_creator.create_int16(2**15 - 1)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p), 2**15 - 1)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_int16(np.int16(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_int32_packet(self):
|
||||
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
|
||||
p = mp.packet_creator.create_int32(2**31)
|
||||
|
||||
p = mp.packet_creator.create_int32(2**31 - 1)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p), 2**31 - 1)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_int32(np.int32(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_int64_packet(self):
|
||||
p = mp.packet_creator.create_int64(2**63 - 1)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p), 2**63 - 1)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_int64(np.int64(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_int(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_uint8_packet(self):
|
||||
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
|
||||
p = mp.packet_creator.create_uint8(2**8)
|
||||
p = mp.packet_creator.create_uint8(2**8 - 1)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_uint(p), 2**8 - 1)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_uint8(np.uint8(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_uint(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_uint16_packet(self):
|
||||
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
|
||||
p = mp.packet_creator.create_uint16(2**16)
|
||||
p = mp.packet_creator.create_uint16(2**16 - 1)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_uint(p), 2**16 - 1)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_uint16(np.uint16(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_uint(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_uint32_packet(self):
|
||||
with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'):
|
||||
p = mp.packet_creator.create_uint32(2**32)
|
||||
p = mp.packet_creator.create_uint32(2**32 - 1)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_uint(p), 2**32 - 1)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_uint32(np.uint32(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_uint(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_uint64_packet(self):
|
||||
p = mp.packet_creator.create_uint64(2**64 - 1)
|
||||
p.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_uint(p), 2**64 - 1)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_uint64(np.uint64(1))
|
||||
p2.timestamp = 0
|
||||
self.assertEqual(mp.packet_getter.get_uint(p2), 1)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_float_packet(self):
  """Round-trips a float packet from a Python float and a NumPy scalar."""
  p = mp.packet_creator.create_float(0.42)
  p.timestamp = 0
  self.assertAlmostEqual(mp.packet_getter.get_float(p), 0.42)
  self.assertEqual(p.timestamp, 0)
  # np.float was only a deprecated alias of the builtin float and no longer
  # exists in recent NumPy releases; np.float64 is a real NumPy scalar type
  # that is also a subclass of the builtin float.
  p2 = mp.packet_creator.create_float(np.float64(0.42))
  p2.timestamp = 0
  self.assertAlmostEqual(mp.packet_getter.get_float(p2), 0.42)
  self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_double_packet(self):
|
||||
p = mp.packet_creator.create_double(0.42)
|
||||
p.timestamp = 0
|
||||
self.assertAlmostEqual(mp.packet_getter.get_float(p), 0.42)
|
||||
self.assertEqual(p.timestamp, 0)
|
||||
p2 = mp.packet_creator.create_double(np.double(0.42))
|
||||
p2.timestamp = 0
|
||||
self.assertAlmostEqual(mp.packet_getter.get_float(p2), 0.42)
|
||||
self.assertEqual(p2.timestamp, 0)
|
||||
|
||||
def test_detection_proto_packet(self):
  """Round-trips a Detection proto through a packet at timestamp 100."""
  detection = detection_pb2.Detection()
  text_format.Parse('score: 0.5', detection)
  p = mp.packet_creator.create_proto(detection).at(100)
  # Without these checks the test only proved that creation does not raise.
  self.assertEqual(p.timestamp, 100)
  self.assertEqual(mp.packet_getter.get_proto(p), detection)
|
||||
|
||||
def test_string_packet(self):
|
||||
p = mp.packet_creator.create_string('abc').at(100)
|
||||
self.assertEqual(mp.packet_getter.get_str(p), 'abc')
|
||||
self.assertEqual(p.timestamp, 100)
|
||||
p.timestamp = 200
|
||||
self.assertEqual(p.timestamp, 200)
|
||||
|
||||
def test_bytes_packet(self):
|
||||
p = mp.packet_creator.create_string(b'xd0\xba\xd0').at(300)
|
||||
self.assertEqual(mp.packet_getter.get_bytes(p), b'xd0\xba\xd0')
|
||||
self.assertEqual(p.timestamp, 300)
|
||||
|
||||
def test_int_array_packet(self):
|
||||
p = mp.packet_creator.create_int_array([1, 2, 3]).at(100)
|
||||
self.assertEqual(p.timestamp, 100)
|
||||
|
||||
def test_float_array_packet(self):
|
||||
p = mp.packet_creator.create_float_array([0.1, 0.2, 0.3]).at(100)
|
||||
self.assertEqual(p.timestamp, 100)
|
||||
|
||||
def test_int_vector_packet(self):
|
||||
p = mp.packet_creator.create_int_vector([1, 2, 3]).at(100)
|
||||
self.assertEqual(mp.packet_getter.get_int_list(p), [1, 2, 3])
|
||||
self.assertEqual(p.timestamp, 100)
|
||||
|
||||
def test_float_vector_packet(self):
|
||||
p = mp.packet_creator.create_float_vector([0.1, 0.2, 0.3]).at(100)
|
||||
output_list = mp.packet_getter.get_float_list(p)
|
||||
self.assertAlmostEqual(output_list[0], 0.1)
|
||||
self.assertAlmostEqual(output_list[1], 0.2)
|
||||
self.assertAlmostEqual(output_list[2], 0.3)
|
||||
self.assertEqual(p.timestamp, 100)
|
||||
|
||||
def test_string_vector_packet(self):
|
||||
p = mp.packet_creator.create_string_vector(['a', 'b', 'c']).at(100)
|
||||
output_list = mp.packet_getter.get_str_list(p)
|
||||
self.assertEqual(output_list[0], 'a')
|
||||
self.assertEqual(output_list[1], 'b')
|
||||
self.assertEqual(output_list[2], 'c')
|
||||
self.assertEqual(p.timestamp, 100)
|
||||
|
||||
def test_packet_vector_packet(self):
|
||||
p = mp.packet_creator.create_packet_vector([
|
||||
mp.packet_creator.create_float(0.42),
|
||||
mp.packet_creator.create_int(42),
|
||||
mp.packet_creator.create_string('42')
|
||||
]).at(100)
|
||||
output_list = mp.packet_getter.get_packet_list(p)
|
||||
self.assertAlmostEqual(mp.packet_getter.get_float(output_list[0]), 0.42)
|
||||
self.assertEqual(mp.packet_getter.get_int(output_list[1]), 42)
|
||||
self.assertEqual(mp.packet_getter.get_str(output_list[2]), '42')
|
||||
self.assertEqual(p.timestamp, 100)
|
||||
|
||||
def test_string_to_packet_map_packet(self):
|
||||
p = mp.packet_creator.create_string_to_packet_map({
|
||||
'float': mp.packet_creator.create_float(0.42),
|
||||
'int': mp.packet_creator.create_int(42),
|
||||
'string': mp.packet_creator.create_string('42')
|
||||
}).at(100)
|
||||
output_list = mp.packet_getter.get_str_to_packet_dict(p)
|
||||
self.assertAlmostEqual(
|
||||
mp.packet_getter.get_float(output_list['float']), 0.42)
|
||||
self.assertEqual(mp.packet_getter.get_int(output_list['int']), 42)
|
||||
self.assertEqual(mp.packet_getter.get_str(output_list['string']), '42')
|
||||
self.assertEqual(p.timestamp, 100)
|
||||
|
||||
def test_uint8_image_packet(self):
|
||||
uint8_img = np.random.randint(
|
||||
2**8 - 1,
|
||||
size=(random.randrange(3, 100), random.randrange(3, 100), 3),
|
||||
dtype=np.uint8)
|
||||
image_frame_packet = mp.packet_creator.create_image_frame(
|
||||
mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=uint8_img))
|
||||
output_image_frame = mp.packet_getter.get_image_frame(image_frame_packet)
|
||||
self.assertTrue(np.array_equal(output_image_frame.numpy_view(), uint8_img))
|
||||
image_packet = mp.packet_creator.create_image(
|
||||
mp.Image(image_format=mp.ImageFormat.SRGB, data=uint8_img))
|
||||
output_image = mp.packet_getter.get_image(image_packet)
|
||||
self.assertTrue(np.array_equal(output_image.numpy_view(), uint8_img))
|
||||
|
||||
def test_uint16_image_packet(self):
|
||||
uint16_img = np.random.randint(
|
||||
2**16 - 1,
|
||||
size=(random.randrange(3, 100), random.randrange(3, 100), 4),
|
||||
dtype=np.uint16)
|
||||
image_frame_packet = mp.packet_creator.create_image_frame(
|
||||
mp.ImageFrame(image_format=mp.ImageFormat.SRGBA64, data=uint16_img))
|
||||
output_image_frame = mp.packet_getter.get_image_frame(image_frame_packet)
|
||||
self.assertTrue(np.array_equal(output_image_frame.numpy_view(), uint16_img))
|
||||
image_packet = mp.packet_creator.create_image(
|
||||
mp.Image(image_format=mp.ImageFormat.SRGBA64, data=uint16_img))
|
||||
output_image = mp.packet_getter.get_image(image_packet)
|
||||
self.assertTrue(np.array_equal(output_image.numpy_view(), uint16_img))
|
||||
|
||||
def test_float_image_frame_packet(self):
|
||||
float_img = np.float32(
|
||||
np.random.random_sample(
|
||||
(random.randrange(3, 100), random.randrange(3, 100), 2)))
|
||||
image_frame_packet = mp.packet_creator.create_image_frame(
|
||||
mp.ImageFrame(image_format=mp.ImageFormat.VEC32F2, data=float_img))
|
||||
output_image_frame = mp.packet_getter.get_image_frame(image_frame_packet)
|
||||
self.assertTrue(np.allclose(output_image_frame.numpy_view(), float_img))
|
||||
image_packet = mp.packet_creator.create_image(
|
||||
mp.Image(image_format=mp.ImageFormat.VEC32F2, data=float_img))
|
||||
output_image = mp.packet_getter.get_image(image_packet)
|
||||
self.assertTrue(np.array_equal(output_image.numpy_view(), float_img))
|
||||
|
||||
def test_image_frame_packet_creation_copy_mode(self):
|
||||
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
|
||||
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
|
||||
# rgb_data is c_contiguous.
|
||||
self.assertTrue(rgb_data.flags.c_contiguous)
|
||||
initial_ref_count = sys.getrefcount(rgb_data)
|
||||
p = mp.packet_creator.create_image_frame(
|
||||
image_format=mp.ImageFormat.SRGB, data=rgb_data)
|
||||
# copy mode doesn't increase the ref count of the data.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
|
||||
rgb_data = rgb_data[:, :, ::-1]
|
||||
# rgb_data is now not c_contiguous. But, copy mode shouldn't be affected.
|
||||
self.assertFalse(rgb_data.flags.c_contiguous)
|
||||
initial_ref_count = sys.getrefcount(rgb_data)
|
||||
p = mp.packet_creator.create_image_frame(
|
||||
image_format=mp.ImageFormat.SRGB, data=rgb_data)
|
||||
# copy mode doesn't increase the ref count of the data.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
|
||||
output_frame = mp.packet_getter.get_image_frame(p)
|
||||
self.assertEqual(output_frame.height, h)
|
||||
self.assertEqual(output_frame.width, w)
|
||||
self.assertEqual(output_frame.channels, channels)
|
||||
self.assertTrue(np.array_equal(output_frame.numpy_view(), rgb_data))
|
||||
|
||||
del p
|
||||
del output_frame
|
||||
gc.collect()
|
||||
# Destroying the packet also doesn't affect the ref count because of the
|
||||
# copy mode.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
|
||||
def test_image_frame_packet_creation_reference_mode(self):
|
||||
w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
|
||||
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
|
||||
rgb_data.flags.writeable = False
|
||||
initial_ref_count = sys.getrefcount(rgb_data)
|
||||
image_frame_packet = mp.packet_creator.create_image_frame(
|
||||
image_format=mp.ImageFormat.SRGB, data=rgb_data)
|
||||
# Reference mode increase the ref count of the rgb_data by 1.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
|
||||
del image_frame_packet
|
||||
gc.collect()
|
||||
# Deleting image_frame_packet should decrease the ref count of rgb_data by 1.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
rgb_data_copy = np.copy(rgb_data)
|
||||
# rgb_data_copy is a copy of rgb_data and should not increase the ref count.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
text_config = """
|
||||
node {
|
||||
calculator: 'PassThroughCalculator'
|
||||
input_side_packet: "in"
|
||||
output_side_packet: "out"
|
||||
}
|
||||
"""
|
||||
graph = mp.CalculatorGraph(graph_config=text_config)
|
||||
graph.start_run(
|
||||
input_side_packets={
|
||||
'in':
|
||||
mp.packet_creator.create_image_frame(
|
||||
image_format=mp.ImageFormat.SRGB, data=rgb_data)
|
||||
})
|
||||
# reference mode increase the ref count of the rgb_data by 1.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
|
||||
graph.wait_until_done()
|
||||
output_packet = graph.get_output_side_packet('out')
|
||||
del rgb_data
|
||||
del graph
|
||||
gc.collect()
|
||||
# The pixel data of the output image frame packet should still be valid
|
||||
# after the graph and the original rgb_data data are deleted.
|
||||
self.assertTrue(
|
||||
np.array_equal(
|
||||
mp.packet_getter.get_image_frame(output_packet).numpy_view(),
|
||||
rgb_data_copy))
|
||||
|
||||
def test_image_frame_packet_copy_creation_with_cropping(self):
|
||||
w, h, channels = random.randrange(40, 100), random.randrange(40, 100), 3
|
||||
channels, offset = 3, 10
|
||||
rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
|
||||
initial_ref_count = sys.getrefcount(rgb_data)
|
||||
p = mp.packet_creator.create_image_frame(
|
||||
image_format=mp.ImageFormat.SRGB,
|
||||
data=rgb_data[offset:-offset, offset:-offset, :])
|
||||
# copy mode doesn't increase the ref count of the data.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
output_frame = mp.packet_getter.get_image_frame(p)
|
||||
self.assertEqual(output_frame.height, h - 2 * offset)
|
||||
self.assertEqual(output_frame.width, w - 2 * offset)
|
||||
self.assertEqual(output_frame.channels, channels)
|
||||
self.assertTrue(
|
||||
np.array_equal(rgb_data[offset:-offset, offset:-offset, :],
|
||||
output_frame.numpy_view()))
|
||||
del p
|
||||
del output_frame
|
||||
gc.collect()
|
||||
# Destroying the packet also doesn't affect the ref count because of the
|
||||
# copy mode.
|
||||
self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
|
||||
def test_image_packet_creation_copy_mode(self):
  """Checks create_image() on writeable data, contiguous and non-contiguous.

  In both cases the test asserts that the ref count of the input array is the
  same before and after the packet is created, and again after the packet is
  destroyed. The content of the last packet is compared with the input.
  """
  w = random.randrange(3, 100)
  h = random.randrange(3, 100)
  channels = 3
  rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
  # Freshly generated data is laid out C-contiguously.
  self.assertTrue(rgb_data.flags.c_contiguous)
  initial_ref_count = sys.getrefcount(rgb_data)
  image_packet = mp.packet_creator.create_image(
      image_format=mp.ImageFormat.SRGB, data=rgb_data)
  # Copy mode leaves the ref count of the input untouched.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)

  # Reversing the channel axis gives a view that is no longer C-contiguous.
  # Copy mode should behave the same way for it.
  rgb_data = rgb_data[:, :, ::-1]
  self.assertFalse(rgb_data.flags.c_contiguous)
  initial_ref_count = sys.getrefcount(rgb_data)
  image_packet = mp.packet_creator.create_image(
      image_format=mp.ImageFormat.SRGB, data=rgb_data)
  # Copy mode leaves the ref count of the input untouched.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)

  output_image = mp.packet_getter.get_image(image_packet)
  self.assertEqual(output_image.height, h)
  self.assertEqual(output_image.width, w)
  self.assertEqual(output_image.channels, channels)
  self.assertTrue(np.array_equal(output_image.numpy_view(), rgb_data))

  del image_packet
  del output_image
  gc.collect()
  # Destroying the packet doesn't change the ref count either, because the
  # packet was created in copy mode.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
|
||||
def test_image_packet_creation_reference_mode(self):
  """Checks create_image() on read-only data, where the packet holds a ref.

  The test asserts that creating a packet from a non-writeable array raises
  its ref count by one, that deleting the packet restores it, and that the
  pixel data read from a graph's output side packet still matches after both
  the graph and the original array have been deleted.
  """
  w = random.randrange(3, 100)
  h = random.randrange(3, 100)
  channels = 3
  rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
  # The data is made read-only before the packet is created.
  rgb_data.flags.writeable = False
  initial_ref_count = sys.getrefcount(rgb_data)
  ref_packet = mp.packet_creator.create_image(
      image_format=mp.ImageFormat.SRGB, data=rgb_data)
  # Reference mode increases the ref count of rgb_data by 1.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
  del ref_packet
  gc.collect()
  # Deleting the packet should decrease the ref count of rgb_data by 1.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
  rgb_data_copy = np.copy(rgb_data)
  # rgb_data_copy is an independent copy and should not increase the ref
  # count of rgb_data.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
  graph_text = """
node {
calculator: 'PassThroughCalculator'
input_side_packet: "in"
output_side_packet: "out"
}
"""
  graph = mp.CalculatorGraph(graph_config=graph_text)
  graph.start_run(
      input_side_packets={
          'in':
              mp.packet_creator.create_image(
                  image_format=mp.ImageFormat.SRGB, data=rgb_data)
      })
  # Reference mode increases the ref count of rgb_data by 1.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
  graph.wait_until_done()
  out_packet = graph.get_output_side_packet('out')
  del rgb_data
  del graph
  gc.collect()
  # The pixel data of the output image packet should still be valid after
  # the graph and the original rgb_data are deleted.
  surviving_pixels = mp.packet_getter.get_image(out_packet).numpy_view()
  self.assertTrue(np.array_equal(surviving_pixels, rgb_data_copy))
|
||||
|
||||
def test_image_packet_copy_creation_with_cropping(self):
  """Creates an image packet from a cropped view and checks the copy.

  A slice of a random RGB array is passed to create_image(). The test asserts
  that the ref count of the source array is unchanged, that the output image
  has the cropped dimensions and pixel values, and that destroying the packet
  leaves the ref count untouched.
  """
  w, h = random.randrange(40, 100), random.randrange(40, 100)
  channels, offset = 3, 10
  rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
  initial_ref_count = sys.getrefcount(rgb_data)
  p = mp.packet_creator.create_image(
      image_format=mp.ImageFormat.SRGB,
      data=rgb_data[offset:-offset, offset:-offset, :])
  # copy mode doesn't increase the ref count of the data.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
  output_image = mp.packet_getter.get_image(p)
  # The offset is removed from both ends of the height and width axes.
  self.assertEqual(output_image.height, h - 2 * offset)
  self.assertEqual(output_image.width, w - 2 * offset)
  self.assertEqual(output_image.channels, channels)
  self.assertTrue(
      np.array_equal(rgb_data[offset:-offset, offset:-offset, :],
                     output_image.numpy_view()))
  del p
  del output_image
  gc.collect()
  # Destroying the packet also doesn't affect the ref count because of the
  # copy mode.
  self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
|
||||
|
||||
def test_matrix_packet(self):
  """Checks that a matrix packet keeps its values without holding the input.

  The test asserts that create_matrix() leaves the ref count of the input
  array unchanged and that the matrix read back from the packet still equals
  the original values after the input array has been deleted.
  """
  values = [[.1, .2, .3], [.4, .5, .6]]
  np_matrix = np.array(values)
  initial_ref_count = sys.getrefcount(np_matrix)
  matrix_packet = mp.packet_creator.create_matrix(np_matrix)
  # Copy mode should not increase the ref count of np_matrix.
  self.assertEqual(initial_ref_count, sys.getrefcount(np_matrix))
  output_matrix = mp.packet_getter.get_matrix(matrix_packet)
  del np_matrix
  gc.collect()
  expected = np.array(values)
  self.assertTrue(np.allclose(output_matrix, expected))
|
||||
|
||||
def test_matrix_packet_with_non_c_contiguous_data(self):
  """Checks matrix packet creation from a non C-contiguous array.

  The input is a column-reversed view. The test asserts that create_matrix()
  leaves the ref count of the view unchanged and that the matrix read back
  from the packet equals the reversed values after the input is deleted.
  """
  np_matrix = np.array([[.1, .2, .3], [.4, .5, .6]])[:, ::-1]
  # np_matrix is not c_contiguous.
  self.assertFalse(np_matrix.flags.c_contiguous)
  # The baseline must be sampled before the packet is created; otherwise the
  # assertion below compares the ref count with itself and can never fail.
  initial_ref_count = sys.getrefcount(np_matrix)
  p = mp.packet_creator.create_matrix(np_matrix)
  # Copy mode should not increase the ref count of np_matrix.
  self.assertEqual(initial_ref_count, sys.getrefcount(np_matrix))
  output_matrix = mp.packet_getter.get_matrix(p)
  del np_matrix
  gc.collect()
  self.assertTrue(
      np.allclose(output_matrix,
                  np.array([[.1, .2, .3], [.4, .5, .6]])[:, ::-1]))
|
||||
|
||||
# Run every test case in this module through absltest when the file is
# executed as a script.
if __name__ == '__main__':
  absltest.main()
|
@@ -0,0 +1,543 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""MediaPipe SolutionBase module.
|
||||
|
||||
MediaPipe SolutionBase is the common base class for the high-level MediaPipe
|
||||
Solution APIs such as BlazeFace, hand tracking, and BlazePose. The SolutionBase
|
||||
class contains the shared logic among the high-level Solution APIs including
|
||||
graph initialization, processing image/audio data, and graph shutdown. Thus,
|
||||
users can easily create new MediaPipe Solution APIs on top of the SolutionBase
|
||||
class.
|
||||
"""
|
||||
|
||||
import collections
|
||||
import enum
|
||||
import os
|
||||
from typing import Any, Iterable, List, Mapping, NamedTuple, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from google.protobuf import descriptor
|
||||
from google.protobuf import message
|
||||
# resources dependency
|
||||
# pylint: disable=unused-import
|
||||
# pylint: enable=unused-import
|
||||
from mediapipe.framework import calculator_pb2
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.framework.formats import detection_pb2
|
||||
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
||||
from mediapipe.calculators.image import image_transformation_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
|
||||
from mediapipe.calculators.util import landmarks_smoothing_calculator_pb2
|
||||
from mediapipe.calculators.util import logic_calculator_pb2
|
||||
from mediapipe.calculators.util import thresholding_calculator_pb2
|
||||
from mediapipe.framework.formats import classification_pb2
|
||||
from mediapipe.framework.formats import landmark_pb2
|
||||
from mediapipe.framework.formats import rect_pb2
|
||||
from mediapipe.modules.objectron.calculators import annotation_data_pb2
|
||||
from mediapipe.modules.objectron.calculators import lift_2d_frame_annotation_to_3d_calculator_pb2
|
||||
# pylint: enable=unused-import
|
||||
from mediapipe.python._framework_bindings import calculator_graph
|
||||
from mediapipe.python._framework_bindings import image_frame
|
||||
from mediapipe.python._framework_bindings import packet
|
||||
from mediapipe.python._framework_bindings import resource_util
|
||||
from mediapipe.python._framework_bindings import validated_graph_config
|
||||
import mediapipe.python.packet_creator as packet_creator
|
||||
import mediapipe.python.packet_getter as packet_getter
|
||||
|
||||
# Number of channels an input image must have to be accepted as RGB data.
RGB_CHANNELS = 3
# TODO: Enable calculator options modification for more calculators.
# Maps a calculator type name to the options proto class that is used when
# the options of a node running that calculator are modified.
CALCULATOR_TO_OPTIONS = {
    'ConstantSidePacketCalculator': (
        constant_side_packet_calculator_pb2.ConstantSidePacketCalculatorOptions
    ),
    'ImageTransformationCalculator': (
        image_transformation_calculator_pb2.ImageTransformationCalculatorOptions
    ),
    'LandmarksSmoothingCalculator': (
        landmarks_smoothing_calculator_pb2.LandmarksSmoothingCalculatorOptions
    ),
    'LogicCalculator': (
        logic_calculator_pb2.LogicCalculatorOptions
    ),
    'ThresholdingCalculator': (
        thresholding_calculator_pb2.ThresholdingCalculatorOptions
    ),
    'TensorsToDetectionsCalculator': (
        tensors_to_detections_calculator_pb2.TensorsToDetectionsCalculatorOptions
    ),
    'Lift2DFrameAnnotationTo3DCalculator': (
        lift_2d_frame_annotation_to_3d_calculator_pb2
        .Lift2DFrameAnnotationTo3DCalculatorOptions
    ),
}
|
||||
|
||||
|
||||
# TODO: Support more packet data types, such as "Any" type.
|
||||
@enum.unique
class _PacketDataType(enum.Enum):
  """Enumerates the packet payload kinds that SolutionBase can handle.

  Apart from STRING, which is read with packet_getter.get_str, the value of
  each member is the suffix appended to 'create_' and 'get_' to look up the
  matching function in packet_creator and packet_getter. This is why AUDIO
  carries the value 'matrix'.
  """
  # Text and scalar payloads.
  STRING = 'string'
  BOOL = 'bool'
  BOOL_LIST = 'bool_list'
  INT = 'int'
  FLOAT = 'float'
  FLOAT_LIST = 'float_list'
  # Audio data is carried in matrix packets.
  AUDIO = 'matrix'
  # Image payloads.
  IMAGE = 'image'
  IMAGE_FRAME = 'image_frame'
  # Proto messages and lists of proto messages.
  PROTO = 'proto'
  PROTO_LIST = 'proto_list'

  @staticmethod
  def from_registered_name(registered_name: str) -> '_PacketDataType':
    """Returns the packet data type mapped to a registered type name.

    Args:
      registered_name: A key of the module-level NAME_TO_TYPE mapping.

    Raises:
      KeyError: If registered_name is not a key of NAME_TO_TYPE.
    """
    packet_data_type = NAME_TO_TYPE[registered_name]
    return packet_data_type
|
||||
|
||||
|
||||
# Maps the registered type name of a stream or side packet to the
# _PacketDataType used to create and read its packets. The entries keep their
# original insertion order and are grouped by kind.
NAME_TO_TYPE: Mapping[str, '_PacketDataType'] = {
    # Text, scalars and vectors of scalars.
    'string': _PacketDataType.STRING,
    'bool': _PacketDataType.BOOL,
    '::std::vector<bool>': _PacketDataType.BOOL_LIST,
    'int': _PacketDataType.INT,
    'float': _PacketDataType.FLOAT,
    '::std::vector<float>': _PacketDataType.FLOAT_LIST,
    # Matrix packets are handled as audio data.
    '::mediapipe::Matrix': _PacketDataType.AUDIO,
    '::mediapipe::ImageFrame': _PacketDataType.IMAGE_FRAME,
    # Single proto messages.
    **{
        type_name: _PacketDataType.PROTO for type_name in (
            '::mediapipe::Classification',
            '::mediapipe::ClassificationList',
            '::mediapipe::ClassificationListCollection',
            '::mediapipe::Detection',
            '::mediapipe::DetectionList',
            '::mediapipe::Landmark',
            '::mediapipe::LandmarkList',
            '::mediapipe::LandmarkListCollection',
            '::mediapipe::NormalizedLandmark',
            '::mediapipe::FrameAnnotation',
            '::mediapipe::Trigger',
            '::mediapipe::Rect',
            '::mediapipe::NormalizedRect',
            '::mediapipe::NormalizedLandmarkList',
            '::mediapipe::NormalizedLandmarkListCollection',
        )
    },
    '::mediapipe::Image': _PacketDataType.IMAGE,
    # Vectors of proto messages.
    **{
        type_name: _PacketDataType.PROTO_LIST for type_name in (
            '::std::vector<::mediapipe::Classification>',
            '::std::vector<::mediapipe::ClassificationList>',
            '::std::vector<::mediapipe::Detection>',
            '::std::vector<::mediapipe::DetectionList>',
            '::std::vector<::mediapipe::Landmark>',
            '::std::vector<::mediapipe::LandmarkList>',
            '::std::vector<::mediapipe::NormalizedLandmark>',
            '::std::vector<::mediapipe::NormalizedLandmarkList>',
            '::std::vector<::mediapipe::Rect>',
            '::std::vector<::mediapipe::NormalizedRect>',
        )
    },
}
|
||||
|
||||
|
||||
class SolutionBase:
|
||||
"""The common base class for the high-level MediaPipe Solution APIs.
|
||||
|
||||
The SolutionBase class contains the shared logic among the high-level solution
|
||||
APIs including graph initialization, processing image/audio data, and graph
|
||||
shutdown.
|
||||
|
||||
Example usage:
|
||||
with solution_base.SolutionBase(
|
||||
binary_graph_path='mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb',
|
||||
side_inputs={'num_hands': 2}) as hand_tracker:
|
||||
# Read an image and convert the BGR image to RGB.
|
||||
input_image = cv2.cvtColor(cv2.imread('/tmp/hand.png'), cv2.COLOR_BGR2RGB)
|
||||
results = hand_tracker.process(input_image)
|
||||
print(results.palm_detections)
|
||||
print(results.multi_hand_landmarks)
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    binary_graph_path: Optional[str] = None,
    graph_config: Optional[calculator_pb2.CalculatorGraphConfig] = None,
    calculator_params: Optional[Mapping[str, Any]] = None,
    side_inputs: Optional[Mapping[str, Any]] = None,
    outputs: Optional[List[str]] = None):
  """Builds the calculator graph, observes its outputs and starts a run.

  Args:
    binary_graph_path: Path of a binary mediapipe graph file (.binarypb). It
      is joined onto the MediaPipe package root path.
    graph_config: A CalculatorGraphConfig proto message or its text proto
      format.
    calculator_params: A mapping from a
      {calculator_name}.{options_field_name} str to the value to set on that
      options field.
    side_inputs: A mapping from the side packet name to the packet raw data.
    outputs: The names of the graph output streams to observe. When it is
      empty or None, every output stream listed in the graph config is
      observed.

  Raises:
    FileNotFoundError: If the binary graph file can't be found.
    RuntimeError: If the underlying calculator graph can't be successfully
      initialized or started.
    ValueError: If any of the following:
      a) If not exactly one of 'binary_graph_path' or 'graph_config'
        arguments is provided.
      b) If the graph validation process contains error.
      c) If the registered type name of the streams and side packets can't
        be found.
      d) If the calculator options of the calculator listed in
        calculator_params is not allowed to be modified.
      e) If the calculator options field is a repeated field but the field
        value to be set is not iterable.
  """
  use_binary_graph = bool(binary_graph_path)
  if use_binary_graph == bool(graph_config):
    raise ValueError(
        "Must provide exactly one of 'binary_graph_path' or 'graph_config'.")

  # The MediaPipe package root is this file's path minus its last three
  # components.
  path_components = os.path.abspath(__file__).split(os.sep)
  root_path = os.sep.join(path_components[:-3])
  resource_util.set_resource_dir(root_path)

  validated_graph = validated_graph_config.ValidatedGraphConfig()
  if use_binary_graph:
    full_graph_path = os.path.join(root_path, binary_graph_path)
    validated_graph.initialize(binary_graph_path=full_graph_path)
  else:
    validated_graph.initialize(graph_config=graph_config)

  # Collects the stream and side packet type info on self and returns the
  # config proto that the graph is built from.
  canonical_graph_config_proto = self._initialize_graph_interface(
      validated_graph, side_inputs, outputs)
  if calculator_params:
    self._modify_calculator_options(canonical_graph_config_proto,
                                    calculator_params)

  self._graph = calculator_graph.CalculatorGraph(
      graph_config=canonical_graph_config_proto)
  self._simulated_timestamp = 0
  self._graph_outputs = {}

  def store_output(stream_name: str, output_packet: packet.Packet) -> None:
    # Keeps only the most recent packet seen on each observed stream.
    self._graph_outputs[stream_name] = output_packet

  for observed_stream in self._output_stream_type_info:
    self._graph.observe_output_stream(observed_stream, store_output, True)

  # Wrap each raw side input in a packet of the type found for its name.
  side_packets = {}
  for name, data in (side_inputs or {}).items():
    side_packets[name] = self._make_packet(self._side_input_type_info[name],
                                           data)
  self._input_side_packets = side_packets
  self._graph.start_run(self._input_side_packets)
|
||||
|
||||
# TODO: Use "inspect.Parameter" to fetch the input argument names and
|
||||
# types from "_input_stream_type_info" and then auto generate the process
|
||||
# method signature by "inspect.Signature" in __init__.
|
||||
def process(
    self, input_data: Union[np.ndarray, Mapping[str, Union[np.ndarray,
                                                           message.Message]]]
) -> NamedTuple:
  """Sends one set of inputs through the graph and collects the outputs.

  Args:
    input_data: Either a single numpy ndarray object representing the solo
      image input of a graph or a mapping from the stream name to the image
      or proto data that represents every input stream of a graph.

  Raises:
    NotImplementedError: If input_data contains audio data or a list of proto
      objects.
    RuntimeError: If the underlying graph encounters an error.
    ValueError: If a single ndarray is passed but the graph does not have
      exactly one input stream, or if image data is not a three dimensional
      array with three (RGB) channels on its last axis.

  Returns:
    A NamedTuple type whose field names are the observed graph output stream
    names. Each field holds the content of the packet received on that stream
    during this call, or None when no packet was received.

  Examples:
    solution = solution_base.SolutionBase(graph_config=hand_landmark_graph)
    results = solution.process(cv2.imread('/tmp/hand0.png')[:, :, ::-1])
    print(results.detection)
    results = solution.process(
        {'video_in' : cv2.imread('/tmp/hand1.png')[:, :, ::-1]})
    print(results.hand_landmarks)
  """
  self._graph_outputs.clear()

  if isinstance(input_data, np.ndarray):
    if len(self._input_stream_type_info) != 1:
      raise ValueError(
          "Can't process single image input since the graph has more than one input streams."
      )
    input_dict = {next(iter(self._input_stream_type_info)): input_data}
  else:
    input_dict = input_data

  # Set the timestamp increment to 33333 us to simulate the 30 fps video
  # input.
  self._simulated_timestamp += 33333
  unsupported_types = (_PacketDataType.PROTO_LIST, _PacketDataType.AUDIO)
  image_types = (_PacketDataType.IMAGE_FRAME, _PacketDataType.IMAGE)
  for stream_name, data in input_dict.items():
    input_stream_type = self._input_stream_type_info[stream_name]
    if input_stream_type in unsupported_types:
      # TODO: Support audio data.
      raise NotImplementedError(
          f'SolutionBase can only process non-audio and non-proto-list data. '
          f'{input_stream_type.name} '
          f'type is not supported yet.')
    if input_stream_type in image_types:
      # Check the rank first so that a 2-D (e.g. grayscale) array is rejected
      # with the documented ValueError instead of an IndexError from
      # shape[2].
      if data.ndim != 3 or data.shape[2] != RGB_CHANNELS:
        raise ValueError('Input image must contain three channel rgb data.')
    self._graph.add_packet_to_input_stream(
        stream=stream_name,
        packet=self._make_packet(input_stream_type,
                                 data).at(self._simulated_timestamp))

  self._graph.wait_until_idle()
  # Create a NamedTuple object where the field names are mapping to the graph
  # output stream names.
  # NOTE: the namedtuple class itself is populated with setattr and returned
  # (no instance is created), so the result fields stay assignable.
  solution_outputs = collections.namedtuple(
      'SolutionOutputs', self._output_stream_type_info.keys())
  for stream_name, stream_type in self._output_stream_type_info.items():
    if stream_name in self._graph_outputs:
      setattr(
          solution_outputs, stream_name,
          self._get_packet_content(stream_type,
                                   self._graph_outputs[stream_name]))
    else:
      setattr(solution_outputs, stream_name, None)

  return solution_outputs
|
||||
|
||||
def close(self) -> None:
  """Closes all the input sources and the graph.

  The graph reference and the stream type info are cleared afterwards.
  Calling close() on an object that is already closed does nothing, so an
  explicit close() inside a "with" block does not make __exit__ fail.
  """
  # self._graph is None once the object has been closed.
  if self._graph is not None:
    self._graph.close()
  self._graph = None
  self._input_stream_type_info = None
  self._output_stream_type_info = None
|
||||
|
||||
def reset(self) -> None:
  """Closes the current graph run and starts a new one.

  The new run is started with the stored input side packets. Nothing happens
  when there is no graph.
  """
  graph = self._graph
  if not graph:
    return
  graph.close()
  graph.start_run(self._input_side_packets)
|
||||
|
||||
def _initialize_graph_interface(
    self,
    validated_graph: validated_graph_config.ValidatedGraphConfig,
    side_inputs: Optional[Mapping[str, Any]] = None,
    outputs: Optional[List[str]] = None):
  """Records the packet types of the graph interface on self.

  Sets self._input_stream_type_info, self._output_stream_type_info and
  self._side_input_type_info. Each maps a stream or side packet name to its
  _PacketDataType.

  Args:
    validated_graph: The validated graph config to read the binary config and
      the registered type names from.
    side_inputs: A mapping whose keys are the side packet names to look up.
    outputs: The output streams to observe. When it is empty or None, the
      output streams listed in the graph config are used.

  Returns:
    The CalculatorGraphConfig proto parsed from validated_graph.binary_config.
  """
  canonical_graph_config_proto = calculator_pb2.CalculatorGraphConfig()
  canonical_graph_config_proto.ParseFromString(validated_graph.binary_config)

  def collect_types(tag_index_names, registered_type_name_of):
    # Maps the name part of each 'TAG:index:name' str to its packet data
    # type. registered_type_name_of returns the registered type name for a
    # stream or side packet name.
    type_info = {}
    for tag_index_name in tag_index_names:
      name = tag_index_name.split(':')[-1]
      type_info[name] = _PacketDataType.from_registered_name(
          registered_type_name_of(name))
    return type_info

  # The stream type info is used to decide which packet creator and getter
  # methods to call in the process() method.
  self._input_stream_type_info = collect_types(
      canonical_graph_config_proto.input_stream,
      validated_graph.registered_stream_type_name)

  output_streams = outputs
  if not output_streams:
    output_streams = canonical_graph_config_proto.output_stream
  self._output_stream_type_info = collect_types(
      output_streams, validated_graph.registered_stream_type_name)

  # The side packet type info is used to make the input_side_packets dict
  # for the graph start_run() call.
  self._side_input_type_info = collect_types(
      side_inputs or {}, validated_graph.registered_side_packet_type_name)
  return canonical_graph_config_proto
|
||||
|
||||
def _modify_calculator_options(
    self, calculator_graph_config: calculator_pb2.CalculatorGraphConfig,
    calculator_params: Mapping[str, Any]) -> None:
  """Modifies the CalculatorOptions of the calculators listed in calculator_params.

  Nodes are matched by their "name" field against the calculator name part
  of each key. The options proto type is looked up in CALCULATOR_TO_OPTIONS
  by the node's "calculator" field.

  Args:
    calculator_graph_config: The graph config whose nodes are modified in
      place.
    calculator_params: A mapping from a
      {calculator_name}.{options_field_name} str to the new field value. A
      value of None clears the field.

  Raises:
    ValueError: If a key does not consist of exactly two '.'-separated parts,
      if the calculator of a matched node has no entry in
      CALCULATOR_TO_OPTIONS, if a matched node has both "options" and
      "node_options", or if a non-iterable value is given for a repeated
      field.
  """

  # Reorganizes the calculator options field data by calculator name and puts
  # all the field data of the same calculator in a list.
  def generate_nested_calculator_params(flat_map):
    nested_map = {}
    for compound_name, field_value in flat_map.items():
      calculator_and_field_name = compound_name.split('.')
      if len(calculator_and_field_name) != 2:
        raise ValueError(
            f'The key "{compound_name}" in the calculator_params is invalid.')
      calculator_name, field_name = calculator_and_field_name
      nested_map.setdefault(calculator_name, []).append(
          (field_name, field_value))
    return nested_map

  # Applies every (field name, field value) pair to the options message.
  def modify_options_fields(calculator_options, options_field_list):
    for field_name, field_value in options_field_list:
      if field_value is None:
        calculator_options.ClearField(field_name)
        continue
      field_label = calculator_options.DESCRIPTOR.fields_by_name[
          field_name].label
      # The label is an integer constant, so it is compared by value.
      if field_label == descriptor.FieldDescriptor.LABEL_REPEATED:
        if not isinstance(field_value, Iterable):
          raise ValueError(
              f'{field_name} is a repeated proto field but the value '
              f'to be set is {type(field_value)}, which is not iterable.')
        # TODO: Support resetting the entire repeated field
        # (array-option) and changing the individual values in the repeated
        # field (array-element-option).
        calculator_options.ClearField(field_name)
        for elem in field_value:
          getattr(calculator_options, field_name).append(elem)
      else:
        setattr(calculator_options, field_name, field_value)

  nested_calculator_params = generate_nested_calculator_params(
      calculator_params)

  num_modified = 0
  for node in calculator_graph_config.node:
    if node.name not in nested_calculator_params:
      continue
    options_type = CALCULATOR_TO_OPTIONS.get(node.calculator)
    if options_type is None:
      raise ValueError(
          f'Modifying the calculator options of {node.name} is not supported.'
      )
    options_field_list = nested_calculator_params[node.name]
    if node.HasField('options') and node.node_options:
      raise ValueError(
          f'Cannot modify the calculator options of {node.name} because it '
          f'has both options and node_options fields.')
    if node.node_options:
      # The "node_options" case for the proto3 syntax.
      node_options_modified = False
      for elem in node.node_options:
        type_name = elem.type_url.split('/')[-1]
        if type_name == options_type.DESCRIPTOR.full_name:
          calculator_options = options_type.FromString(elem.value)
          modify_options_fields(calculator_options, options_field_list)
          elem.value = calculator_options.SerializeToString()
          node_options_modified = True
          break
      # There is no existing node_options being modified. Add a new
      # node_options instead.
      if not node_options_modified:
        calculator_options = options_type()
        modify_options_fields(calculator_options, options_field_list)
        node.node_options.add().Pack(calculator_options)
    else:
      # The "options" case for the proto2 syntax as well as the fallback
      # when the calculator doesn't have either "options" or "node_options".
      modify_options_fields(node.options.Extensions[options_type.ext],
                            options_field_list)

    num_modified += 1
    # Exits the loop early when every element in nested_calculator_params
    # has been visited.
    if num_modified == len(nested_calculator_params):
      break
|
||||
|
||||
def _make_packet(self, packet_data_type: _PacketDataType,
                 data: Any) -> packet.Packet:
  """Creates a packet of the given type from raw data.

  The creator function is looked up in packet_creator under the name
  'create_' followed by the value of packet_data_type. For IMAGE and
  IMAGE_FRAME types it is called with the SRGB image format.

  Args:
    packet_data_type: The supported packet data type.
    data: The raw data to put in the packet.

  Returns:
    The packet returned by the creator function.
  """
  create_packet = getattr(packet_creator, 'create_' + packet_data_type.value)
  image_types = (_PacketDataType.IMAGE_FRAME, _PacketDataType.IMAGE)
  if packet_data_type in image_types:
    return create_packet(data, image_format=image_frame.ImageFormat.SRGB)
  return create_packet(data)
|
||||
|
||||
def _get_packet_content(self, packet_data_type: _PacketDataType,
                        output_packet: packet.Packet) -> Any:
  """Reads the content of a packet according to its data type.

  Args:
    packet_data_type: The supported packet data type.
    output_packet: The packet to get content from.

  Returns:
    None when the packet is empty, which indicates "no output". Otherwise the
    packet content: a str from packet_getter.get_str for STRING, the
    numpy_view() of the image object for IMAGE and IMAGE_FRAME, and for any
    other type the result of the packet_getter function named 'get_' followed
    by the value of packet_data_type.
  """
  if output_packet.is_empty():
    return None
  if packet_data_type == _PacketDataType.STRING:
    return packet_getter.get_str(output_packet)

  read_packet = getattr(packet_getter, 'get_' + packet_data_type.value)
  content = read_packet(output_packet)
  if packet_data_type in (_PacketDataType.IMAGE_FRAME,
                          _PacketDataType.IMAGE):
    # Image objects are handed back as their numpy view.
    return content.numpy_view()
  return content
|
||||
|
||||
def __enter__(self) -> 'SolutionBase':
  """Supports the "with" statement by returning this object unchanged."""
  return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
  """Closes all the input sources and the graph when a "with" block ends.

  The exception arguments are ignored and None is returned, so an exception
  raised inside the block keeps propagating.
  """
  self.close()
|
@@ -0,0 +1,367 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python.solution_base."""
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import numpy as np
|
||||
|
||||
from google.protobuf import text_format
|
||||
from mediapipe.framework import calculator_pb2
|
||||
from mediapipe.framework.formats import detection_pb2
|
||||
from mediapipe.python import solution_base
|
||||
|
||||
CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG = """
|
||||
input_stream: 'image_in'
|
||||
output_stream: 'image_out'
|
||||
node {
|
||||
name: 'ImageTransformation'
|
||||
calculator: 'ImageTransformationCalculator'
|
||||
input_stream: 'IMAGE:image_in'
|
||||
output_stream: 'IMAGE:image_out'
|
||||
options: {
|
||||
[mediapipe.ImageTransformationCalculatorOptions.ext] {
|
||||
output_width: 10
|
||||
output_height: 10
|
||||
}
|
||||
}
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 10
|
||||
output_height: 10
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class SolutionBaseTest(parameterized.TestCase):
  """Tests SolutionBase construction, input validation and processing."""

  # Passing neither, or both, of `binary_graph_path` and `graph_config` is
  # expected to be rejected with the same ValueError message.
  def test_invalid_initialization_arguments(self):
    with self.assertRaisesRegex(
        ValueError,
        'Must provide exactly one of \'binary_graph_path\' or \'graph_config\'.'
    ):
      solution_base.SolutionBase()
    with self.assertRaisesRegex(
        ValueError,
        'Must provide exactly one of \'binary_graph_path\' or \'graph_config\'.'
    ):
      solution_base.SolutionBase(
          graph_config=calculator_pb2.CalculatorGraphConfig(),
          binary_graph_path='/tmp/no_such.binarypb')

  # Each case is (test name, graph text config, expected exception type,
  # expected message regex) for a graph that SolutionBase should refuse to
  # build.
  @parameterized.named_parameters(('no_graph_input_output_stream', """
      node {
        calculator: 'PassThroughCalculator'
        input_stream: 'in'
        output_stream: 'out'
      }
      """, RuntimeError, 'does not have a corresponding output stream.'),
                                  ('calcualtor_io_mismatch', """
      node {
        calculator: 'PassThroughCalculator'
        input_stream: 'in'
        input_stream: 'in2'
        output_stream: 'out'
      }
      """, ValueError, 'must use matching tags and indexes.'),
                                  ('unkown_registered_stream_type_name', """
      input_stream: 'in'
      output_stream: 'out'
      node {
        calculator: 'PassThroughCalculator'
        input_stream: 'in'
        output_stream: 'out'
      }
      """, RuntimeError, 'Unable to find the type for stream \"in\".'))
  def test_invalid_config(self, text_config, error_type, error_message):
    config_proto = text_format.Parse(text_config,
                                     calculator_pb2.CalculatorGraphConfig())
    with self.assertRaisesRegex(error_type, error_message):
      solution_base.SolutionBase(graph_config=config_proto)

  # Feeds a DetectionList proto through DetectionUniqueIdCalculator and
  # expects the two detections back with detection_id 1 and 2 filled in.
  def test_valid_input_data_type_proto(self):
    text_config = """
      input_stream: 'input_detections'
      output_stream: 'output_detections'
      node {
        calculator: 'DetectionUniqueIdCalculator'
        input_stream: 'DETECTION_LIST:input_detections'
        output_stream: 'DETECTION_LIST:output_detections'
      }
    """
    config_proto = text_format.Parse(text_config,
                                     calculator_pb2.CalculatorGraphConfig())
    with solution_base.SolutionBase(graph_config=config_proto) as solution:
      input_detections = detection_pb2.DetectionList()
      detection_1 = input_detections.detection.add()
      text_format.Parse('score: 0.5', detection_1)
      detection_2 = input_detections.detection.add()
      text_format.Parse('score: 0.8', detection_2)
      results = solution.process({'input_detections': input_detections})
      self.assertTrue(hasattr(results, 'output_detections'))
      self.assertLen(results.output_detections.detection, 2)
      expected_detection_1 = detection_pb2.Detection()
      text_format.Parse('score: 0.5, detection_id: 1', expected_detection_1)
      expected_detection_2 = detection_pb2.Detection()
      text_format.Parse('score: 0.8, detection_id: 2', expected_detection_2)
      self.assertEqual(results.output_detections.detection[0],
                       expected_detection_1)
      self.assertEqual(results.output_detections.detection[1],
                       expected_detection_2)

  # Uses the 'DETECTIONS' tag instead of 'DETECTION_LIST'; processing is
  # expected to fail with NotImplementedError mentioning PROTO_LIST.
  def test_invalid_input_data_type_proto_vector(self):
    text_config = """
      input_stream: 'input_detections'
      output_stream: 'output_detections'
      node {
        calculator: 'DetectionUniqueIdCalculator'
        input_stream: 'DETECTIONS:input_detections'
        output_stream: 'DETECTIONS:output_detections'
      }
    """
    config_proto = text_format.Parse(text_config,
                                     calculator_pb2.CalculatorGraphConfig())
    with solution_base.SolutionBase(graph_config=config_proto) as solution:
      detection = detection_pb2.Detection()
      text_format.Parse('score: 0.5', detection)
      with self.assertRaisesRegex(
          NotImplementedError,
          'SolutionBase can only process non-audio and non-proto-list data. '
          + 'PROTO_LIST type is not supported.'
      ):
        solution.process({'input_detections': detection})

  # A 3x3 image with four channels must be rejected with ValueError.
  def test_invalid_input_image_data(self):
    text_config = """
      input_stream: 'image_in'
      output_stream: 'image_out'
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_in'
        output_stream: 'IMAGE:transformed_image_in'
      }
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:transformed_image_in'
        output_stream: 'IMAGE:image_out'
      }
    """
    config_proto = text_format.Parse(text_config,
                                     calculator_pb2.CalculatorGraphConfig())
    with solution_base.SolutionBase(graph_config=config_proto) as solution:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        solution.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  # Each case is (test name, graph text config, side inputs). The second
  # graph additionally consumes the 'allow_signal' and 'rotation_degrees'
  # side packets.
  @parameterized.named_parameters(('graph_without_side_packets', """
      input_stream: 'image_in'
      output_stream: 'image_out'
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_in'
        output_stream: 'IMAGE:transformed_image_in'
      }
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:transformed_image_in'
        output_stream: 'IMAGE:image_out'
      }
      """, None), ('graph_with_side_packets', """
      input_stream: 'image_in'
      input_side_packet: 'allow_signal'
      input_side_packet: 'rotation_degrees'
      output_stream: 'image_out'
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_in'
        input_side_packet: 'ROTATION_DEGREES:rotation_degrees'
        output_stream: 'IMAGE:transformed_image_in'
      }
      node {
        calculator: 'GateCalculator'
        input_stream: 'transformed_image_in'
        input_side_packet: 'ALLOW:allow_signal'
        output_stream: 'image_out_to_transform'
      }
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_out_to_transform'
        input_side_packet: 'ROTATION_DEGREES:rotation_degrees'
        output_stream: 'IMAGE:image_out'
      }""", {
          'allow_signal': True,
          'rotation_degrees': 0
      }))
  def test_solution_process(self, text_config, side_inputs):
    self._process_and_verify(
        config_proto=text_format.Parse(text_config,
                                       calculator_pb2.CalculatorGraphConfig()),
        side_inputs=side_inputs)

  # 'SignalGate' is a GateCalculator node; overriding one of its options via
  # `calculator_params` is expected to raise ValueError.
  def test_invalid_calculator_options(self):
    text_config = """
      input_stream: 'image_in'
      output_stream: 'image_out'
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_in'
        output_stream: 'IMAGE:transformed_image_in'
      }
      node {
        name: 'SignalGate'
        calculator: 'GateCalculator'
        input_stream: 'transformed_image_in'
        input_side_packet: 'ALLOW:allow_signal'
        output_stream: 'image_out_to_transform'
      }
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_out_to_transform'
        output_stream: 'IMAGE:image_out'
      }
    """
    config_proto = text_format.Parse(text_config,
                                     calculator_pb2.CalculatorGraphConfig())
    with self.assertRaisesRegex(
        ValueError,
        'Modifying the calculator options of SignalGate is not supported.'):
      solution_base.SolutionBase(
          graph_config=config_proto,
          calculator_params={'SignalGate.invalid_field': 'I am invalid'})

  # The shared graph config is used unmodified here, so the node carries
  # both `options` and `node_options`; that combination must be rejected.
  def test_calculator_has_both_options_and_node_options(self):
    config_proto = text_format.Parse(CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG,
                                     calculator_pb2.CalculatorGraphConfig())
    with self.assertRaisesRegex(ValueError,
                                'has both options and node_options fields.'):
      solution_base.SolutionBase(
          graph_config=config_proto,
          calculator_params={
              'ImageTransformation.output_width': 0,
              'ImageTransformation.output_height': 0
          })

  def test_modifying_calculator_proto2_options(self):
    config_proto = text_format.Parse(CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG,
                                     calculator_pb2.CalculatorGraphConfig())
    # To test proto2 options only, remove the proto3 node_options field from the
    # graph config.
    self.assertEqual('ImageTransformation', config_proto.node[0].name)
    config_proto.node[0].ClearField('node_options')
    # The config sets output width/height to 10; overriding both to 0 is
    # what lets _process_and_verify see the 3x3 input come back unchanged.
    self._process_and_verify(
        config_proto=config_proto,
        calculator_params={
            'ImageTransformation.output_width': 0,
            'ImageTransformation.output_height': 0
        })

  def test_modifying_calculator_proto3_node_options(self):
    config_proto = text_format.Parse(CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG,
                                     calculator_pb2.CalculatorGraphConfig())
    # To test proto3 node options only, remove the proto2 options field from the
    # graph config.
    self.assertEqual('ImageTransformation', config_proto.node[0].name)
    config_proto.node[0].ClearField('options')
    self._process_and_verify(
        config_proto=config_proto,
        calculator_params={
            'ImageTransformation.output_width': 0,
            'ImageTransformation.output_height': 0
        })

  def test_adding_calculator_options(self):
    config_proto = text_format.Parse(CALCULATOR_OPTIONS_TEST_GRAPH_CONFIG,
                                     calculator_pb2.CalculatorGraphConfig())
    # To test a calculator with no options field, remove both proto2 options and
    # proto3 node_options fields from the graph config.
    self.assertEqual('ImageTransformation', config_proto.node[0].name)
    config_proto.node[0].ClearField('options')
    config_proto.node[0].ClearField('node_options')
    self._process_and_verify(
        config_proto=config_proto,
        calculator_params={
            'ImageTransformation.output_width': 0,
            'ImageTransformation.output_height': 0
        })

  # Same two graphs as test_solution_process; here the solution is reset
  # between repeated process() calls.
  @parameterized.named_parameters(('graph_without_side_packets', """
      input_stream: 'image_in'
      output_stream: 'image_out'
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_in'
        output_stream: 'IMAGE:transformed_image_in'
      }
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:transformed_image_in'
        output_stream: 'IMAGE:image_out'
      }
      """, None), ('graph_with_side_packets', """
      input_stream: 'image_in'
      input_side_packet: 'allow_signal'
      input_side_packet: 'rotation_degrees'
      output_stream: 'image_out'
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_in'
        input_side_packet: 'ROTATION_DEGREES:rotation_degrees'
        output_stream: 'IMAGE:transformed_image_in'
      }
      node {
        calculator: 'GateCalculator'
        input_stream: 'transformed_image_in'
        input_side_packet: 'ALLOW:allow_signal'
        output_stream: 'image_out_to_transform'
      }
      node {
        calculator: 'ImageTransformationCalculator'
        input_stream: 'IMAGE:image_out_to_transform'
        input_side_packet: 'ROTATION_DEGREES:rotation_degrees'
        output_stream: 'IMAGE:image_out'
      }""", {
          'allow_signal': True,
          'rotation_degrees': 0
      }))
  def test_solution_reset(self, text_config, side_inputs):
    config_proto = text_format.Parse(text_config,
                                     calculator_pb2.CalculatorGraphConfig())
    input_image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
    with solution_base.SolutionBase(
        graph_config=config_proto, side_inputs=side_inputs) as solution:
      for _ in range(20):
        outputs = solution.process(input_image)
        self.assertTrue(np.array_equal(input_image, outputs.image_out))
        solution.reset()

  # Shared helper: builds a solution from `config_proto`, processes the same
  # 3x3x3 uint8 image once as a bare array and once as an
  # {'image_in': image} dict, and checks both outputs equal the input.
  def _process_and_verify(self,
                          config_proto,
                          side_inputs=None,
                          calculator_params=None):
    input_image = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
    with solution_base.SolutionBase(
        graph_config=config_proto,
        side_inputs=side_inputs,
        calculator_params=calculator_params) as solution:
      outputs = solution.process(input_image)
      outputs2 = solution.process({'image_in': input_image})
    self.assertTrue(np.array_equal(input_image, outputs.image_out))
    self.assertTrue(np.array_equal(input_image, outputs2.image_out))
|
||||
|
||||
|
||||
# Run the test suite through absltest when executed as a script.
if __name__ == '__main__':
  absltest.main()
|
@@ -0,0 +1,25 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""MediaPipe Solutions Python API."""
|
||||
|
||||
import mediapipe.python.solutions.drawing_styles
|
||||
import mediapipe.python.solutions.drawing_utils
|
||||
import mediapipe.python.solutions.face_detection
|
||||
import mediapipe.python.solutions.face_mesh
|
||||
import mediapipe.python.solutions.hands
|
||||
import mediapipe.python.solutions.holistic
|
||||
import mediapipe.python.solutions.objectron
|
||||
import mediapipe.python.solutions.pose
|
||||
import mediapipe.python.solutions.selfie_segmentation
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,37 @@
|
||||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""MediaPipe Downloading utils."""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import urllib.request
|
||||
|
||||
# Base URL that a repo-relative model path is appended to when downloading.
_OSS_URL_PREFIX = 'https://github.com/google/mediapipe/raw/master/'
|
||||
|
||||
|
||||
def download_oss_model(model_path: str):
  """Downloads the oss model from the MediaPipe GitHub repo if it doesn't exist in the package.

  The destination is `model_path` resolved against the directory four path
  components above this file. If a file already exists there, nothing is
  downloaded.

  The payload is first written to a sibling '.part' file and only renamed to
  the final name once the copy has finished. A failed or interrupted download
  therefore never leaves an empty or truncated file at the final path, which
  the existence check above would otherwise accept on every later call.

  Args:
    model_path: Path of the model relative to the MediaPipe repo root, e.g. the
      path that follows `_OSS_URL_PREFIX` in the download URL.

  Raises:
    ConnectionError: If the server answers with a status code other than 200.
  """

  mp_root_path = os.sep.join(os.path.abspath(__file__).split(os.sep)[:-4])
  model_abspath = os.path.join(mp_root_path, model_path)
  if os.path.exists(model_abspath):
    return
  model_url = _OSS_URL_PREFIX + model_path
  print('Downloading model to ' + model_abspath)
  # Make sure the target directory exists before staging the download.
  os.makedirs(os.path.dirname(model_abspath), exist_ok=True)
  partial_path = model_abspath + '.part'
  try:
    with urllib.request.urlopen(model_url) as response:
      # Check the status before creating any file on disk.
      if response.code != 200:
        raise ConnectionError('Cannot download ' + model_path +
                              ' from the MediaPipe Github repo.')
      with open(partial_path, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)
    # Publish the fully written file under its final name in one step.
    os.replace(partial_path, model_abspath)
  finally:
    # After a successful rename the staging file is gone; otherwise remove
    # whatever partial data was written.
    if os.path.exists(partial_path):
      os.remove(partial_path)
|
@@ -0,0 +1,146 @@
|
||||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""MediaPipe solution drawing styles."""
|
||||
|
||||
from typing import Mapping, Tuple
|
||||
|
||||
from mediapipe.python.solutions.drawing_utils import DrawingSpec
|
||||
from mediapipe.python.solutions.hands import HandLandmark
|
||||
|
||||
# Circle radius, in pixels, shared by every hand landmark dot.
_RADIUS = 5
# Colour palette. NOTE(review): the values look like (blue, green, red)
# triples (e.g. _RED has its largest component last) -- confirm against the
# drawing code that consumes them.
_RED = (54, 67, 244)
_GREEN = (118, 230, 0)
_BLUE = (192, 101, 21)
_YELLOW = (0, 204, 255)
_GRAY = (174, 164, 144)
_PURPLE = (128, 64, 128)
_PEACH = (180, 229, 255)

# Hands
# Line thickness for palm connections and for finger connections.
_THICKNESS_WRIST_MCP = 3
_THICKNESS_FINGER = 2
# Thickness used for landmark dots. NOTE(review): presumably -1 asks OpenCV
# for a filled circle -- confirm.
_THICKNESS_DOT = -1
|
||||
|
||||
# Hand landmarks
# Each tuple groups the landmarks that share one drawing style.
_PALM_LANDMARKS = (HandLandmark.WRIST, HandLandmark.THUMB_CMC,
                   HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP,
                   HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP)
_THUMB_LANDMARKS = (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP,
                    HandLandmark.THUMB_TIP)
# The two groups above were originally spelled `_PALM_LANMARKS` and
# `_THUMP_LANDMARKS`; the old names are kept as aliases so any existing
# reference keeps working.
_PALM_LANMARKS = _PALM_LANDMARKS
_THUMP_LANDMARKS = _THUMB_LANDMARKS
_INDEX_FINGER_LANDMARKS = (HandLandmark.INDEX_FINGER_PIP,
                           HandLandmark.INDEX_FINGER_DIP,
                           HandLandmark.INDEX_FINGER_TIP)
_MIDDLE_FINGER_LANDMARKS = (HandLandmark.MIDDLE_FINGER_PIP,
                            HandLandmark.MIDDLE_FINGER_DIP,
                            HandLandmark.MIDDLE_FINGER_TIP)
_RING_FINGER_LANDMARKS = (HandLandmark.RING_FINGER_PIP,
                          HandLandmark.RING_FINGER_DIP,
                          HandLandmark.RING_FINGER_TIP)
_PINKY_FINGER_LANDMARKS = (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP,
                           HandLandmark.PINKY_TIP)
# Maps each landmark group (a tuple of landmarks) to the DrawingSpec used for
# every landmark in that group.
_HAND_LANDMARK_STYLE = {
    _PALM_LANDMARKS:
        DrawingSpec(
            color=_RED, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _THUMB_LANDMARKS:
        DrawingSpec(
            color=_PEACH, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _INDEX_FINGER_LANDMARKS:
        DrawingSpec(
            color=_PURPLE, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _MIDDLE_FINGER_LANDMARKS:
        DrawingSpec(
            color=_YELLOW, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _RING_FINGER_LANDMARKS:
        DrawingSpec(
            color=_GREEN, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
    _PINKY_FINGER_LANDMARKS:
        DrawingSpec(
            color=_BLUE, thickness=_THICKNESS_DOT, circle_radius=_RADIUS),
}
|
||||
|
||||
# Hand connections
# Each tuple groups (start landmark, end landmark) pairs that share one
# drawing style.
_PALM_CONNECTIONS = ((HandLandmark.WRIST, HandLandmark.THUMB_CMC),
                     (HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
                     (HandLandmark.MIDDLE_FINGER_MCP,
                      HandLandmark.RING_FINGER_MCP),
                     (HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
                     (HandLandmark.INDEX_FINGER_MCP,
                      HandLandmark.MIDDLE_FINGER_MCP), (HandLandmark.WRIST,
                                                        HandLandmark.PINKY_MCP))
_THUMB_CONNECTIONS = ((HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
                      (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
                      (HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP))
_INDEX_FINGER_CONNECTIONS = ((HandLandmark.INDEX_FINGER_MCP,
                              HandLandmark.INDEX_FINGER_PIP),
                             (HandLandmark.INDEX_FINGER_PIP,
                              HandLandmark.INDEX_FINGER_DIP),
                             (HandLandmark.INDEX_FINGER_DIP,
                              HandLandmark.INDEX_FINGER_TIP))
_MIDDLE_FINGER_CONNECTIONS = ((HandLandmark.MIDDLE_FINGER_MCP,
                               HandLandmark.MIDDLE_FINGER_PIP),
                              (HandLandmark.MIDDLE_FINGER_PIP,
                               HandLandmark.MIDDLE_FINGER_DIP),
                              (HandLandmark.MIDDLE_FINGER_DIP,
                               HandLandmark.MIDDLE_FINGER_TIP))
_RING_FINGER_CONNECTIONS = ((HandLandmark.RING_FINGER_MCP,
                             HandLandmark.RING_FINGER_PIP),
                            (HandLandmark.RING_FINGER_PIP,
                             HandLandmark.RING_FINGER_DIP),
                            (HandLandmark.RING_FINGER_DIP,
                             HandLandmark.RING_FINGER_TIP))
_PINKY_FINGER_CONNECTIONS = ((HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
                             (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
                             (HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP))
# Maps each connection group (a tuple of landmark pairs) to the DrawingSpec
# used for every connection in that group. Palm connections use the thicker
# _THICKNESS_WRIST_MCP; all finger connections use _THICKNESS_FINGER.
_HAND_CONNECTION_STYLE = {
    _PALM_CONNECTIONS:
        DrawingSpec(color=_GRAY, thickness=_THICKNESS_WRIST_MCP),
    _THUMB_CONNECTIONS:
        DrawingSpec(color=_PEACH, thickness=_THICKNESS_FINGER),
    _INDEX_FINGER_CONNECTIONS:
        DrawingSpec(color=_PURPLE, thickness=_THICKNESS_FINGER),
    _MIDDLE_FINGER_CONNECTIONS:
        DrawingSpec(color=_YELLOW, thickness=_THICKNESS_FINGER),
    _RING_FINGER_CONNECTIONS:
        DrawingSpec(color=_GREEN, thickness=_THICKNESS_FINGER),
    _PINKY_FINGER_CONNECTIONS:
        DrawingSpec(color=_BLUE, thickness=_THICKNESS_FINGER)
}
|
||||
|
||||
|
||||
def get_default_hand_landmark_style() -> Mapping[int, DrawingSpec]:
  """Builds the per-landmark view of the default hand landmark styles.

  `_HAND_LANDMARK_STYLE` is keyed by groups of landmarks; this flattens it so
  that every individual landmark maps to the spec of its group.

  Returns:
    A new dict from each hand landmark to its default drawing spec.
  """
  return {
      landmark: spec
      for landmark_group, spec in _HAND_LANDMARK_STYLE.items()
      for landmark in landmark_group
  }
|
||||
|
||||
|
||||
def get_default_hand_connection_style(
) -> Mapping[Tuple[int, int], DrawingSpec]:
  """Builds the per-connection view of the default hand connection styles.

  `_HAND_CONNECTION_STYLE` is keyed by groups of connections; this flattens
  it so that every individual connection maps to the spec of its group.

  Returns:
    A new dict from each hand connection to its default drawing spec.
  """
  return {
      connection: spec
      for connection_group, spec in _HAND_CONNECTION_STYLE.items()
      for connection in connection_group
  }
|
@@ -0,0 +1,307 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""MediaPipe solution drawing utils."""
|
||||
|
||||
import math
|
||||
from typing import List, Mapping, Optional, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import dataclasses
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
from mediapipe.framework.formats import detection_pb2
|
||||
from mediapipe.framework.formats import location_data_pb2
|
||||
from mediapipe.framework.formats import landmark_pb2
|
||||
|
||||
# Landmarks that report a `presence` below this value are not drawn.
PRESENCE_THRESHOLD = 0.5
# Number of channels an input image must have in its third dimension.
RGB_CHANNELS = 3
# Named colours. The channel order is (blue, green, red): RED_COLOR carries
# its 255 in the last slot and BLUE_COLOR in the first.
BLACK_COLOR = (0, 0, 0)
RED_COLOR = (0, 0, 255)
GREEN_COLOR = (0, 128, 0)
BLUE_COLOR = (255, 0, 0)
# Landmarks that report a `visibility` below this value are not drawn.
VISIBILITY_THRESHOLD = 0.5
|
||||
|
||||
|
||||
@dataclasses.dataclass
class DrawingSpec:
  """Drawing settings (color, thickness, circle radius) for one annotation."""
  # Color for drawing the annotation. Default to the green color.
  color: Tuple[int, int, int] = (0, 255, 0)
  # Thickness for drawing the annotation. Default to 2 pixels.
  thickness: int = 2
  # Circle radius. Default to 2 pixels.
  circle_radius: int = 2
|
||||
|
||||
|
||||
def _normalized_to_pixel_coordinates(
|
||||
normalized_x: float, normalized_y: float, image_width: int,
|
||||
image_height: int) -> Union[None, Tuple[int, int]]:
|
||||
"""Converts normalized value pair to pixel coordinates."""
|
||||
|
||||
# Checks if the float value is between 0 and 1.
|
||||
def is_valid_normalized_value(value: float) -> bool:
|
||||
return (value > 0 or math.isclose(0, value)) and (value < 1 or
|
||||
math.isclose(1, value))
|
||||
|
||||
if not (is_valid_normalized_value(normalized_x) and
|
||||
is_valid_normalized_value(normalized_y)):
|
||||
# TODO: Draw coordinates even if it's outside of the image bounds.
|
||||
return None
|
||||
x_px = min(math.floor(normalized_x * image_width), image_width - 1)
|
||||
y_px = min(math.floor(normalized_y * image_height), image_height - 1)
|
||||
return x_px, y_px
|
||||
|
||||
|
||||
def draw_detection(
    image: np.ndarray,
    detection: detection_pb2.Detection,
    keypoint_drawing_spec: DrawingSpec = DrawingSpec(color=RED_COLOR),
    bbox_drawing_spec: DrawingSpec = DrawingSpec()):
  """Draws the detection bounding box and keypoints on the image.

  Keypoints whose normalized coordinates fall outside the image are skipped,
  and the bounding box is skipped when either of its corners falls outside the
  image, because `_normalized_to_pixel_coordinates` yields no pixel position
  for such points.

  Args:
    image: A three channel RGB image represented as numpy ndarray.
    detection: A detection proto message to be annotated on the image.
    keypoint_drawing_spec: A DrawingSpec object that specifies the keypoints'
      drawing settings such as color, line thickness, and circle radius.
    bbox_drawing_spec: A DrawingSpec object that specifies the bounding box's
      drawing settings such as color and line thickness.

  Raises:
    ValueError: If one of the followings:
      a) If the input image is not three channel RGB.
      b) If the location data is not relative data.
  """
  if not detection.location_data:
    return
  # Check the rank first so that a 2-D (e.g. grayscale) image is reported as
  # the documented ValueError rather than an IndexError on shape[2].
  if len(image.shape) != 3 or image.shape[2] != RGB_CHANNELS:
    raise ValueError('Input image must contain three channel rgb data.')
  image_rows, image_cols, _ = image.shape

  location = detection.location_data
  if location.format != location_data_pb2.LocationData.RELATIVE_BOUNDING_BOX:
    raise ValueError(
        'LocationData must be relative for this drawing funtion to work.')
  # Draws keypoints.
  for keypoint in location.relative_keypoints:
    keypoint_px = _normalized_to_pixel_coordinates(keypoint.x, keypoint.y,
                                                   image_cols, image_rows)
    # No pixel position is available for out-of-range keypoints; passing None
    # on to OpenCV would raise.
    if keypoint_px is None:
      continue
    cv2.circle(image, keypoint_px, keypoint_drawing_spec.circle_radius,
               keypoint_drawing_spec.color, keypoint_drawing_spec.thickness)
  # Draws bounding box if exists.
  if not location.HasField('relative_bounding_box'):
    return
  relative_bounding_box = location.relative_bounding_box
  rect_start_point = _normalized_to_pixel_coordinates(
      relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols,
      image_rows)
  rect_end_point = _normalized_to_pixel_coordinates(
      relative_bounding_box.xmin + relative_bounding_box.width,
      relative_bounding_box.ymin + relative_bounding_box.height, image_cols,
      image_rows)
  # Both corners are needed to draw the rectangle.
  if rect_start_point is None or rect_end_point is None:
    return
  cv2.rectangle(image, rect_start_point, rect_end_point,
                bbox_drawing_spec.color, bbox_drawing_spec.thickness)
|
||||
|
||||
|
||||
def draw_landmarks(
    image: np.ndarray,
    landmark_list: landmark_pb2.NormalizedLandmarkList,
    connections: Optional[List[Tuple[int, int]]] = None,
    landmark_drawing_spec: Union[DrawingSpec,
                                 Mapping[int, DrawingSpec]] = DrawingSpec(
                                     color=RED_COLOR),
    connection_drawing_spec: Union[DrawingSpec,
                                   Mapping[Tuple[int, int],
                                           DrawingSpec]] = DrawingSpec()):
  """Draws the landmarks and the connections on the image.

  Landmarks whose visibility or presence is set and below the module
  thresholds, or whose coordinates fall outside the image, are not drawn; a
  connection is drawn only when both of its end landmarks are drawn.

  Args:
    image: A three channel RGB image represented as numpy ndarray.
    landmark_list: A normalized landmark list proto message to be annotated on
      the image.
    connections: A list of landmark index tuples that specifies how landmarks to
      be connected in the drawing.
    landmark_drawing_spec: Either a DrawingSpec object or a mapping from
      hand landmarks to the DrawingSpecs that specifies the landmarks' drawing
      settings such as color, line thickness, and circle radius.
    connection_drawing_spec: Either a DrawingSpec object or a mapping from
      hand connections to the DrawingSpecs that specifies the
      connections' drawing settings such as color and line thickness.

  Raises:
    ValueError: If one of the followings:
      a) If the input image is not three channel RGB.
      b) If any connections contain invalid landmark index.
  """
  if not landmark_list:
    return
  # Check the rank first so that a 2-D (e.g. grayscale) image is reported as
  # the documented ValueError rather than an IndexError on shape[2].
  if len(image.shape) != 3 or image.shape[2] != RGB_CHANNELS:
    raise ValueError('Input image must contain three channel rgb data.')
  image_rows, image_cols, _ = image.shape
  # Pixel position of every landmark that will actually be drawn.
  idx_to_coordinates = {}
  for idx, landmark in enumerate(landmark_list.landmark):
    if ((landmark.HasField('visibility') and
         landmark.visibility < VISIBILITY_THRESHOLD) or
        (landmark.HasField('presence') and
         landmark.presence < PRESENCE_THRESHOLD)):
      continue
    landmark_px = _normalized_to_pixel_coordinates(landmark.x, landmark.y,
                                                   image_cols, image_rows)
    if landmark_px:
      idx_to_coordinates[idx] = landmark_px
  if connections:
    num_landmarks = len(landmark_list.landmark)
    # Draws the connections if the start and end landmarks are both visible.
    for connection in connections:
      start_idx = connection[0]
      end_idx = connection[1]
      if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks):
        raise ValueError(f'Landmark index is out of range. Invalid connection '
                         f'from landmark #{start_idx} to landmark #{end_idx}.')
      if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates:
        # A mapping is looked up per connection; a single spec applies to all.
        drawing_spec = connection_drawing_spec[connection] if isinstance(
            connection_drawing_spec, Mapping) else connection_drawing_spec
        cv2.line(image, idx_to_coordinates[start_idx],
                 idx_to_coordinates[end_idx], drawing_spec.color,
                 drawing_spec.thickness)
  # Draws landmark points after finishing the connection lines, which is
  # aesthetically better.
  for idx, landmark_px in idx_to_coordinates.items():
    # A mapping is looked up per landmark index; a single spec applies to all.
    drawing_spec = landmark_drawing_spec[idx] if isinstance(
        landmark_drawing_spec, Mapping) else landmark_drawing_spec
    cv2.circle(image, landmark_px, drawing_spec.circle_radius,
               drawing_spec.color, drawing_spec.thickness)
|
||||
|
||||
|
||||
def draw_axis(
    image: np.ndarray,
    rotation: np.ndarray,
    translation: np.ndarray,
    focal_length: Tuple[float, float] = (1.0, 1.0),
    principal_point: Tuple[float, float] = (0.0, 0.0),
    axis_length: float = 0.1,
    axis_drawing_spec: DrawingSpec = DrawingSpec()):
  """Draws the 3D axis on the image.

  The x, y and z axes are drawn as arrows from the projected origin using
  RED_COLOR, GREEN_COLOR and BLUE_COLOR respectively; only the thickness is
  taken from `axis_drawing_spec`.

  Args:
    image: A three channel RGB image represented as numpy ndarray.
    rotation: Rotation matrix from object to camera coordinate frame.
    translation: Translation vector from object to camera coordinate frame.
    focal_length: camera focal length along x and y directions.
    principal_point: camera principal point in x and y.
    axis_length: length of the axis in the drawing.
    axis_drawing_spec: A DrawingSpec object that specifies the xyz axis
      drawing settings such as line thickness.

  Raises:
    ValueError: If one of the followings:
      a) If the input image is not three channel RGB.
  """
  # Check the rank first so that a 2-D (e.g. grayscale) image is reported as
  # the documented ValueError rather than an IndexError on shape[2].
  if len(image.shape) != 3 or image.shape[2] != RGB_CHANNELS:
    raise ValueError('Input image must contain three channel rgb data.')
  image_rows, image_cols, _ = image.shape
  # Create axis points in camera coordinate frame.
  # Rows are the origin followed by the unit x, y and z end points.
  axis_world = np.float32([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
  axis_cam = np.matmul(rotation, axis_length*axis_world.T).T + translation
  x = axis_cam[..., 0]
  y = axis_cam[..., 1]
  z = axis_cam[..., 2]
  # Project 3D points to NDC space.
  fx, fy = focal_length
  px, py = principal_point
  # The small constant added to z avoids dividing by exactly zero; results
  # are clipped to the [-1, 1] range.
  x_ndc = np.clip(-fx * x / (z + 1e-5) + px, -1., 1.)
  y_ndc = np.clip(-fy * y / (z + 1e-5) + py, -1., 1.)
  # Convert from NDC space to image space.
  x_im = np.int32((1 + x_ndc) * 0.5 * image_cols)
  y_im = np.int32((1 - y_ndc) * 0.5 * image_rows)
  # Draw xyz axis on the image.
  origin = (x_im[0], y_im[0])
  x_axis = (x_im[1], y_im[1])
  y_axis = (x_im[2], y_im[2])
  z_axis = (x_im[3], y_im[3])
  cv2.arrowedLine(image, origin, x_axis, RED_COLOR,
                  axis_drawing_spec.thickness)
  cv2.arrowedLine(image, origin, y_axis, GREEN_COLOR,
                  axis_drawing_spec.thickness)
  cv2.arrowedLine(image, origin, z_axis, BLUE_COLOR,
                  axis_drawing_spec.thickness)
|
||||
|
||||
|
||||
def _normalize_color(color):
|
||||
return tuple(v / 255. for v in color)
|
||||
|
||||
|
||||
def plot_landmarks(landmark_list: landmark_pb2.NormalizedLandmarkList,
                   connections: Optional[List[Tuple[int, int]]] = None,
                   landmark_drawing_spec: DrawingSpec = DrawingSpec(
                       color=RED_COLOR, thickness=5),
                   connection_drawing_spec: DrawingSpec = DrawingSpec(
                       color=BLACK_COLOR, thickness=5),
                   elevation: int = 10,
                   azimuth: int = 10):
  """Shows the landmarks and their connections in a matplotlib 3d plot.

  Nothing is plotted when `landmark_list` evaluates as false. Landmarks whose
  `visibility` or `presence` field is set and below the corresponding
  threshold are left out, and so is every connection touching them.

  Args:
    landmark_list: A normalized landmark list proto message to be plotted.
    connections: A list of landmark index tuples that specifies which
      landmarks are to be connected.
    landmark_drawing_spec: A DrawingSpec object that specifies the landmarks'
      drawing settings such as color and line thickness.
    connection_drawing_spec: A DrawingSpec object that specifies the
      connections' drawing settings such as color and line thickness.
    elevation: The elevation from which to view the plot.
    azimuth: The azimuth angle to rotate the plot.

  Raises:
    ValueError: If any connection contains an invalid landmark index.
  """
  if not landmark_list:
    return
  plt.figure(figsize=(10, 10))
  axes_3d = plt.axes(projection='3d')
  axes_3d.view_init(elev=elevation, azim=azimuth)

  # Maps landmark index -> plotted (x, y, z) for each landmark actually drawn.
  drawn_points = {}
  for index, landmark in enumerate(landmark_list.landmark):
    if (landmark.HasField('visibility') and
        landmark.visibility < VISIBILITY_THRESHOLD):
      continue
    if (landmark.HasField('presence') and
        landmark.presence < PRESENCE_THRESHOLD):
      continue
    # Plot axes are remapped: plot x = -z, plot y = x, plot z = -y.
    point = (-landmark.z, landmark.x, -landmark.y)
    axes_3d.scatter3D(
        xs=[point[0]],
        ys=[point[1]],
        zs=[point[2]],
        # The spec color is channel-reversed before being scaled by 1/255.
        color=_normalize_color(landmark_drawing_spec.color[::-1]),
        linewidth=landmark_drawing_spec.thickness)
    drawn_points[index] = point

  if connections:
    landmark_count = len(landmark_list.landmark)
    for connection in connections:
      src_idx = connection[0]
      dst_idx = connection[1]
      indices_in_range = (0 <= src_idx < landmark_count and
                          0 <= dst_idx < landmark_count)
      if not indices_in_range:
        raise ValueError(f'Landmark index is out of range. Invalid connection '
                         f'from landmark #{src_idx} to landmark #{dst_idx}.')
      # A connection is drawn only when both of its end points were plotted.
      if src_idx not in drawn_points or dst_idx not in drawn_points:
        continue
      src_point = drawn_points[src_idx]
      dst_point = drawn_points[dst_idx]
      axes_3d.plot3D(
          xs=[src_point[0], dst_point[0]],
          ys=[src_point[1], dst_point[1]],
          zs=[src_point[2], dst_point[2]],
          color=_normalize_color(connection_drawing_spec.color[::-1]),
          linewidth=connection_drawing_spec.thickness)
  plt.show()
|
@@ -0,0 +1,231 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python.solutions.drawing_utils."""
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from google.protobuf import text_format
|
||||
|
||||
from mediapipe.framework.formats import detection_pb2
|
||||
from mediapipe.framework.formats import landmark_pb2
|
||||
from mediapipe.python.solutions import drawing_utils
|
||||
|
||||
# Drawing specs used by the tests below to paint the expected reference
# images that the output of the drawing_utils functions is compared against.
DEFAULT_BBOX_DRAWING_SPEC = drawing_utils.DrawingSpec()
DEFAULT_CONNECTION_DRAWING_SPEC = drawing_utils.DrawingSpec()
# Key points / landmarks are expected to be drawn with the color (0, 0, 255).
DEFAULT_CIRCLE_DRAWING_SPEC = drawing_utils.DrawingSpec(color=(0, 0, 255))
DEFAULT_AXIS_DRAWING_SPEC = drawing_utils.DrawingSpec()
|
||||
|
||||
|
||||
class DrawingUtilTest(parameterized.TestCase):
  """Pixel-exact tests for the drawing helpers in drawing_utils."""

  @staticmethod
  def _draw_default_circle(canvas, center):
    """Paints one circle with DEFAULT_CIRCLE_DRAWING_SPEC onto `canvas`."""
    cv2.circle(canvas, center, DEFAULT_CIRCLE_DRAWING_SPEC.circle_radius,
               DEFAULT_CIRCLE_DRAWING_SPEC.color,
               DEFAULT_CIRCLE_DRAWING_SPEC.thickness)

  @staticmethod
  def _draw_connected_pair(canvas, start_point, end_point, connection_spec,
                           landmark_spec):
    """Paints the reference picture of two landmarks joined by one line.

    The line is drawn first, then the circle at `start_point`, then the circle
    at `end_point`.
    """
    cv2.line(canvas, start_point, end_point, connection_spec.color,
             connection_spec.thickness)
    for center in (start_point, end_point):
      cv2.circle(canvas, center, landmark_spec.circle_radius,
                 landmark_spec.color, landmark_spec.thickness)

  @staticmethod
  def _draw_expected_axes(canvas, origin, x_tip, y_tip, z_tip):
    """Paints red/green/blue arrows from `origin` to the x/y/z axis tips."""
    tips_and_colors = (
        (x_tip, drawing_utils.RED_COLOR),
        (y_tip, drawing_utils.GREEN_COLOR),
        (z_tip, drawing_utils.BLUE_COLOR),
    )
    for tip, color in tips_and_colors:
      cv2.arrowedLine(canvas, origin, tip, color,
                      DEFAULT_AXIS_DRAWING_SPEC.thickness)

  def test_invalid_input_image(self):
    """draw_landmarks/draw_detection/draw_axis reject a 2-channel image."""
    two_channel_image = np.arange(18, dtype=np.uint8).reshape(3, 3, 2)
    expected_message = 'Input image must contain three channel rgb data.'
    with self.assertRaisesRegex(ValueError, expected_message):
      drawing_utils.draw_landmarks(two_channel_image,
                                   landmark_pb2.NormalizedLandmarkList())
    with self.assertRaisesRegex(ValueError, expected_message):
      drawing_utils.draw_detection(two_channel_image,
                                   detection_pb2.Detection())
    with self.assertRaisesRegex(ValueError, expected_message):
      rotation = np.eye(3, dtype=np.float32)
      translation = np.array([0., 0., 1.])
      drawing_utils.draw_axis(two_channel_image, rotation, translation)

  def test_invalid_connection(self):
    """A connection pointing at a non-existent landmark raises ValueError."""
    two_landmarks = text_format.Parse(
        'landmark {x: 0.5 y: 0.5} landmark {x: 0.2 y: 0.2}',
        landmark_pb2.NormalizedLandmarkList())
    canvas = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
    with self.assertRaisesRegex(ValueError, 'Landmark index is out of range.'):
      drawing_utils.draw_landmarks(canvas, two_landmarks, [(0, 2)])

  def test_unqualified_detection(self):
    """A detection whose location data format is GLOBAL raises ValueError."""
    global_detection = text_format.Parse('location_data {format: GLOBAL}',
                                         detection_pb2.Detection())
    canvas = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
    with self.assertRaisesRegex(ValueError, 'LocationData must be relative'):
      drawing_utils.draw_detection(canvas, global_detection)

  def test_draw_keypoints_only(self):
    """A detection with only key points yields one circle per key point."""
    detection = text_format.Parse(
        'location_data {'
        ' format: RELATIVE_BOUNDING_BOX'
        ' relative_keypoints {x: 0 y: 1}'
        ' relative_keypoints {x: 1 y: 0}}', detection_pb2.Detection())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = image.copy()
    for center in ((0, 99), (99, 0)):
      self._draw_default_circle(expected_result, center)
    drawing_utils.draw_detection(image, detection)
    np.testing.assert_array_equal(image, expected_result)

  def test_draw_bboxs_only(self):
    """A detection with only a bounding box yields a single rectangle."""
    detection = text_format.Parse(
        'location_data {'
        ' format: RELATIVE_BOUNDING_BOX'
        ' relative_bounding_box {xmin: 0 ymin: 0 width: 1 height: 1}}',
        detection_pb2.Detection())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = image.copy()
    cv2.rectangle(expected_result, (0, 0), (99, 99),
                  DEFAULT_BBOX_DRAWING_SPEC.color,
                  DEFAULT_BBOX_DRAWING_SPEC.thickness)
    drawing_utils.draw_detection(image, detection)
    np.testing.assert_array_equal(image, expected_result)

  @parameterized.named_parameters(
      ('landmark_list_has_only_one_element', 'landmark {x: 0.1 y: 0.1}'),
      ('second_landmark_is_invisible',
       'landmark {x: 0.1 y: 0.1} landmark {x: 0.5 y: 0.5 visibility: 0.0}'))
  def test_draw_single_landmark_point(self, landmark_list_text):
    """Only the landmark at (0.1, 0.1) is expected to be drawn."""
    landmark_list = text_format.Parse(landmark_list_text,
                                      landmark_pb2.NormalizedLandmarkList())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = image.copy()
    self._draw_default_circle(expected_result, (10, 10))
    drawing_utils.draw_landmarks(image, landmark_list)
    np.testing.assert_array_equal(image, expected_result)

  @parameterized.named_parameters(
      ('landmarks_have_x_and_y_only',
       'landmark {x: 0.1 y: 0.5} landmark {x: 0.5 y: 0.1}'),
      ('landmark_zero_visibility_and_presence',
       'landmark {x: 0.1 y: 0.5 presence: 0.5}'
       'landmark {x: 0.5 y: 0.1 visibility: 0.5}'))
  def test_draw_landmarks_and_connections(self, landmark_list_text):
    """Two landmarks plus the connection (0, 1) are drawn with defaults."""
    landmark_list = text_format.Parse(landmark_list_text,
                                      landmark_pb2.NormalizedLandmarkList())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = image.copy()
    self._draw_connected_pair(
        expected_result, (10, 50), (50, 10),
        DEFAULT_CONNECTION_DRAWING_SPEC, DEFAULT_CIRCLE_DRAWING_SPEC)
    drawing_utils.draw_landmarks(
        image=image, landmark_list=landmark_list, connections=[(0, 1)])
    np.testing.assert_array_equal(image, expected_result)

  def test_draw_axis(self):
    """Axes for a rotation about x with translation (0, 0, -0.2)."""
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = image.copy()
    self._draw_expected_axes(
        expected_result,
        origin=(50, 50),
        x_tip=(75, 50),
        y_tip=(50, 22),
        z_tip=(50, 77))
    r = np.sqrt(2.) / 2.
    rotation = np.array([[1., 0., 0.], [0., r, -r], [0., r, r]])
    translation = np.array([0, 0, -0.2])
    drawing_utils.draw_axis(image, rotation, translation)
    np.testing.assert_array_equal(image, expected_result)

  def test_draw_axis_zero_translation(self):
    """Axes for an identity rotation and an all-zero translation."""
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = image.copy()
    self._draw_expected_axes(
        expected_result,
        origin=(50, 50),
        x_tip=(0, 50),
        y_tip=(50, 100),
        z_tip=(50, 50))
    rotation = np.eye(3, dtype=np.float32)
    translation = np.zeros((3,), dtype=np.float32)
    drawing_utils.draw_axis(image, rotation, translation)
    np.testing.assert_array_equal(image, expected_result)

  def test_min_and_max_coordinate_values(self):
    """Coordinates 0.0 and 1.0 are expected at pixels 0 and 99."""
    landmark_list = text_format.Parse(
        'landmark {x: 0.0 y: 1.0}'
        'landmark {x: 1.0 y: 0.0}', landmark_pb2.NormalizedLandmarkList())
    image = np.zeros((100, 100, 3), np.uint8)
    expected_result = image.copy()
    self._draw_connected_pair(
        expected_result, (0, 99), (99, 0),
        DEFAULT_CONNECTION_DRAWING_SPEC, DEFAULT_CIRCLE_DRAWING_SPEC)
    drawing_utils.draw_landmarks(
        image=image, landmark_list=landmark_list, connections=[(0, 1)])
    np.testing.assert_array_equal(image, expected_result)

  def test_drawing_spec(self):
    """Caller-supplied drawing specs are used for landmarks and connections."""
    landmark_list = text_format.Parse(
        'landmark {x: 0.1 y: 0.1}'
        'landmark {x: 0.8 y: 0.8}', landmark_pb2.NormalizedLandmarkList())
    image = np.zeros((100, 100, 3), np.uint8)
    landmark_drawing_spec = drawing_utils.DrawingSpec(
        color=(0, 0, 255), thickness=5)
    connection_drawing_spec = drawing_utils.DrawingSpec(
        color=(255, 0, 0), thickness=3)
    expected_result = image.copy()
    self._draw_connected_pair(
        expected_result, (10, 10), (80, 80),
        connection_drawing_spec, landmark_drawing_spec)
    drawing_utils.draw_landmarks(
        image=image,
        landmark_list=landmark_list,
        connections=[(0, 1)],
        landmark_drawing_spec=landmark_drawing_spec,
        connection_drawing_spec=connection_drawing_spec)
    np.testing.assert_array_equal(image, expected_result)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Hand control to the absl test runner when executed as a script.
  absltest.main()
|
@@ -0,0 +1,112 @@
|
||||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""MediaPipe Face Detection."""
|
||||
|
||||
import enum
|
||||
from typing import NamedTuple, Union
|
||||
|
||||
import numpy as np
|
||||
from mediapipe.framework.formats import detection_pb2
|
||||
from mediapipe.framework.formats import location_data_pb2
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
||||
from mediapipe.calculators.tensor import inference_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
|
||||
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
||||
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
||||
# pylint: enable=unused-import
|
||||
from mediapipe.python.solution_base import SolutionBase
|
||||
|
||||
# Binary graph passed to SolutionBase for the short-range model; FaceDetection
# uses it for every `model_selection` value other than 1.
SHORT_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb'
# Binary graph passed to SolutionBase for the full-range model
# (`model_selection == 1`).
FULL_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb'
|
||||
|
||||
|
||||
def get_key_point(
    detection: detection_pb2.Detection, key_point_enum: 'FaceKeyPoint'
) -> Union[None, location_data_pb2.LocationData.RelativeKeypoint]:
  """Looks up one face key point of a detection by its FaceKeyPoint value.

  Args:
    detection: A detection proto message that contains face key points.
    key_point_enum: A FaceKeyPoint value; it is used as the index into the
      detection's relative key points.

  Returns:
    The RelativeKeypoint proto message stored at index `key_point_enum`, or
    None when `detection` or its `location_data` evaluates as false.
  """
  if detection and detection.location_data:
    return detection.location_data.relative_keypoints[key_point_enum]
  return None
|
||||
|
||||
|
||||
class FaceKeyPoint(enum.IntEnum):
  """The six face detection key points.

  Each member's integer value is the index get_key_point uses to pick the
  matching entry out of a detection's relative key points.
  """
  # Eyes.
  RIGHT_EYE = 0
  LEFT_EYE = 1
  # Nose and mouth.
  NOSE_TIP = 2
  MOUTH_CENTER = 3
  # Ears.
  RIGHT_EAR_TRAGION = 4
  LEFT_EAR_TRAGION = 5
|
||||
|
||||
|
||||
class FaceDetection(SolutionBase):
  """MediaPipe Face Detection.

  MediaPipe Face Detection processes an RGB image and returns a list of the
  detected face location data.

  Please refer to
  https://solutions.mediapipe.dev/face_detection#python-solution-api
  for usage examples.
  """

  def __init__(self, min_detection_confidence=0.5, model_selection=0):
    """Initializes a MediaPipe Face Detection object.

    Args:
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for face
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/face_detection#min_detection_confidence.
      model_selection: 0 or 1. 0 to select a short-range model that works
        best for faces within 2 meters from the camera, and 1 for a full-range
        model best for faces within 5 meters. Any value other than 1 selects
        the short-range model. See details in
        https://solutions.mediapipe.dev/face_detection#model_selection.
    """
    # The graph file and the subgraph prefix of the threshold option are
    # chosen together so that they always refer to the same model.
    if model_selection == 1:
      graph_path = FULL_RANGE_GRAPH_FILE_PATH
      subgraph_prefix = 'facedetectionfullrangecommon'
    else:
      graph_path = SHORT_RANGE_GRAPH_FILE_PATH
      subgraph_prefix = 'facedetectionshortrangecommon'
    threshold_option = (
        subgraph_prefix + '__TensorsToDetectionsCalculator.min_score_thresh')

    super().__init__(
        binary_graph_path=graph_path,
        calculator_params={threshold_option: min_detection_confidence},
        outputs=['detections'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns a list of the detected face location data.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with a "detections" field that contains a list of the
      detected face location data.
    """
    # The image is handed to the base class under the input name 'image'.
    graph_inputs = {'image': image}
    return super().process(input_data=graph_inputs)
|
@@ -0,0 +1,92 @@
|
||||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for mediapipe.python.solutions.face_detection."""
|
||||
|
||||
import os
|
||||
import tempfile # pylint: disable=unused-import
|
||||
from typing import NamedTuple
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
|
||||
# resources dependency
|
||||
# undeclared dependency
|
||||
from mediapipe.python.solutions import drawing_utils as mp_drawing
|
||||
from mediapipe.python.solutions import face_detection as mp_faces
|
||||
|
||||
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
# Expected [x, y] pixel positions of the six face key points, one list per
# model; test_face compares them with the detected key points.
SHORT_RANGE_EXPECTED_FACE_KEY_POINTS = [
    [363, 182],
    [460, 186],
    [420, 241],
    [417, 284],
    [295, 199],
    [502, 198],
]
FULL_RANGE_EXPECTED_FACE_KEY_POINTS = [
    [363, 181],
    [455, 181],
    [413, 233],
    [411, 278],
    [306, 204],
    [499, 207],
]
# Every per-coordinate prediction error must be below this bound.
DIFF_THRESHOLD = 5  # pixels
|
||||
|
||||
|
||||
class FaceDetectionTest(parameterized.TestCase):
  """Tests for the FaceDetection solution."""

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    """Draws all detections on `frame` and writes it to the temp directory.

    The file is named after the last component of the test id followed by
    '_frame_<idx>.png'.
    """
    for detection in results.detections:
      mp_drawing.draw_detection(frame, detection)
    test_name = self.id().split('.')[-1]
    file_name = test_name + '_frame_{}.png'.format(idx)
    cv2.imwrite(os.path.join(tempfile.gettempdir(), file_name), frame)

  def test_invalid_image_shape(self):
    """A four-channel image is rejected with a ValueError."""
    four_channel_image = np.arange(36, dtype=np.uint8).reshape(3, 3, 4)
    with mp_faces.FaceDetection() as faces:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        faces.process(four_channel_image)

  def test_blank_image(self):
    """An image filled with 255 produces no detections."""
    image = np.full([100, 100, 3], 255, dtype=np.uint8)
    with mp_faces.FaceDetection(min_detection_confidence=0.5) as faces:
      results = faces.process(image)
      self.assertIsNone(results.detections)

  @parameterized.named_parameters(('short_range_model', 0),
                                  ('full_range_model', 1))
  def test_face(self, model_selection):
    """The portrait yields one face whose key points match the expectation."""
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/portrait.jpg')
    image = cv2.imread(image_path)
    rows, cols, _ = image.shape
    with mp_faces.FaceDetection(
        min_detection_confidence=0.5, model_selection=model_selection) as faces:
      # The same frame is processed five times in a row.
      for idx in range(5):
        results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._annotate(image.copy(), results, idx)
        location_data = results.detections[0].location_data
        keypoints = location_data.relative_keypoints
        # Scale the relative coordinates to pixel coordinates.
        x = [keypoint.x * cols for keypoint in keypoints]
        y = [keypoint.y * rows for keypoint in keypoints]
        face_keypoints = np.column_stack((x, y))
        if model_selection == 0:
          expected_keypoints = SHORT_RANGE_EXPECTED_FACE_KEY_POINTS
        else:
          expected_keypoints = FULL_RANGE_EXPECTED_FACE_KEY_POINTS
        prediction_error = np.abs(
            np.asarray(face_keypoints) - np.asarray(expected_keypoints))

        self.assertLen(results.detections, 1)
        self.assertLen(location_data.relative_keypoints, 6)
        npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Hand control to the absl test runner when executed as a script.
  absltest.main()
|
@@ -0,0 +1,238 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""MediaPipe FaceMesh."""
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.calculators.core import gate_calculator_pb2
|
||||
from mediapipe.calculators.core import split_vector_calculator_pb2
|
||||
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
||||
from mediapipe.calculators.tensor import inference_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
|
||||
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
||||
from mediapipe.calculators.util import association_calculator_pb2
|
||||
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
|
||||
from mediapipe.calculators.util import logic_calculator_pb2
|
||||
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
||||
from mediapipe.calculators.util import rect_transformation_calculator_pb2
|
||||
from mediapipe.calculators.util import thresholding_calculator_pb2
|
||||
# pylint: enable=unused-import
|
||||
from mediapipe.python.solution_base import SolutionBase
|
||||
|
||||
# Binary graph that FaceMesh hands to SolutionBase.
BINARYPB_FILE_PATH = 'mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb'
# Set of (start, end) landmark index pairs. Each tuple below is a path of
# landmark indices; every pair of neighbouring indices along a path becomes one
# connection.
FACE_CONNECTIONS = frozenset(
    edge
    for path in (
        # Lips.
        (61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291),
        (61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291),
        (78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308),
        (78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308),
        # Left eye.
        (263, 249, 390, 373, 374, 380, 381, 382, 362),
        (263, 466, 388, 387, 386, 385, 384, 398, 362),
        # Left eyebrow.
        (276, 283, 282, 295, 285),
        (300, 293, 334, 296, 336),
        # Right eye.
        (33, 7, 163, 144, 145, 153, 154, 155, 133),
        (33, 246, 161, 160, 159, 158, 157, 173, 133),
        # Right eyebrow.
        (46, 53, 52, 65, 55),
        (70, 63, 105, 66, 107),
        # Face oval.
        (10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365,
         379, 378, 400, 377, 152, 148, 176, 149, 150, 136, 172, 58, 132, 93,
         234, 127, 162, 21, 54, 103, 67, 109, 10),
    )
    for edge in zip(path, path[1:]))
|
||||
|
||||
|
||||
class FaceMesh(SolutionBase):
  """MediaPipe FaceMesh.

  MediaPipe FaceMesh processes an RGB image and returns the face landmarks on
  each detected face.

  Please refer to https://solutions.mediapipe.dev/face_mesh#python-solution-api
  for usage examples.
  """

  def __init__(self,
               static_image_mode=False,
               max_num_faces=1,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe FaceMesh object.

    Args:
      static_image_mode: Whether to treat the input images as a batch of static
        and possibly unrelated images, or a video stream. See details in
        https://solutions.mediapipe.dev/face_mesh#static_image_mode.
      max_num_faces: Maximum number of faces to detect. See details in
        https://solutions.mediapipe.dev/face_mesh#max_num_faces.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for face
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/face_mesh#min_detection_confidence.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        face landmarks to be considered tracked successfully. See details in
        https://solutions.mediapipe.dev/face_mesh#min_tracking_confidence.
    """
    packet_options = (
        constant_side_packet_calculator_pb2.ConstantSidePacketCalculatorOptions)
    # The constant side packet carries the negation of `static_image_mode`.
    mode_packet = packet_options.ConstantSidePacket(
        bool_value=not static_image_mode)
    calculator_params = {
        'ConstantSidePacketCalculator.packet': [mode_packet],
        'facedetectionshortrangecpu__TensorsToDetectionsCalculator.min_score_thresh':
            min_detection_confidence,
        'facelandmarkcpu__ThresholdingCalculator.threshold':
            min_tracking_confidence,
    }
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs={'num_faces': max_num_faces},
        calculator_params=calculator_params,
        outputs=['multi_face_landmarks'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the face landmarks on each detected face.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with a "multi_face_landmarks" field that contains the
      face landmarks on each detected face.
    """
    # The image is handed to the base class under the input name 'image'.
    graph_inputs = {'image': image}
    return super().process(input_data=graph_inputs)
|
@@ -0,0 +1,125 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python.solutions.face_mesh."""
|
||||
|
||||
import os
|
||||
import tempfile # pylint: disable=unused-import
|
||||
from typing import NamedTuple
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
|
||||
# resources dependency
|
||||
# undeclared dependency
|
||||
from mediapipe.python.solutions import drawing_utils as mp_drawing
|
||||
from mediapipe.python.solutions import face_mesh as mp_faces
|
||||
|
||||
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
|
||||
DIFF_THRESHOLD = 5 # pixels
|
||||
EYE_INDICES_TO_LANDMARKS = {
|
||||
33: [345, 178],
|
||||
7: [348, 179],
|
||||
163: [352, 178],
|
||||
144: [357, 179],
|
||||
145: [365, 179],
|
||||
153: [371, 179],
|
||||
154: [378, 178],
|
||||
155: [381, 177],
|
||||
133: [383, 177],
|
||||
246: [347, 175],
|
||||
161: [350, 174],
|
||||
160: [355, 172],
|
||||
159: [362, 170],
|
||||
158: [368, 171],
|
||||
157: [375, 172],
|
||||
173: [380, 175],
|
||||
263: [467, 176],
|
||||
249: [464, 177],
|
||||
390: [460, 177],
|
||||
373: [455, 178],
|
||||
374: [448, 179],
|
||||
380: [441, 179],
|
||||
381: [435, 178],
|
||||
382: [432, 177],
|
||||
362: [430, 177],
|
||||
466: [465, 175],
|
||||
388: [462, 173],
|
||||
387: [457, 171],
|
||||
386: [450, 170],
|
||||
385: [444, 171],
|
||||
384: [437, 172],
|
||||
398: [432, 175]
|
||||
}
|
||||
|
||||
|
||||
class FaceMeshTest(parameterized.TestCase):
  """Tests for the MediaPipe Face Mesh Python solution."""

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    """Draws the detected face landmarks on a frame and saves it as a PNG.

    The file is written to the system temp directory and is named after the
    running test method and the frame index, so it can be inspected manually.

    Args:
      frame: Image to annotate.
      results: Output of FaceMesh.process(); its "multi_face_landmarks" field
        must not be None.
      idx: Index of the processed frame, used in the output file name.
    """
    drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
    for face_landmarks in results.multi_face_landmarks:
      mp_drawing.draw_landmarks(
          image=frame,
          landmark_list=face_landmarks,
          landmark_drawing_spec=drawing_spec)
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    """A four-channel image must be rejected with a ValueError."""
    with mp_faces.FaceMesh() as faces:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        faces.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    """An all-white image must produce no face landmarks."""
    with mp_faces.FaceMesh() as faces:
      image = np.full([100, 100, 3], 255, dtype=np.uint8)
      results = faces.process(image)
      self.assertIsNone(results.multi_face_landmarks)

  @parameterized.named_parameters(('static_image_mode', True, 1),
                                  ('video_mode', False, 5))
  def test_face(self, static_image_mode: bool, num_frames: int):
    """Checks the landmarks predicted on the portrait test image.

    Args:
      static_image_mode: Forwarded to FaceMesh.
      num_frames: How many times the same image is fed to the solution.
    """
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/portrait.jpg')
    image = cv2.imread(image_path)
    # cv2.imread() returns None rather than raising when the file cannot be
    # read; fail here with a clear message instead of on image.shape below.
    self.assertIsNotNone(image, 'Failed to read {}'.format(image_path))
    rows, cols, _ = image.shape
    with mp_faces.FaceMesh(
        static_image_mode=static_image_mode,
        min_detection_confidence=0.5) as faces:
      for idx in range(num_frames):
        results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # A missed detection leaves the field as None (see test_blank_image);
        # report it as a test failure rather than a TypeError while iterating.
        self.assertIsNotNone(results.multi_face_landmarks)
        self._annotate(image.copy(), results, idx)
        multi_face_landmarks = []
        for landmarks in results.multi_face_landmarks:
          self.assertLen(landmarks.landmark, 468)
          # Landmark coordinates are scaled by the image size to get pixels.
          x = [landmark.x * cols for landmark in landmarks.landmark]
          y = [landmark.y * rows for landmark in landmarks.landmark]
          face_landmarks = np.column_stack((x, y))
          multi_face_landmarks.append(face_landmarks)
        self.assertLen(multi_face_landmarks, 1)
        # Verify the eye landmarks are correct as sanity check.
        for eye_idx, gt_lds in EYE_INDICES_TO_LANDMARKS.items():
          prediction_error = np.abs(
              np.asarray(multi_face_landmarks[0][eye_idx]) - np.asarray(gt_lds))
          npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  absltest.main()
|
@@ -0,0 +1,164 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""MediaPipe Hands."""
|
||||
|
||||
import enum
|
||||
from typing import NamedTuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.calculators.core import gate_calculator_pb2
|
||||
from mediapipe.calculators.core import split_vector_calculator_pb2
|
||||
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
||||
from mediapipe.calculators.tensor import inference_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
|
||||
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
||||
from mediapipe.calculators.util import association_calculator_pb2
|
||||
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
|
||||
from mediapipe.calculators.util import logic_calculator_pb2
|
||||
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
||||
from mediapipe.calculators.util import rect_transformation_calculator_pb2
|
||||
from mediapipe.calculators.util import thresholding_calculator_pb2
|
||||
# pylint: enable=unused-import
|
||||
from mediapipe.python.solution_base import SolutionBase
|
||||
|
||||
|
||||
class HandLandmark(enum.IntEnum):
  """The 21 hand landmarks.

  Each member's value is the index of that landmark: the wrist first, then
  four landmarks per finger from the thumb to the pinky.
  """
  WRIST = 0

  # Thumb.
  THUMB_CMC = 1
  THUMB_MCP = 2
  THUMB_IP = 3
  THUMB_TIP = 4

  # Index finger.
  INDEX_FINGER_MCP = 5
  INDEX_FINGER_PIP = 6
  INDEX_FINGER_DIP = 7
  INDEX_FINGER_TIP = 8

  # Middle finger.
  MIDDLE_FINGER_MCP = 9
  MIDDLE_FINGER_PIP = 10
  MIDDLE_FINGER_DIP = 11
  MIDDLE_FINGER_TIP = 12

  # Ring finger.
  RING_FINGER_MCP = 13
  RING_FINGER_PIP = 14
  RING_FINGER_DIP = 15
  RING_FINGER_TIP = 16

  # Pinky.
  PINKY_MCP = 17
  PINKY_PIP = 18
  PINKY_DIP = 19
  PINKY_TIP = 20
|
||||
|
||||
|
||||
# Path of the binary graph that the Hands solution passes to SolutionBase.
BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb'

# Pairs of hand landmarks that are joined by a connection.
HAND_CONNECTIONS = frozenset((
    # Thumb.
    (HandLandmark.WRIST, HandLandmark.THUMB_CMC),
    (HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
    (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
    (HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP),
    # Index finger.
    (HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
    (HandLandmark.INDEX_FINGER_MCP, HandLandmark.INDEX_FINGER_PIP),
    (HandLandmark.INDEX_FINGER_PIP, HandLandmark.INDEX_FINGER_DIP),
    (HandLandmark.INDEX_FINGER_DIP, HandLandmark.INDEX_FINGER_TIP),
    # Middle finger.
    (HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP),
    (HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.MIDDLE_FINGER_PIP),
    (HandLandmark.MIDDLE_FINGER_PIP, HandLandmark.MIDDLE_FINGER_DIP),
    (HandLandmark.MIDDLE_FINGER_DIP, HandLandmark.MIDDLE_FINGER_TIP),
    # Ring finger.
    (HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.RING_FINGER_MCP),
    (HandLandmark.RING_FINGER_MCP, HandLandmark.RING_FINGER_PIP),
    (HandLandmark.RING_FINGER_PIP, HandLandmark.RING_FINGER_DIP),
    (HandLandmark.RING_FINGER_DIP, HandLandmark.RING_FINGER_TIP),
    # Pinky.
    (HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
    (HandLandmark.WRIST, HandLandmark.PINKY_MCP),
    (HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
    (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
    (HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP),
))
|
||||
|
||||
|
||||
class Hands(SolutionBase):
  """MediaPipe Hands.

  MediaPipe Hands processes an RGB image and returns the hand landmarks and
  handedness (left v.s. right hand) of each detected hand.

  Note that it determines handedness assuming the input image is mirrored,
  i.e., taken with a front-facing/selfie camera (
  https://en.wikipedia.org/wiki/Front-facing_camera) with images flipped
  horizontally. If that is not the case, use, for instance, cv2.flip(image, 1)
  to flip the image first for a correct handedness output.

  Please refer to https://solutions.mediapipe.dev/hands#python-solution-api for
  usage examples.
  """

  def __init__(self,
               static_image_mode=False,
               max_num_hands=2,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe Hand object.

    Args:
      static_image_mode: Whether to treat the input images as a batch of static
        and possibly unrelated images, or a video stream. See details in
        https://solutions.mediapipe.dev/hands#static_image_mode.
      max_num_hands: Maximum number of hands to detect. See details in
        https://solutions.mediapipe.dev/hands#max_num_hands.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for hand
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/hands#min_detection_confidence.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        hand landmarks to be considered tracked successfully. See details in
        https://solutions.mediapipe.dev/hands#min_tracking_confidence.
    """
    # The constant side packet carries the negation of static_image_mode.
    side_packet = (
        constant_side_packet_calculator_pb2.ConstantSidePacketCalculatorOptions
        .ConstantSidePacket(bool_value=not static_image_mode))
    calculator_params = {
        'ConstantSidePacketCalculator.packet': [side_packet],
        'palmdetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
            min_detection_confidence,
        'handlandmarkcpu__ThresholdingCalculator.threshold':
            min_tracking_confidence,
    }
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs={'num_hands': max_num_hands},
        calculator_params=calculator_params,
        outputs=['multi_hand_landmarks', 'multi_handedness'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the hand landmarks and handedness of each detected hand.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with two fields: a "multi_hand_landmarks" field that
      contains the hand landmarks on each detected hand and a "multi_handedness"
      field that contains the handedness (left v.s. right hand) of the detected
      hand.
    """
    graph_inputs = {'image': image}
    return super().process(input_data=graph_inputs)
|
@@ -0,0 +1,110 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python.solutions.hands."""
|
||||
|
||||
import os
|
||||
import tempfile # pylint: disable=unused-import
|
||||
from typing import NamedTuple
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
|
||||
# resources dependency
|
||||
# undeclared dependency
|
||||
from mediapipe.python.solutions import drawing_styles
|
||||
from mediapipe.python.solutions import drawing_utils as mp_drawing
|
||||
from mediapipe.python.solutions import hands as mp_hands
|
||||
|
||||
# Location of the test data directory.
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'

# Every predicted coordinate must be strictly closer than this to its expected
# value (see the assert_array_less() check in test_multi_hands).
DIFF_THRESHOLD = 15  # pixels

# Expected [x, y] pixel positions of the 21 landmarks of each of the two hands
# in testdata/hands.jpg, one inner list per hand.
EXPECTED_HAND_COORDINATES_PREDICTION = [
    [
        [144, 345], [211, 323], [257, 286],
        [289, 237], [322, 203], [219, 216],
        [238, 138], [249, 90], [253, 51],
        [177, 204], [184, 115], [187, 60],
        [185, 19], [138, 208], [131, 127],
        [124, 77], [117, 36], [106, 222],
        [92, 159], [79, 124], [68, 93],
    ],
    [
        [577, 37], [504, 56], [459, 94],
        [429, 146], [397, 182], [496, 167],
        [479, 245], [469, 292], [464, 330],
        [540, 177], [534, 265], [533, 319],
        [536, 360], [581, 172], [587, 252],
        [593, 304], [599, 346], [615, 157],
        [628, 223], [638, 258], [648, 288],
    ],
]
|
||||
|
||||
|
||||
class HandsTest(parameterized.TestCase):
  """Tests for the MediaPipe Hands Python solution."""

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    """Draws the detected hand landmarks on a frame and saves it as a PNG.

    The file is written to the system temp directory and is named after the
    running test method and the frame index, so it can be inspected manually.

    Args:
      frame: Image to annotate.
      results: Output of Hands.process(); its "multi_hand_landmarks" field
        must not be None.
      idx: Index of the processed frame, used in the output file name.
    """
    for hand_landmarks in results.multi_hand_landmarks:
      mp_drawing.draw_landmarks(
          frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
          drawing_styles.get_default_hand_landmark_style(),
          drawing_styles.get_default_hand_connection_style())
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    """A four-channel image must be rejected with a ValueError."""
    with mp_hands.Hands() as hands:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        hands.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    """An all-white image must produce neither landmarks nor handedness."""
    with mp_hands.Hands() as hands:
      image = np.full([100, 100, 3], 255, dtype=np.uint8)
      results = hands.process(image)
      self.assertIsNone(results.multi_hand_landmarks)
      self.assertIsNone(results.multi_handedness)

  @parameterized.named_parameters(('static_image_mode', True, 1),
                                  ('video_mode', False, 5))
  def test_multi_hands(self, static_image_mode, num_frames):
    """Checks the landmarks predicted for the two hands in the test image.

    Args:
      static_image_mode: Forwarded to Hands.
      num_frames: How many times the same image is fed to the solution.
    """
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/hands.jpg')
    image = cv2.imread(image_path)
    # cv2.imread() returns None rather than raising when the file cannot be
    # read; fail here with a clear message instead of on image.shape below.
    self.assertIsNotNone(image, 'Failed to read {}'.format(image_path))
    # The image size does not change between frames, so read it once.
    rows, cols, _ = image.shape
    with mp_hands.Hands(
        static_image_mode=static_image_mode,
        max_num_hands=2,
        min_detection_confidence=0.5) as hands:
      for idx in range(num_frames):
        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # A missed detection leaves both fields as None (see test_blank_image);
        # report it as a test failure rather than a TypeError while iterating.
        self.assertIsNotNone(results.multi_hand_landmarks)
        self.assertIsNotNone(results.multi_handedness)
        self._annotate(image.copy(), results, idx)
        handedness_labels = [
            handedness.classification[0].label
            for handedness in results.multi_handedness
        ]
        multi_hand_coordinates = []
        for landmarks in results.multi_hand_landmarks:
          self.assertLen(landmarks.landmark, 21)
          # Landmark coordinates are scaled by the image size to get pixels.
          x = [landmark.x * cols for landmark in landmarks.landmark]
          y = [landmark.y * rows for landmark in landmarks.landmark]
          hand_coordinates = np.column_stack((x, y))
          multi_hand_coordinates.append(hand_coordinates)
        self.assertLen(handedness_labels, 2)
        self.assertLen(multi_hand_coordinates, 2)
        prediction_error = np.abs(
            np.asarray(multi_hand_coordinates) -
            np.asarray(EXPECTED_HAND_COORDINATES_PREDICTION))
        npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  absltest.main()
|
@@ -0,0 +1,152 @@
|
||||
# Copyright 2020-2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""MediaPipe Holistic."""
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
||||
# The following imports are needed because python pb2 silently discards
|
||||
# unknown protobuf fields.
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.calculators.core import gate_calculator_pb2
|
||||
from mediapipe.calculators.core import split_vector_calculator_pb2
|
||||
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
||||
from mediapipe.calculators.tensor import inference_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
|
||||
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
||||
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
|
||||
from mediapipe.calculators.util import landmark_projection_calculator_pb2
|
||||
from mediapipe.calculators.util import local_file_contents_calculator_pb2
|
||||
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
||||
from mediapipe.calculators.util import rect_transformation_calculator_pb2
|
||||
from mediapipe.framework.tool import switch_container_pb2
|
||||
from mediapipe.modules.holistic_landmark.calculators import roi_tracking_calculator_pb2
|
||||
# pylint: enable=unused-import
|
||||
|
||||
from mediapipe.python.solution_base import SolutionBase
|
||||
from mediapipe.python.solutions import download_utils
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.python.solutions.face_mesh import FACE_CONNECTIONS
|
||||
from mediapipe.python.solutions.hands import HAND_CONNECTIONS
|
||||
from mediapipe.python.solutions.hands import HandLandmark
|
||||
from mediapipe.python.solutions.pose import POSE_CONNECTIONS
|
||||
from mediapipe.python.solutions.pose import PoseLandmark
|
||||
# pylint: enable=unused-import
|
||||
|
||||
# Path of the binary graph that the Holistic solution passes to SolutionBase.
BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'
|
||||
|
||||
|
||||
def _download_oss_pose_landmark_model(model_complexity):
  """Downloads the pose landmark lite/heavy model from the MediaPipe Github repo if it doesn't exist in the package.

  Args:
    model_complexity: Complexity of the pose landmark model. 0 selects the lite
      model and 2 the heavy model; for any other value nothing is downloaded.
  """
  if model_complexity == 0:
    model_path = 'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite'
  elif model_complexity == 2:
    model_path = 'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite'
  else:
    # No download is requested for the remaining complexity values.
    return
  download_utils.download_oss_model(model_path)
|
||||
|
||||
|
||||
class Holistic(SolutionBase):
  """MediaPipe Holistic.

  MediaPipe Holistic processes an RGB image and returns pose landmarks, left and
  right hand landmarks, and face mesh landmarks on the most prominent person
  detected.

  Please refer to https://solutions.mediapipe.dev/holistic#python-solution-api
  for usage examples.
  """

  def __init__(self,
               static_image_mode=False,
               model_complexity=1,
               smooth_landmarks=True,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe Holistic object.

    Args:
      static_image_mode: Whether to treat the input images as a batch of static
        and possibly unrelated images, or a video stream. See details in
        https://solutions.mediapipe.dev/holistic#static_image_mode.
      model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
        details in https://solutions.mediapipe.dev/holistic#model_complexity.
      smooth_landmarks: Whether to filter landmarks across different input
        images to reduce jitter. See details in
        https://solutions.mediapipe.dev/holistic#smooth_landmarks.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/holistic#min_detection_confidence.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        pose landmarks to be considered tracked successfully. See details in
        https://solutions.mediapipe.dev/holistic#min_tracking_confidence.
    """
    # Fetch the model file for the requested complexity before the graph is
    # created.
    _download_oss_pose_landmark_model(model_complexity)

    side_inputs = {
        'model_complexity': model_complexity,
        # Smoothing is only requested when the input is not static images.
        'smooth_landmarks': smooth_landmarks and not static_image_mode,
    }
    # The constant side packet carries the negation of static_image_mode.
    side_packet = (
        constant_side_packet_calculator_pb2.ConstantSidePacketCalculatorOptions
        .ConstantSidePacket(bool_value=not static_image_mode))
    calculator_params = {
        'poselandmarkcpu__ConstantSidePacketCalculator.packet': [side_packet],
        'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
            min_detection_confidence,
        'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
            min_tracking_confidence,
    }
    output_names = [
        'pose_landmarks', 'pose_world_landmarks', 'left_hand_landmarks',
        'right_hand_landmarks', 'face_landmarks'
    ]
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs=side_inputs,
        calculator_params=calculator_params,
        outputs=output_names)

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple that has five fields describing the landmarks on the most
      prominent person detected:
        1) "pose_landmarks" field that contains the pose landmarks.
        2) "pose_world_landmarks" field that contains the pose landmarks in
        real-world 3D coordinates that are in meters with the origin at the
        center between hips.
        3) "left_hand_landmarks" field that contains the left-hand landmarks.
        4) "right_hand_landmarks" field that contains the right-hand landmarks.
        5) "face_landmarks" field that contains the face landmarks.
    """
    results = super().process(input_data={'image': image})
    # Strip the "presence" field from every pose landmark before returning.
    for field_name in ('pose_landmarks', 'pose_world_landmarks'):
      landmark_list = getattr(results, field_name)
      if landmark_list:
        for landmark in landmark_list.landmark:
          landmark.ClearField('presence')
    return results
|
@@ -0,0 +1,134 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for mediapipe.python.solutions.pose."""
|
||||
|
||||
import os
|
||||
import tempfile # pylint: disable=unused-import
|
||||
from typing import NamedTuple
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
|
||||
# resources dependency
|
||||
# undeclared dependency
|
||||
from mediapipe.python.solutions import drawing_utils as mp_drawing
|
||||
from mediapipe.python.solutions import holistic as mp_holistic
|
||||
|
||||
# Location of the test data directory.
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'

# Every predicted coordinate must be strictly closer than these to its expected
# value (see _assert_diff_less()).
POSE_DIFF_THRESHOLD = 30  # pixels
HAND_DIFF_THRESHOLD = 30  # pixels

# Expected [x, y] pixel positions of the landmarks in testdata/holistic.jpg,
# one row per landmark.
EXPECTED_POSE_LANDMARKS = np.array([
    [782, 243], [791, 232], [796, 233],
    [801, 233], [773, 231], [766, 231],
    [759, 232], [802, 242], [751, 239],
    [791, 258], [766, 258], [830, 301],
    [708, 298], [910, 248], [635, 234],
    [954, 161], [593, 136], [961, 137],
    [583, 110], [952, 132], [592, 106],
    [950, 141], [596, 115], [793, 500],
    [724, 502], [874, 626], [640, 629],
    [965, 756], [542, 760], [962, 779],
    [533, 781], [1025, 797], [487, 803],
])
EXPECTED_LEFT_HAND_LANDMARKS = np.array([
    [958, 167], [950, 161], [945, 151],
    [945, 141], [947, 134], [945, 136],
    [939, 122], [935, 113], [931, 106],
    [951, 134], [946, 118], [942, 108],
    [938, 100], [957, 135], [954, 120],
    [951, 111], [948, 103], [964, 138],
    [964, 128], [965, 122], [965, 117],
])
EXPECTED_RIGHT_HAND_LANDMARKS = np.array([
    [590, 135], [602, 125], [609, 114],
    [613, 103], [617, 96], [596, 100],
    [595, 84], [594, 74], [593, 68],
    [588, 100], [586, 84], [585, 73],
    [584, 65], [581, 103], [579, 89],
    [579, 79], [579, 72], [575, 109],
    [571, 99], [570, 93], [569, 87],
])
|
||||
|
||||
|
||||
class PoseTest(parameterized.TestCase):
  """Tests for the MediaPipe Holistic Python solution."""

  def _landmarks_list_to_array(self, landmark_list, image_shape):
    """Converts a landmark list to an array of pixel coordinates.

    Args:
      landmark_list: Object whose "landmark" items expose "x" and "y".
      image_shape: (rows, cols, channels) shape of the processed image.

    Returns:
      An array with one (x * cols, y * rows) row per landmark.
    """
    rows, cols, _ = image_shape
    return np.asarray([(lmk.x * cols, lmk.y * rows)
                       for lmk in landmark_list.landmark])

  def _assert_diff_less(self, array1, array2, threshold):
    """Asserts that every element-wise absolute difference is below threshold."""
    npt.assert_array_less(np.abs(array1 - array2), threshold)

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    """Draws face, hand and pose landmarks on a frame and saves it as a PNG.

    The file is written to the system temp directory and is named after the
    running test method and the frame index, so it can be inspected manually.

    Args:
      frame: Image to annotate.
      results: Output of Holistic.process().
      idx: Index of the processed frame, used in the output file name.
    """
    drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=results.face_landmarks,
        landmark_drawing_spec=drawing_spec)
    mp_drawing.draw_landmarks(frame, results.left_hand_landmarks,
                              mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(frame, results.right_hand_landmarks,
                              mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(frame, results.pose_landmarks,
                              mp_holistic.POSE_CONNECTIONS)
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    """A four-channel image must be rejected with a ValueError."""
    with mp_holistic.Holistic() as holistic:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        holistic.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    """An all-white image must produce no pose landmarks."""
    with mp_holistic.Holistic() as holistic:
      image = np.full([100, 100, 3], 255, dtype=np.uint8)
      results = holistic.process(image)
      self.assertIsNone(results.pose_landmarks)

  @parameterized.named_parameters(('static_lite', True, 0, 3),
                                  ('static_full', True, 1, 3),
                                  ('static_heavy', True, 2, 3),
                                  ('video_lite', False, 0, 3),
                                  ('video_full', False, 1, 3),
                                  ('video_heavy', False, 2, 3))
  def test_on_image(self, static_image_mode, model_complexity, num_frames):
    """Checks the landmarks predicted on the holistic test image.

    Args:
      static_image_mode: Forwarded to Holistic.
      model_complexity: Forwarded to Holistic.
      num_frames: How many times the same image is fed to the solution.
    """
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/holistic.jpg')
    image = cv2.imread(image_path)
    # cv2.imread() returns None rather than raising when the file cannot be
    # read; fail here with a clear message instead of on image.shape below.
    self.assertIsNotNone(image, 'Failed to read {}'.format(image_path))
    with mp_holistic.Holistic(static_image_mode=static_image_mode,
                              model_complexity=model_complexity) as holistic:
      for idx in range(num_frames):
        results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._annotate(image.copy(), results, idx)
        # Missing detections are None (see test_blank_image); report them as
        # test failures rather than AttributeErrors in the conversions below.
        self.assertIsNotNone(results.pose_landmarks)
        self.assertIsNotNone(results.left_hand_landmarks)
        self.assertIsNotNone(results.right_hand_landmarks)
        self.assertIsNotNone(results.face_landmarks)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.pose_landmarks, image.shape),
            EXPECTED_POSE_LANDMARKS,
            POSE_DIFF_THRESHOLD)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.left_hand_landmarks,
                                          image.shape),
            EXPECTED_LEFT_HAND_LANDMARKS,
            HAND_DIFF_THRESHOLD)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.right_hand_landmarks,
                                          image.shape),
            EXPECTED_RIGHT_HAND_LANDMARKS,
            HAND_DIFF_THRESHOLD)
        # TODO: Verify the correctness of the face landmarks.
        self.assertLen(results.face_landmarks.landmark, 468)
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  absltest.main()
|
@@ -0,0 +1,292 @@
|
||||
# Copyright 2020-2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""MediaPipe Objectron."""
|
||||
|
||||
import enum
|
||||
from typing import List, Tuple, NamedTuple, Optional
|
||||
|
||||
import attr
|
||||
import numpy as np
|
||||
|
||||
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.calculators.core import gate_calculator_pb2
|
||||
from mediapipe.calculators.core import split_vector_calculator_pb2
|
||||
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
||||
from mediapipe.calculators.tensor import inference_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
|
||||
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
||||
from mediapipe.calculators.util import association_calculator_pb2
|
||||
from mediapipe.calculators.util import collection_has_min_size_calculator_pb2
|
||||
from mediapipe.calculators.util import detection_label_id_to_text_calculator_pb2
|
||||
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
|
||||
from mediapipe.calculators.util import landmark_projection_calculator_pb2
|
||||
from mediapipe.calculators.util import local_file_contents_calculator_pb2
|
||||
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
||||
from mediapipe.calculators.util import rect_transformation_calculator_pb2
|
||||
from mediapipe.calculators.util import thresholding_calculator_pb2
|
||||
from mediapipe.framework.formats import landmark_pb2
|
||||
from mediapipe.modules.objectron.calculators import annotation_data_pb2
|
||||
from mediapipe.modules.objectron.calculators import frame_annotation_to_rect_calculator_pb2
|
||||
from mediapipe.modules.objectron.calculators import lift_2d_frame_annotation_to_3d_calculator_pb2
|
||||
# pylint: enable=unused-import
|
||||
from mediapipe.python.solution_base import SolutionBase
|
||||
from mediapipe.python.solutions import download_utils
|
||||
|
||||
|
||||
class BoxLandmark(enum.IntEnum):
  """The 9 3D box landmarks.

  Landmark 0 is the box center; landmarks 1-8 are the corners, numbered as in
  the diagram below.
  """
  #
  #       3 + + + + + + + + 7
  #       +\                +\          UP
  #       + \               + \
  #       +  \              +  \        |
  #       +   4 + + + + + + + + 8       | y
  #       +   +             +   +       |
  #       +   +             +   +       |
  #       +   +     (0)     +   +       .------- x
  #       +   +             +   +        \
  #       1 + + + + + + + + 5   +         \
  #        \  +              \  +          \ z
  #         \ +               \ +           \
  #          \+                \+
  #           2 + + + + + + + + 6
  CENTER = 0

  # Left face of the box.
  BACK_BOTTOM_LEFT = 1
  FRONT_BOTTOM_LEFT = 2
  BACK_TOP_LEFT = 3
  FRONT_TOP_LEFT = 4

  # Right face of the box.
  BACK_BOTTOM_RIGHT = 5
  FRONT_BOTTOM_RIGHT = 6
  BACK_TOP_RIGHT = 7
  FRONT_TOP_RIGHT = 8
|
||||
|
||||
# Path of the Objectron binary graph file.
BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'

# The 12 edges of the box, each given as a pair of corner landmarks.
BOX_CONNECTIONS = frozenset((
    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_LEFT),
    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_TOP_LEFT),
    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_BOTTOM_RIGHT),
    (BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_TOP_LEFT),
    (BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_RIGHT),
    (BoxLandmark.BACK_TOP_LEFT, BoxLandmark.FRONT_TOP_LEFT),
    (BoxLandmark.BACK_TOP_LEFT, BoxLandmark.BACK_TOP_RIGHT),
    (BoxLandmark.FRONT_TOP_LEFT, BoxLandmark.FRONT_TOP_RIGHT),
    (BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.FRONT_BOTTOM_RIGHT),
    (BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.BACK_TOP_RIGHT),
    (BoxLandmark.FRONT_BOTTOM_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
    (BoxLandmark.BACK_TOP_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
))
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
class ObjectronModel:
  """Describes one box-landmark model that Objectron can run."""
  # Path of the .tflite box landmark model.
  model_path: str
  # Detection label(s) the model applies to.
  label_name: str
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True)
class ShoeModel(ObjectronModel):
  """Immutable model description for shoes ('Footwear' label)."""
  model_path: str = ('mediapipe/modules/objectron'
                     '/object_detection_3d_sneakers.tflite')
  label_name: str = 'Footwear'
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True)
class ChairModel(ObjectronModel):
  """Immutable model description for chairs ('Chair' label)."""
  model_path: str = ('mediapipe/modules/objectron'
                     '/object_detection_3d_chair.tflite')
  label_name: str = 'Chair'
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True)
class CameraModel(ObjectronModel):
  """Immutable model description for cameras ('Camera' label)."""
  model_path: str = ('mediapipe/modules/objectron'
                     '/object_detection_3d_camera.tflite')
  label_name: str = 'Camera'
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True)
class CupModel(ObjectronModel):
  """Immutable model description for cups ('Coffee cup, Mug' labels)."""
  model_path: str = ('mediapipe/modules/objectron'
                     '/object_detection_3d_cup.tflite')
  label_name: str = 'Coffee cup, Mug'
|
||||
|
||||
# Supported models, keyed by the name accepted by get_model_by_name().
_MODEL_DICT = dict(
    Shoe=ShoeModel(),
    Chair=ChairModel(),
    Cup=CupModel(),
    Camera=CameraModel(),
)
|
||||
|
||||
|
||||
def _download_oss_objectron_models(objectron_model: str):
  """Downloads the Objectron models from the MediaPipe Github repo if needed.

  Two files are requested through download_utils.download_oss_model: the
  shared SSD object detector and the box landmark model given by the caller.

  Args:
    objectron_model: Path of the box landmark model to fetch.
  """
  ssd_detector = ('mediapipe/modules/objectron/'
                  'object_detection_ssd_mobilenetv2_oidv4_fp16.tflite')
  # The detector is requested first, then the per-category landmark model.
  for model_path in (ssd_detector, objectron_model):
    download_utils.download_oss_model(model_path)
|
||||
|
||||
|
||||
def get_model_by_name(name: str) -> ObjectronModel:
  """Looks up a registered Objectron model and makes sure it is downloaded.

  Args:
    name: Key of the model in _MODEL_DICT.

  Returns:
    The ObjectronModel registered under `name`.

  Raises:
    ValueError: If `name` is not a registered model name.
  """
  model = _MODEL_DICT.get(name)
  if model is None:
    raise ValueError(f'{name} is not a valid model name for Objectron.')
  _download_oss_objectron_models(model.model_path)
  return model
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
class ObjectronOutputs:
  """Result record for a single detected object."""
  # Box keypoints as 2D (x, y) landmarks.
  landmarks_2d: landmark_pb2.NormalizedLandmarkList
  # Box keypoints as 3D (x, y, z) landmarks.
  landmarks_3d: landmark_pb2.LandmarkList
  # Object rotation, stored as a 3x3 array.
  rotation: np.ndarray
  # Object translation.
  translation: np.ndarray
  # Object scale.
  scale: np.ndarray
|
||||
|
||||
|
||||
class Objectron(SolutionBase):
  """MediaPipe Objectron.

  MediaPipe Objectron processes an RGB image and returns, for each detected
  object, its 2D and 3D box landmarks together with the object's rotation,
  translation and scale.
  """

  def __init__(self,
               static_image_mode: bool = False,
               max_num_objects: int = 5,
               min_detection_confidence: float = 0.5,
               min_tracking_confidence: float = 0.99,
               model_name: str = 'Shoe',
               focal_length: Tuple[float, float] = (1.0, 1.0),
               principal_point: Tuple[float, float] = (0.0, 0.0),
               image_size: Optional[Tuple[int, int]] = None,
               ):
    """Initializes a MediaPipe Objectron class.

    Args:
      static_image_mode: Whether to treat the input images as a batch of
        static and possibly unrelated images, or a video stream.
      max_num_objects: Maximum number of objects to detect.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for
        object detection to be considered successful.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        box landmarks to be considered tracked successfully.
      model_name: Name of the model used for predicting box landmarks; one of
        {'Shoe', 'Chair', 'Cup', 'Camera'}.
      focal_length: Camera focal length (fx, fy). Interpreted in NDC space
        unless image_size is given, in which case it is taken in pixels and
        converted to NDC here.
      principal_point: Camera principal point (px, py). Interpreted in NDC
        space unless image_size is given, in which case it is taken in pixels
        and converted to NDC here.
      image_size: Optional (image_width, image_height) of the input image.
        ONLY needed when focal_length and principal_point are in pixel space.

    Raises:
      ValueError: If model_name is not a supported model name.
      ConnectionError: If the objectron open source model can't be downloaded
        from the MediaPipe Github repo.
    """
    # Camera intrinsics; converted from pixel space to NDC when a size is set.
    fx, fy = focal_length
    px, py = principal_point
    if image_size is not None:
      half_width = image_size[0] / 2.0
      half_height = image_size[1] / 2.0
      fx, fy = fx / half_width, fy / half_height
      px = -(px - half_width) / half_width
      py = -(py - half_height) / half_height

    # Resolve (and, if needed, download) the requested landmark model.
    model = get_model_by_name(model_name)

    side_inputs = {
        'box_landmark_model_path': model.model_path,
        'allowed_labels': model.label_name,
        'max_num_objects': max_num_objects,
    }
    # The constant side packet is true in video mode, false for static images.
    video_mode_packet = (
        constant_side_packet_calculator_pb2.ConstantSidePacketCalculatorOptions
        .ConstantSidePacket(bool_value=not static_image_mode))
    calculator_params = {
        'ConstantSidePacketCalculator.packet': [video_mode_packet],
        ('objectdetectionoidv4subgraph__TensorsToDetectionsCalculator'
         '.min_score_thresh'): min_detection_confidence,
        'boxlandmarksubgraph__ThresholdingCalculator.threshold':
            min_tracking_confidence,
        'Lift2DFrameAnnotationTo3DCalculator.normalized_focal_x': fx,
        'Lift2DFrameAnnotationTo3DCalculator.normalized_focal_y': fy,
        'Lift2DFrameAnnotationTo3DCalculator.normalized_principal_point_x':
            px,
        'Lift2DFrameAnnotationTo3DCalculator.normalized_principal_point_y':
            py,
    }
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs=side_inputs,
        calculator_params=calculator_params,
        outputs=['detected_objects'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the box landmarks of each object.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with a "detected_objects" field. The field holds a
      list of "ObjectronOutputs" instances, one per detected 3D bounding box,
      or None when nothing was detected.
    """
    results = super().process(input_data={'image': image})
    raw_annotations = results.detected_objects
    results.detected_objects = (
        self._convert_format(raw_annotations) if raw_annotations else None)
    return results

  def _convert_format(
      self,
      inputs: annotation_data_pb2.FrameAnnotation) -> List[ObjectronOutputs]:
    """Turns the graph's frame annotation into a list of ObjectronOutputs."""
    converted = []
    for annotation in inputs.annotations:
      # Collect the 2D and 3D landmarks of every keypoint of the box.
      landmarks_2d = landmark_pb2.NormalizedLandmarkList()
      landmarks_3d = landmark_pb2.LandmarkList()
      for keypoint in annotation.keypoints:
        flat_point = keypoint.point_2d
        landmarks_2d.landmark.add(x=flat_point.x, y=flat_point.y)
        space_point = keypoint.point_3d
        landmarks_3d.landmark.add(
            x=space_point.x, y=space_point.y, z=space_point.z)

      # Attach the 3D object pose: 3x3 rotation, translation and scale.
      converted.append(
          ObjectronOutputs(
              landmarks_2d=landmarks_2d,
              landmarks_3d=landmarks_3d,
              rotation=np.array(annotation.rotation).reshape(3, 3),
              translation=np.array(annotation.translation),
              scale=np.array(annotation.scale)))
    return converted
|
@@ -0,0 +1,81 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python.solutions.objectron."""
|
||||
|
||||
import os
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
|
||||
# resources dependency
|
||||
from mediapipe.python.solutions import objectron as mp_objectron
|
||||
|
||||
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
|
||||
DIFF_THRESHOLD = 30 # pixels
|
||||
EXPECTED_BOX_COORDINATES_PREDICTION = [[[236, 413], [408, 474], [135, 457],
|
||||
[383, 505], [80, 478], [408, 345],
|
||||
[130, 347], [384, 355], [72, 353]],
|
||||
[[241, 206], [411, 279], [131, 280],
|
||||
[392, 249], [78, 252], [412, 155],
|
||||
[140, 178], [396, 105], [89, 137]]]
|
||||
|
||||
|
||||
class ObjectronTest(parameterized.TestCase):
  """Tests for the Objectron solution API."""

  def test_invalid_image_shape(self):
    """A four-channel image must be rejected with a ValueError."""
    four_channel_image = np.arange(36, dtype=np.uint8).reshape(3, 3, 4)
    with mp_objectron.Objectron() as objectron:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        objectron.process(four_channel_image)

  def test_blank_image(self):
    """An all-white image yields no detected objects."""
    white_image = np.full([100, 100, 3], 255, dtype=np.uint8)
    with mp_objectron.Objectron() as objectron:
      results = objectron.process(white_image)
      self.assertIsNone(results.detected_objects)

  @parameterized.named_parameters(('static_image_mode', True, 1),
                                  ('video_mode', False, 5))
  def test_multi_objects(self, static_image_mode, num_frames):
    """Both shoes are found, with keypoints close to the expected pixels."""
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/shoes.jpg')
    image = cv2.imread(image_path)

    with mp_objectron.Objectron(
        static_image_mode=static_image_mode,
        max_num_objects=2,
        min_detection_confidence=0.5) as objectron:
      for _ in range(num_frames):
        results = objectron.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        multi_box_coordinates = []
        for detected_object in results.detected_objects:
          keypoints = detected_object.landmarks_2d.landmark
          self.assertLen(keypoints, 9)
          # Scale the normalized (y, x) pairs by (image rows, image cols).
          normalized_yx = np.array([(lmk.y, lmk.x) for lmk in keypoints])
          multi_box_coordinates.append(normalized_yx * image.shape[0:2])
        self.assertLen(multi_box_coordinates, 2)
        expected = np.asarray(EXPECTED_BOX_COORDINATES_PREDICTION)
        prediction_error = np.abs(np.asarray(multi_box_coordinates) - expected)
        npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Run the Objectron tests through absl's test runner.
  absltest.main()
|
@@ -0,0 +1,216 @@
|
||||
# Copyright 2020-2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""MediaPipe Pose."""
|
||||
|
||||
import enum
|
||||
from typing import NamedTuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
||||
# The following imports are needed because python pb2 silently discards
|
||||
# unknown protobuf fields.
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.calculators.core import gate_calculator_pb2
|
||||
from mediapipe.calculators.core import split_vector_calculator_pb2
|
||||
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
||||
from mediapipe.calculators.tensor import inference_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
|
||||
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
||||
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
|
||||
from mediapipe.calculators.util import landmarks_smoothing_calculator_pb2
|
||||
from mediapipe.calculators.util import local_file_contents_calculator_pb2
|
||||
from mediapipe.calculators.util import logic_calculator_pb2
|
||||
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
||||
from mediapipe.calculators.util import rect_transformation_calculator_pb2
|
||||
from mediapipe.calculators.util import thresholding_calculator_pb2
|
||||
from mediapipe.calculators.util import visibility_smoothing_calculator_pb2
|
||||
from mediapipe.framework.tool import switch_container_pb2
|
||||
# pylint: enable=unused-import
|
||||
|
||||
from mediapipe.python.solution_base import SolutionBase
|
||||
from mediapipe.python.solutions import download_utils
|
||||
|
||||
|
||||
class PoseLandmark(enum.IntEnum):
  """Indices of the 33 pose landmarks."""
  # Face.
  NOSE = 0
  LEFT_EYE_INNER = 1
  LEFT_EYE = 2
  LEFT_EYE_OUTER = 3
  RIGHT_EYE_INNER = 4
  RIGHT_EYE = 5
  RIGHT_EYE_OUTER = 6
  LEFT_EAR = 7
  RIGHT_EAR = 8
  MOUTH_LEFT = 9
  MOUTH_RIGHT = 10

  # Arms.
  LEFT_SHOULDER = 11
  RIGHT_SHOULDER = 12
  LEFT_ELBOW = 13
  RIGHT_ELBOW = 14
  LEFT_WRIST = 15
  RIGHT_WRIST = 16

  # Hands.
  LEFT_PINKY = 17
  RIGHT_PINKY = 18
  LEFT_INDEX = 19
  RIGHT_INDEX = 20
  LEFT_THUMB = 21
  RIGHT_THUMB = 22

  # Legs.
  LEFT_HIP = 23
  RIGHT_HIP = 24
  LEFT_KNEE = 25
  RIGHT_KNEE = 26
  LEFT_ANKLE = 27
  RIGHT_ANKLE = 28

  # Feet.
  LEFT_HEEL = 29
  RIGHT_HEEL = 30
  LEFT_FOOT_INDEX = 31
  RIGHT_FOOT_INDEX = 32
|
||||
|
||||
# Location of the serialized pose landmark CPU graph handed to SolutionBase.
BINARYPB_FILE_PATH = (
    'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb')

# Skeleton edges, each given as a (start, end) pair of PoseLandmark members.
POSE_CONNECTIONS = frozenset({
    # Face: nose to each ear via the eyes, plus the mouth.
    (PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
    (PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
    (PoseLandmark.RIGHT_EYE, PoseLandmark.RIGHT_EYE_OUTER),
    (PoseLandmark.RIGHT_EYE_OUTER, PoseLandmark.RIGHT_EAR),
    (PoseLandmark.NOSE, PoseLandmark.LEFT_EYE_INNER),
    (PoseLandmark.LEFT_EYE_INNER, PoseLandmark.LEFT_EYE),
    (PoseLandmark.LEFT_EYE, PoseLandmark.LEFT_EYE_OUTER),
    (PoseLandmark.LEFT_EYE_OUTER, PoseLandmark.LEFT_EAR),
    (PoseLandmark.MOUTH_RIGHT, PoseLandmark.MOUTH_LEFT),
    # Shoulder line.
    (PoseLandmark.RIGHT_SHOULDER, PoseLandmark.LEFT_SHOULDER),
    # Right arm and hand.
    (PoseLandmark.RIGHT_SHOULDER, PoseLandmark.RIGHT_ELBOW),
    (PoseLandmark.RIGHT_ELBOW, PoseLandmark.RIGHT_WRIST),
    (PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_PINKY),
    (PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_INDEX),
    (PoseLandmark.RIGHT_WRIST, PoseLandmark.RIGHT_THUMB),
    (PoseLandmark.RIGHT_PINKY, PoseLandmark.RIGHT_INDEX),
    # Left arm and hand.
    (PoseLandmark.LEFT_SHOULDER, PoseLandmark.LEFT_ELBOW),
    (PoseLandmark.LEFT_ELBOW, PoseLandmark.LEFT_WRIST),
    (PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_PINKY),
    (PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_INDEX),
    (PoseLandmark.LEFT_WRIST, PoseLandmark.LEFT_THUMB),
    (PoseLandmark.LEFT_PINKY, PoseLandmark.LEFT_INDEX),
    # Torso.
    (PoseLandmark.RIGHT_SHOULDER, PoseLandmark.RIGHT_HIP),
    (PoseLandmark.LEFT_SHOULDER, PoseLandmark.LEFT_HIP),
    (PoseLandmark.RIGHT_HIP, PoseLandmark.LEFT_HIP),
    # Legs.
    (PoseLandmark.RIGHT_HIP, PoseLandmark.RIGHT_KNEE),
    (PoseLandmark.LEFT_HIP, PoseLandmark.LEFT_KNEE),
    (PoseLandmark.RIGHT_KNEE, PoseLandmark.RIGHT_ANKLE),
    (PoseLandmark.LEFT_KNEE, PoseLandmark.LEFT_ANKLE),
    # Feet.
    (PoseLandmark.RIGHT_ANKLE, PoseLandmark.RIGHT_HEEL),
    (PoseLandmark.LEFT_ANKLE, PoseLandmark.LEFT_HEEL),
    (PoseLandmark.RIGHT_HEEL, PoseLandmark.RIGHT_FOOT_INDEX),
    (PoseLandmark.LEFT_HEEL, PoseLandmark.LEFT_FOOT_INDEX),
    (PoseLandmark.RIGHT_ANKLE, PoseLandmark.RIGHT_FOOT_INDEX),
    (PoseLandmark.LEFT_ANKLE, PoseLandmark.LEFT_FOOT_INDEX),
})
|
||||
|
||||
|
||||
def _download_oss_pose_landmark_model(model_complexity):
  """Downloads the pose landmark lite/heavy model from the MediaPipe Github repo if it doesn't exist in the package.

  Args:
    model_complexity: 0 selects the lite model and 2 the heavy model. For any
      other value nothing is downloaded.
  """
  model_path = None
  if model_complexity == 0:
    model_path = 'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite'
  elif model_complexity == 2:
    model_path = 'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite'

  if model_path is None:
    # No download is attempted for other complexities (presumably the default
    # model already ships with the package -- TODO confirm).
    return
  download_utils.download_oss_model(model_path)
|
||||
|
||||
|
||||
class Pose(SolutionBase):
  """MediaPipe Pose.

  MediaPipe Pose processes an RGB image and returns pose landmarks on the most
  prominent person detected.

  Please refer to https://solutions.mediapipe.dev/pose#python-solution-api for
  usage examples.
  """

  def __init__(self,
               static_image_mode=False,
               model_complexity=1,
               smooth_landmarks=True,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe Pose object.

    Args:
      static_image_mode: Whether to treat the input images as a batch of
        static and possibly unrelated images, or a video stream. See details
        in https://solutions.mediapipe.dev/pose#static_image_mode.
      model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
        details in https://solutions.mediapipe.dev/pose#model_complexity.
      smooth_landmarks: Whether to filter landmarks across different input
        images to reduce jitter. Smoothing is switched off whenever
        static_image_mode is set. See details in
        https://solutions.mediapipe.dev/pose#smooth_landmarks.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for
        person detection to be considered successful. See details in
        https://solutions.mediapipe.dev/pose#min_detection_confidence.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        pose landmarks to be considered tracked successfully. See details in
        https://solutions.mediapipe.dev/pose#min_tracking_confidence.
    """
    # Fetch the lite/heavy landmark model first when one of them is selected.
    _download_oss_pose_landmark_model(model_complexity)

    side_inputs = {
        'model_complexity': model_complexity,
        'smooth_landmarks': smooth_landmarks and not static_image_mode,
    }
    # The constant side packet is true in video mode, false for static images.
    video_mode_packet = (
        constant_side_packet_calculator_pb2.ConstantSidePacketCalculatorOptions
        .ConstantSidePacket(bool_value=not static_image_mode))
    calculator_params = {
        'ConstantSidePacketCalculator.packet': [video_mode_packet],
        'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
            min_detection_confidence,
        'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
            min_tracking_confidence,
    }
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs=side_inputs,
        calculator_params=calculator_params,
        outputs=['pose_landmarks', 'pose_world_landmarks'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the pose landmarks on the most prominent person detected.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple that has two fields describing the landmarks on the most
      prominent person detected:
        1) "pose_landmarks" field that contains the pose landmarks.
        2) "pose_world_landmarks" field that contains the pose landmarks in
        real-world 3D coordinates that are in meters with the origin at the
        center between hips.
    """
    results = super().process(input_data={'image': image})
    # Clear the 'presence' field on every returned landmark, image-space and
    # world-space alike.
    for landmark_list in (results.pose_landmarks,
                          results.pose_world_landmarks):
      if not landmark_list:
        continue
      for landmark in landmark_list.landmark:
        landmark.ClearField('presence')
    return results
|
@@ -0,0 +1,197 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for mediapipe.python.solutions.pose."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from typing import NamedTuple
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
|
||||
# resources dependency
|
||||
# undeclared dependency
|
||||
from mediapipe.python.solutions import drawing_utils as mp_drawing
|
||||
from mediapipe.python.solutions import pose as mp_pose
|
||||
|
||||
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'

# Tolerance and expected values for the 33 image-space landmarks, one
# two-value row per landmark.
DIFF_THRESHOLD = 15  # pixels
EXPECTED_POSE_LANDMARKS = np.array([
    (460, 283), (467, 273), (471, 273),  # landmarks 0-2
    (474, 273), (465, 273), (465, 273),  # landmarks 3-5
    (466, 273), (491, 277), (480, 277),  # landmarks 6-8
    (470, 294), (465, 294), (545, 319),  # landmarks 9-11
    (453, 329), (622, 323), (375, 316),  # landmarks 12-14
    (696, 316), (299, 307), (719, 316),  # landmarks 15-17
    (278, 306), (721, 311), (274, 304),  # landmarks 18-20
    (713, 313), (283, 306), (520, 476),  # landmarks 21-23
    (467, 471), (612, 550), (358, 490),  # landmarks 24-26
    (701, 613), (349, 611), (709, 624),  # landmarks 27-29
    (363, 630), (730, 633), (303, 628),  # landmarks 30-32
])

# Tolerance and expected values for the 33 world-space landmarks, one
# three-value row per landmark.
WORLD_DIFF_THRESHOLD = 0.2  # meters
EXPECTED_POSE_WORLD_LANDMARKS = np.array([
    (-0.11, -0.59, -0.15), (-0.09, -0.64, -0.16), (-0.09, -0.64, -0.16),
    (-0.09, -0.64, -0.16), (-0.11, -0.64, -0.14), (-0.11, -0.64, -0.14),
    (-0.11, -0.64, -0.14), (0.01, -0.65, -0.15), (-0.06, -0.64, -0.05),
    (-0.07, -0.57, -0.15), (-0.09, -0.57, -0.12), (0.18, -0.49, -0.09),
    (-0.14, -0.5, -0.03), (0.41, -0.48, -0.11), (-0.42, -0.5, -0.02),
    (0.64, -0.49, -0.17), (-0.63, -0.51, -0.13), (0.7, -0.5, -0.19),
    (-0.71, -0.53, -0.15), (0.72, -0.51, -0.23), (-0.69, -0.54, -0.19),
    (0.66, -0.49, -0.19), (-0.64, -0.52, -0.15), (0.09, 0., -0.04),
    (-0.09, -0., 0.03), (0.41, 0.23, -0.09), (-0.43, 0.1, -0.11),
    (0.69, 0.49, -0.04), (-0.48, 0.47, -0.02), (0.72, 0.52, -0.04),
    (-0.48, 0.51, -0.02), (0.8, 0.5, -0.14), (-0.59, 0.52, -0.11),
])
|
||||
|
||||
|
||||
class PoseTest(parameterized.TestCase):
  """Tests for the Pose solution API on a still image and on a video."""

  def _landmarks_list_to_array(self, landmark_list, image_shape):
    """Returns the landmarks as an (N, 3) array scaled by the image size.

    x and z are multiplied by the image width, y by the image height.
    """
    rows, cols, _ = image_shape
    return np.asarray([(lmk.x * cols, lmk.y * rows, lmk.z * cols)
                       for lmk in landmark_list.landmark])

  def _world_landmarks_list_to_array(self, landmark_list):
    """Returns the landmarks as an (N, 3) array of unscaled (x, y, z)."""
    return np.asarray([(lmk.x, lmk.y, lmk.z)
                       for lmk in landmark_list.landmark])

  def _assert_diff_less(self, array1, array2, threshold):
    """Asserts that every element-wise absolute difference is < threshold."""
    npt.assert_array_less(np.abs(array1 - array2), threshold)

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    """Draws the pose on `frame` and writes it to the temp dir as a PNG.

    The file is named after the running test method and the frame index.
    """
    mp_drawing.draw_landmarks(frame, results.pose_landmarks,
                              mp_pose.POSE_CONNECTIONS)
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    """A four-channel image must be rejected with a ValueError."""
    with mp_pose.Pose() as pose:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        pose.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    """An all-white image yields no pose landmarks."""
    with mp_pose.Pose() as pose:
      image = np.zeros([100, 100, 3], dtype=np.uint8)
      image.fill(255)
      results = pose.process(image)
      self.assertIsNone(results.pose_landmarks)

  @parameterized.named_parameters(('static_lite', True, 0, 3),
                                  ('static_full', True, 1, 3),
                                  ('static_heavy', True, 2, 3),
                                  ('video_lite', False, 0, 3),
                                  ('video_full', False, 1, 3),
                                  ('video_heavy', False, 2, 3))
  def test_on_image(self, static_image_mode, model_complexity, num_frames):
    """Checks landmarks on a still image fed repeatedly to the solution."""
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
    image = cv2.imread(image_path)
    with mp_pose.Pose(static_image_mode=static_image_mode,
                      model_complexity=model_complexity) as pose:
      for idx in range(num_frames):
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # TODO: Add rendering of world 3D when supported.
        self._annotate(image.copy(), results, idx)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.pose_landmarks,
                                          image.shape)[:, :2],
            EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
        self._assert_diff_less(
            self._world_landmarks_list_to_array(results.pose_world_landmarks),
            EXPECTED_POSE_WORLD_LANDMARKS, WORLD_DIFF_THRESHOLD)

  @parameterized.named_parameters(
      ('full', 1, 'pose_squats.full.npz'))
  def test_on_video(self, model_complexity, expected_name):
    """Tests pose models on a video."""
    # If set to `True` will dump actual predictions to .npz and JSON files.
    dump_predictions = False
    # Set threshold for comparing actual and expected predictions in pixels.
    diff_threshold = 15
    world_diff_threshold = 0.1

    video_path = os.path.join(os.path.dirname(__file__),
                              'testdata/pose_squats.mp4')
    expected_path = os.path.join(os.path.dirname(__file__),
                                 'testdata/{}'.format(expected_name))

    # Predict pose landmarks for each frame.
    video_cap = cv2.VideoCapture(video_path)
    # Release the capture however the test ends, including on failures.
    self.addCleanup(video_cap.release)
    # Fail here rather than later with a confusing empty-prediction mismatch.
    self.assertTrue(video_cap.isOpened(),
                    'Unable to open test video: {}'.format(video_path))
    actual_per_frame = []
    actual_world_per_frame = []
    frame_idx = 0
    with mp_pose.Pose(static_image_mode=False,
                      model_complexity=model_complexity) as pose:
      while True:
        # Get next frame of the video.
        success, input_frame = video_cap.read()
        if not success:
          break

        # Run pose tracker.
        input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
        result = pose.process(image=input_frame)
        pose_landmarks = self._landmarks_list_to_array(result.pose_landmarks,
                                                       input_frame.shape)
        pose_world_landmarks = self._world_landmarks_list_to_array(
            result.pose_world_landmarks)

        actual_per_frame.append(pose_landmarks)
        actual_world_per_frame.append(pose_world_landmarks)

        # Annotate on a BGR copy since cv2.imwrite expects BGR channel order.
        input_frame = cv2.cvtColor(input_frame, cv2.COLOR_RGB2BGR)
        self._annotate(input_frame, result, frame_idx)
        frame_idx += 1
    actual = np.array(actual_per_frame)
    actual_world = np.array(actual_world_per_frame)

    if dump_predictions:
      # Dump .npz
      with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        np.savez(tmp_file, predictions=actual, predictions_world=actual_world)
        print('Predictions saved as .npz to {}'.format(tmp_file.name))

      # Dump JSON; the temp file is opened once, directly in text mode.
      dump_data = {
          'predictions': np.around(actual, 3).tolist(),
          'predictions_world': np.around(actual_world, 3).tolist()
      }
      with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp_file:
        tmp_file.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))
        print('Predictions saved as JSON to {}'.format(tmp_file.name))

    # Load both expected arrays in one pass and close the archive afterwards.
    with np.load(expected_path) as expected_data:
      expected = expected_data['predictions']
      expected_world = expected_data['predictions_world']

    # Validate actual vs. expected landmarks.
    self.assertEqual(
        actual.shape, expected.shape,
        'Unexpected shape of predictions: {} instead of {}'.format(
            actual.shape, expected.shape))
    self._assert_diff_less(
        actual[..., :2], expected[..., :2], threshold=diff_threshold)

    # Validate actual vs. expected world landmarks.
    self.assertEqual(
        actual_world.shape, expected_world.shape,
        'Unexpected shape of world predictions: {} instead of {}'.format(
            actual_world.shape, expected_world.shape))
    self._assert_diff_less(
        actual_world, expected_world, threshold=world_diff_threshold)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Run the Pose tests through absl's test runner.
  absltest.main()
|
@@ -0,0 +1,76 @@
|
||||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""MediaPipe Selfie Segmentation."""
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
import numpy as np
|
||||
# The following imports are needed because python pb2 silently discards
|
||||
# unknown protobuf fields.
|
||||
# pylint: disable=unused-import
|
||||
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
||||
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
||||
from mediapipe.calculators.tensor import inference_calculator_pb2
|
||||
from mediapipe.calculators.tensor import tensors_to_segmentation_calculator_pb2
|
||||
from mediapipe.calculators.util import local_file_contents_calculator_pb2
|
||||
from mediapipe.framework.tool import switch_container_pb2
|
||||
# pylint: enable=unused-import
|
||||
|
||||
from mediapipe.python.solution_base import SolutionBase
|
||||
|
||||
# Location of the serialized selfie segmentation CPU graph.
BINARYPB_FILE_PATH = (
    'mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb')
|
||||
|
||||
|
||||
class SelfieSegmentation(SolutionBase):
  """MediaPipe Selfie Segmentation.

  MediaPipe Selfie Segmentation processes an RGB image and returns a
  segmentation mask.

  Please refer to
  https://solutions.mediapipe.dev/selfie_segmentation#python-solution-api for
  usage examples.
  """

  def __init__(self, model_selection=0):
    """Initializes a MediaPipe Selfie Segmentation object.

    Args:
      model_selection: 0 or 1. 0 to select a general-purpose model, and 1 to
        select a model more optimized for landscape images. See details in
        https://solutions.mediapipe.dev/selfie_segmentation#model_selection.
    """
    side_inputs = {'model_selection': model_selection}
    super().__init__(
        binary_graph_path=BINARYPB_FILE_PATH,
        side_inputs=side_inputs,
        outputs=['segmentation_mask'])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns a segmentation mask.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with a "segmentation_mask" field that contains a
      float type 2d np array representing the mask.
    """
    results = super().process(input_data={'image': image})
    return results
|
@@ -0,0 +1,68 @@
|
||||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for mediapipe.python.solutions.selfie_segmentation."""
|
||||
|
||||
import os
|
||||
|
||||
from absl.testing import absltest
|
||||
from absl.testing import parameterized
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
# resources dependency
|
||||
# undeclared dependency
|
||||
from mediapipe.python.solutions import selfie_segmentation as mp_selfie_segmentation
|
||||
|
||||
# Directory holding the test images.
# NOTE(review): nothing in this test module references this constant;
# test_segmentation builds its image path from os.path.dirname(__file__)
# instead. Confirm whether it is still needed.
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
|
||||
|
||||
|
||||
class SelfieSegmentationTest(parameterized.TestCase):
  """Tests for the SelfieSegmentation Python solution."""

  def _draw(self, frame: np.ndarray, mask: np.ndarray):
    """Writes the masked frame to a PNG in the system temp directory.

    The mask is replicated three times along a new trailing axis and the frame
    is clamped element-wise from above by it. The output file is named after
    the running test method.

    Args:
      frame: Image to mask (presumably HxWx3, as read by cv2 - TODO confirm).
      mask: Mask array (presumably HxW with values in 0..255 - TODO confirm).
    """
    # Imported here because the module's import block does not bring in
    # `tempfile`; without it this helper raised NameError.
    import tempfile  # pylint: disable=g-import-not-at-top

    frame = np.minimum(frame, np.stack((mask,) * 3, axis=-1))
    path = os.path.join(tempfile.gettempdir(),
                        self.id().split('.')[-1] + '.png')
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    """A four-channel input must be rejected with a ValueError."""
    with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        selfie_segmentation.process(
            np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    """An all-white image must produce a mask that scales to all zeros."""
    with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
      image = np.full((100, 100, 3), 255, dtype=np.uint8)
      results = selfie_segmentation.process(image)
      normalized_segmentation_mask = (results.segmentation_mask *
                                      255).astype(int)
      self.assertLess(np.amax(normalized_segmentation_mask), 1)

  @parameterized.named_parameters(('general', 0), ('landscape', 1))
  def test_segmentation(self, model_selection):
    """Runs both models on the portrait image and dumps the masked result.

    No assertion is made on the mask itself; the test only checks that the
    pipeline runs and that the output image can be written.
    """
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/portrait.jpg')
    image = cv2.imread(image_path)
    with mp_selfie_segmentation.SelfieSegmentation(
        model_selection=model_selection) as selfie_segmentation:
      # cv2 loads images as BGR, so convert to RGB before processing.
      results = selfie_segmentation.process(
          cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
      normalized_segmentation_mask = (results.segmentation_mask *
                                      255).astype(int)
      self._draw(image.copy(), normalized_segmentation_mask)
|
||||
|
||||
|
||||
# Script entry point: hand control to the absl test runner.
if __name__ == '__main__':
|
||||
absltest.main()
|
@@ -0,0 +1,75 @@
|
||||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for mediapipe.python._framework_bindings.timestamp."""
|
||||
|
||||
import time
|
||||
|
||||
from absl.testing import absltest
|
||||
import mediapipe as mp
|
||||
|
||||
|
||||
class TimestampTest(absltest.TestCase):
  """Exercises the mp.Timestamp Python binding."""

  def test_timestamp(self):
    """Checks the value, int equality and string form of a Timestamp."""
    stamp = mp.Timestamp(100)
    self.assertEqual(stamp.value, 100)
    self.assertEqual(stamp, 100)
    self.assertEqual(str(stamp), '<mediapipe.Timestamp with value: 100>')

  def test_timestamp_copy_constructor(self):
    """A Timestamp built from another Timestamp compares equal to it."""
    original = mp.Timestamp(100)
    duplicate = mp.Timestamp(original)
    self.assertEqual(original, duplicate)

  def test_timestamp_comparsion(self):
    """Equal values compare equal; different values compare unequal."""
    first = mp.Timestamp(100)
    same_value = mp.Timestamp(100)
    self.assertEqual(first, same_value)
    other_value = mp.Timestamp(200)
    self.assertNotEqual(first, other_value)

  def test_timestamp_special_values(self):
    """Each special Timestamp constant prints with its symbolic name."""
    expectations = (
        (mp.Timestamp.UNSET, '<mediapipe.Timestamp with value: UNSET>'),
        (mp.Timestamp.UNSTARTED,
         '<mediapipe.Timestamp with value: UNSTARTED>'),
        (mp.Timestamp.PRESTREAM,
         '<mediapipe.Timestamp with value: PRESTREAM>'),
        (mp.Timestamp.MIN, '<mediapipe.Timestamp with value: MIN>'),
        (mp.Timestamp.MAX, '<mediapipe.Timestamp with value: MAX>'),
        (mp.Timestamp.POSTSTREAM,
         '<mediapipe.Timestamp with value: POSTSTREAM>'),
        (mp.Timestamp.DONE, '<mediapipe.Timestamp with value: DONE>'),
    )
    for special, expected_text in expectations:
      self.assertEqual(str(special), expected_text)

  def test_timestamp_comparisons(self):
    """Ordering operators agree with the underlying values."""
    earlier = mp.Timestamp(100)
    later = mp.Timestamp(101)
    self.assertGreater(later, earlier)
    self.assertGreaterEqual(later, earlier)
    self.assertLess(earlier, later)
    self.assertLessEqual(earlier, later)
    self.assertNotEqual(earlier, later)

  def test_from_seconds(self):
    """from_seconds round-trips the wall clock to within one second."""
    wall_clock = time.time()
    converted = mp.Timestamp.from_seconds(wall_clock)
    self.assertAlmostEqual(wall_clock, converted.seconds(), delta=1)
|
||||
|
||||
|
||||
# Script entry point: hand control to the absl test runner.
if __name__ == '__main__':
|
||||
absltest.main()
|
Reference in New Issue
Block a user