Created
October 12, 2021 17:44
-
-
Save HoangTienDuc/77633d82a63b6d1f7649eea4c6380000 to your computer and use it in GitHub Desktop.
debug request
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pycuda.driver as cuda | |
import pycuda.autoinit | |
import numpy as np | |
import tensorrt as trt | |
import cv2 | |
from hyperpose import Config,Model | |
TRT_LOGGER = trt.Logger() | |
class HostDeviceMem(object):
    """Bundle a host buffer with its matching device buffer.

    A small convenience container that reads better than a bare 2-tuple.

    Attributes:
        host: the host-side buffer handed to the constructor.
        device: the device-side buffer handed to the constructor.
    """

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __repr__(self):
        # The debug representation is the same text as the printable one.
        return str(self)

    def __str__(self):
        pieces = ["Host:\n", str(self.host), "\nDevice:\n", str(self.device)]
        return "".join(pieces)
def allocate_buffers(engine):
    """Allocate host/device buffers for every binding of ``engine``.

    Args:
        engine: a deserialized TensorRT engine; iterating it yields binding
            names.

    Returns:
        An 8-tuple ``(inputs, outputs, bindings, stream, input_shapes,
        out_shapes, out_names, max_batch_size)`` where ``inputs``/``outputs``
        are lists of ``HostDeviceMem``, ``bindings`` holds the device buffer
        addresses as ints in binding order, and the shape lists hold the
        shapes exactly as reported by the engine (a leading -1 is kept).
    """
    stream = cuda.Stream()
    max_batch_size = engine.max_batch_size

    inputs, outputs, bindings = [], [], []
    input_shapes, out_shapes, out_names = [], [], []

    for name in engine:
        dims = engine.get_binding_shape(name)
        # A leading -1 marks a dynamic first dimension; size the buffer for a
        # single sample here and scale by max_batch_size below.
        if dims[0] == -1:
            dims = (1,) + dims[1:]
        element_count = trt.volume(dims) * max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(name))

        # Page-locked host memory plus a device allocation of the same size.
        host_buf = cuda.pagelocked_empty(element_count, np_dtype)
        device_buf = cuda.mem_alloc(host_buf.nbytes)
        bindings.append(int(device_buf))

        pair = HostDeviceMem(host_buf, device_buf)
        if not engine.binding_is_input(name):
            outputs.append(pair)
            # Keep the engine-reported output shape and name for the caller.
            out_shapes.append(engine.get_binding_shape(name))
            out_names.append(name)
            continue

        inputs.append(pair)
        input_shapes.append(engine.get_binding_shape(name))

    return (inputs, outputs, bindings, stream,
            input_shapes, out_shapes, out_names, max_batch_size)
def do_inference(context, bindings, inputs, outputs, stream):
    """Run one asynchronous inference pass and return the host outputs.

    Works for any number of inputs and outputs.

    Args:
        context: execution context exposing ``execute_async_v2``.
        bindings: list of device buffer addresses, in binding order.
        inputs: list of ``HostDeviceMem`` whose ``host`` buffers already hold
            the input data.
        outputs: list of ``HostDeviceMem`` that receive the results.
        stream: CUDA stream on which the copies and the execution are queued.

    Returns:
        A list with the ``host`` buffer of every entry in ``outputs``.
    """
    # Queue the host -> device copies for all inputs.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)

    # Queue the inference itself on the same stream.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)

    # Queue the device -> host copies for all outputs.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)

    # Block until everything queued on the stream has finished.
    stream.synchronize()

    return [out.host for out in outputs]
class TrtModel(object):
    """Wrapper around a serialized TensorRT engine file.

    The engine is deserialized and its buffers are allocated by ``build()``,
    which ``run()`` calls on first use if it has not been called yet. The
    instance creates its own CUDA context on device 0 and pushes/pops it
    around the work done in ``run()``.
    """

    def __init__(self, model):
        """Remember the engine path and create the CUDA context.

        Args:
            model: path to the serialized TensorRT engine file.
        """
        self.engine_file = model
        self.engine = None
        self.inputs = None
        self.outputs = None
        self.bindings = None
        self.stream = None
        self.context = None
        self.input_shapes = None
        self.out_shapes = None
        # Filled in by build(); initialised here like the other attributes.
        self.out_names = None
        self.max_batch_size = 1
        self.cuda_ctx = cuda.Device(0).make_context()
        # NOTE(review): make_context() is documented to make the new context
        # current already; this extra push is what the pop at the end of
        # build() undoes. Confirm the resulting context stack is intended.
        if self.cuda_ctx:
            self.cuda_ctx.push()

    def build(self):
        """Deserialize the engine, allocate buffers and create the context."""
        with open(self.engine_file, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            self.engine = runtime.deserialize_cuda_engine(f.read())
        (self.inputs, self.outputs, self.bindings, self.stream,
         self.input_shapes, self.out_shapes, self.out_names,
         self.max_batch_size) = allocate_buffers(self.engine)

        self.context = self.engine.create_execution_context()
        self.context.active_optimization_profile = 0
        # Undo the push done in __init__.
        if self.cuda_ctx:
            self.cuda_ctx.pop()

    def run(self, input, deflatten: bool = True, as_dict=False):
        """Run inference on ``input`` and return the engine outputs.

        Args:
            input: array-like; converted with ``np.asarray``, flattened in C
                order, cast to float32 and copied into the first input's host
                buffer. Its shape is set as the shape of binding 0.
            deflatten: when true, reshape every flat output buffer to the
                shape the engine reported for that output.
            as_dict: when true, return a dict keyed by output binding name
                instead of a list.

        Returns:
            A list of output arrays (or a name -> array dict if ``as_dict``).
            The arrays come from the reusable host buffers, so a later call
            may overwrite them; copy them if they must be kept.
        """
        # Lazy load: build on first use.
        if self.engine is None:
            self.build()

        if self.cuda_ctx:
            self.cuda_ctx.push()
        try:
            input = np.asarray(input)
            allocate_place = np.prod(input.shape)
            flat_input = input.flatten(order='C').astype(np.float32)
            self.inputs[0].host[:allocate_place] = flat_input
            self.context.set_binding_shape(0, input.shape)
            trt_outputs = do_inference(
                self.context, bindings=self.bindings,
                inputs=self.inputs, outputs=self.outputs, stream=self.stream)
        finally:
            # Always pop, so a failed inference does not leave the context
            # pushed on the stack.
            if self.cuda_ctx:
                self.cuda_ctx.pop()

        # Reshape the flat TensorRT outputs back to the engine-reported shapes.
        if deflatten:
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, self.out_shapes)
            ]
        if as_dict:
            return {name: trt_outputs[i] for i, name in enumerate(self.out_names)}
        return trt_outputs
def preprocess(img, size=(656, 368)):
    """Resize, colour-convert and normalise an image for the network.

    Args:
        img: image array as accepted by ``cv2.resize``; converted with
            ``cv2.COLOR_BGR2RGB``.
        size: target size passed to ``cv2.resize`` as ``(width, height)``.
            Defaults to ``(656, 368)``.

    Returns:
        A float32 array with the axes reordered by ``transpose((2, 0, 1))``
        and the values divided by 255.
    """
    # cv2.resize takes the target size as (width, height), not (height, width).
    resized = cv2.resize(img, size)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    tensor = rgb.transpose((2, 0, 1)).astype(np.float32)
    tensor /= 255.0
    return tensor
# Load the serialized engine and allocate its buffers.
engine = TrtModel("/data/data/openpose-coco-V2-HW=368x656.onnx_b1_gpu0_fp16.engine")
engine.build()

image = cv2.imread("/data/pose_test.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if image is None:
    raise FileNotFoundError("/data/pose_test.jpg")

# ori_image is the preprocessed tensor without a batch axis, as before.
ori_image = preprocess(image)
# The outputs are indexed with [0] below, i.e. treated as batched, so the
# input needs a leading batch axis as well.
# NOTE(review): assumes the engine's input binding is 4-D (batch first) --
# confirm against the engine.
trt_input = ori_image[np.newaxis, ...]
trt_outputs = engine.run(trt_input)
conf_map, paf_map = trt_outputs
print("trt_output: ", np.array(trt_outputs[0]))

Config.set_model_name("openpose-coco")
Config.set_model_type(Config.MODEL.Openpose)

# Get the visualize function, which renders part and limb heatmap images
# from the inferred heatmaps.
visualize = Model.get_visualize(Config.MODEL.Openpose)
vis_parts_heatmap, vis_limbs_heatmap = visualize(
    ori_image, conf_map[0], paf_map[0], save_tofile=True)

# Limb list: pairs of part indices, built by zipping the two index lists.
CocoLimb = list(zip([1, 8, 9, 1, 11, 12, 1, 2, 3, 1, 5, 6, 1, 0, 0, 14, 15],
                    [8, 9, 10, 11, 12, 13, 2, 3, 4, 5, 6, 7, 0, 14, 15, 16, 17]))
from enum import Enum | |
class CocoPart(Enum):
    """Body-part indices (0-17) handed to the post-processor as ``parts``.

    The index pairs in ``CocoLimb`` refer to these values.
    """
    Nose = 0
    # NOTE(review): index 1 is usually called "Neck" in the 18-keypoint
    # OpenPose layout; the name is left as-is -- confirm it is intended.
    Instance = 1
    # Right arm
    RShoulder = 2
    RElbow = 3
    RWrist = 4
    # Left arm
    LShoulder = 5
    LElbow = 6
    LWrist = 7
    # Right leg
    RHip = 8
    RKnee = 9
    RAnkle = 10
    # Left leg
    LHip = 11
    LKnee = 12
    LAnkle = 13
    # Face
    REye = 14
    LEye = 15
    REar = 16
    LEar = 17
# Get the post-processor class, which assembles the detected parts from the
# inferred heatmaps into humans.
PostProcessor = Model.get_postprocessor(Config.MODEL.Openpose)
postprocessor = PostProcessor(
    parts=CocoPart,
    limbs=CocoLimb,
    hin=368,
    win=656,
    hout=38,
    wout=46,
    colors=None,
)
humans = postprocessor.process(conf_map[0], paf_map[0], 368, 656)

# Draw every detected skeleton onto a copy of the image.
output_img = ori_image.copy()
for human in humans:
    output_img = human.draw_human(output_img)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment