How to improve depth segmentation of Lego blocks

Input

I have the following depth images of type uint16, obtained from an Intel RealSense L515 camera, which is specified to have an average depth accuracy of < 5 mm at 1 m.

[depth image d1]

[depth image d2]

Goal

I want to quantify the depth of the blocks in this image to get a discrete representation of the blocks inside my region of interest, a grid of 23 x 11 block positions, such as

P_x1_y1: z = 1 (one block), P_x2_y2: z = 2 (two blocks), up to 5 blocks (as in the image center).
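
Conceptually, this is just dividing each cell's measured stack height by the height of one block. A minimal sketch of that mapping (both constants below are hypothetical placeholders, not measured values):

BLOCK_HEIGHT = 65.0  # hypothetical height of one block, in raw depth units
BASE_DEPTH = 500.0   # hypothetical depth reading of the empty baseplate

def depth_to_blocks(cell_depth):
    """Convert a cell's raw depth reading into a discrete block count."""
    stack_height = BASE_DEPTH - cell_depth  # stack height above the baseplate
    return int(round(stack_height / BLOCK_HEIGHT))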

The RGB image of the ROI clarifies my aim (it is not used as an input):

[ROI RGB image]

What I have tried so far:

  1. Obtaining the ROI:
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import numpy as np
import cv2
import matplotlib.pyplot as plt

def get_roi(d1, output_size=(736, 384), ratio=(0.77, 0.54), shift=(0, 80), verbose=False):
    """
    Function: get_roi, to find and resize the ROI.
    ---
    Parameters:
    @param: d1, nd array, depth image.
    @param: output_size, tuple, the output ROI size.
    @param: ratio, tuple, the ratio of the ROI to the detected zone.
    @param: shift, tuple, the shift in pixels to align the ROI.
    @param: verbose, bool, to visualize the result.
    ---
    @return: roi, nd array, ROI resized.
    """
    d = d1.copy()

    th = cv2.threshold(d, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    th = th.astype(np.uint8)

    # Note: indexing [0] assumes OpenCV 4.x (see the helper after this block).
    contours = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
    cnt_thresh = 10000
    fx, fy = ratio
    x_shift, y_shift = shift
    roi = None
    for cnt in contours:
        if cv2.contourArea(cnt) > cnt_thresh:
            x, y, w, h = cv2.boundingRect(cnt)
            cx = x + w//2 + x_shift
            cy = y + h//2 + y_shift
            nw = int(fx * w)
            nh = int(fy * h)
            # cv2.rectangle(d1,(cx-nw//2,cy-nh//2),(cx+nw//2,cy+nh//2),color=0)
            # Crop and resize inside the loop, so the bounding box variables
            # are guaranteed to be defined:
            d_roi = d1[cy-nh//2:cy+nh//2, cx-nw//2:cx+nw//2]
            roi = cv2.resize(d_roi, output_size)

    # Visualize results
    if verbose and roi is not None:
        plt.imshow(roi)
        plt.show()

    return roi
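
One caveat: taking index [0] of cv2.findContours assumes OpenCV 4.x, where it returns (contours, hierarchy); OpenCV 3.x returns (image, contours, hierarchy) instead. A small version-agnostic helper, in case that matters:

def find_contours(binary):
    """Return the contour list under both OpenCV 3.x and 4.x."""
    result = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return result[0] if len(result) == 2 else result[1]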
  2. Finding the mode (most frequent) non-zero value of each cell in the grid:
def mode(arr):
    """
    Function: mode, to find the mode of an array.
    ---
    Parameters:
    @param: arr, nd array, any.
    ---
    @return: the mode value (whatever int/float/etc) of this array.
    """
    vals, counts = np.unique(arr, return_counts=True)
    if 0 in vals:
        z_idx = np.where(vals == 0)
        vals   = np.delete(vals,   z_idx)
        counts = np.delete(counts, z_idx)
    if len(vals) == 0:
        # The cell contained only zeros (missing depth readings).
        return 0
    index = np.argmax(counts)
    return vals[index]
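
For example, zeros (missing depth readings) are excluded before taking the mode:

cell = np.array([[0, 0, 7],
                 [7, 3, 7]], dtype=np.uint16)
print(mode(cell))  # -> 7, the most frequent non-zero value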
  3. Quantifying the values of each cell:
def mapVal(val):
    """
    Function: mapVal, to map depth values.
    ---
    Parameters:
    @param: val, int, any.
    ---
    @return: int, one of the discrete levels 0, 50, 100, 150, 200, 250.
    """
    if val <= 183:
        return 0
    elif val <= 230:
        return 50
    elif val <= 295:
        return 100
    elif val <= 390:
        return 150
    elif val <= 470:
        return 200
    else:
        return 250
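
For whole cells, an equivalent vectorized form of the same mapping is possible with np.digitize (a sketch; the bin edges are the thresholds above, and right=True reproduces the <= comparisons at the edges):

LEVELS = np.array([0, 50, 100, 150, 200, 250])
BINS = np.array([183, 230, 295, 390, 470])

def mapValArray(vals):
    """Map an array of depth values to the discrete levels in one call."""
    return LEVELS[np.digitize(vals, BINS, right=True)]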
  4. Gridding the ROI into cells and applying a linear correction for the static depth error:
def gridWorkspace(roi, gridSize=(23, 11), shift=[0, 5], verbose=False):
    """
    Function: gridWorkspace, to divide the ROI into a grid of cells and
    quantify the depth of each cell.
    ---
    Parameters:
    @param: roi, nd array, cropped region of interest.
    @param: gridSize, tuple, length/width of the workspace in cells.
    @param: shift, tuple, static error compensation for alignment.
    @param: verbose, boolean, to show the output of the function.
    ---
    @return: None.
    """
    # Store a deep copy for results:
    roi_copy = roi.copy()

    # Divide the image into a grid:
    verticalCells   = gridSize[1]
    horizontalCells = gridSize[0]

    # Cell dimensions
    bigRectWidth  =  roi_copy.shape[1] 
    bigRectHeight =  roi_copy.shape[0]

    cellWidth = bigRectWidth // horizontalCells
    cellHeight = bigRectHeight // verticalCells

    x_shift, y_shift = shift

    # Correction values, sampled from three corner cells:
    origin = mode(roi[y_shift:y_shift+cellHeight, x_shift:x_shift+cellWidth])
    x_max  = mode(roi[y_shift:y_shift+cellHeight, x_shift+(horizontalCells-1)*cellWidth:x_shift+horizontalCells*cellWidth])
    y_max  = mode(roi[y_shift+(verticalCells-1)*cellHeight:y_shift+verticalCells*cellHeight, x_shift:x_shift+cellWidth])
    print("origin= {}, x_max= {}, y_max= {}".format(origin, x_max, y_max))

    x_corr = ( int(x_max) - int(origin) ) // horizontalCells
    y_corr = ( int(y_max) - int(origin) ) // verticalCells
    print("x_corr = {}, y_corr = {}".format(x_corr, y_corr))

    # Loop thru vertical dimension:
    for j in range(verticalCells):

        # Cell starting y position:
        yo = j * cellHeight + y_shift

        # Loop thru horizontal dimension:
        for i in range(horizontalCells):

            # Cell starting x position:
            xo = i * cellWidth + x_shift

            # Cell Dimensions:
            cX = int(xo)
            cY = int(yo)

            # Quantify the current cell, applying the linear tilt correction:
            cell_mode = int(mode(roi[cY:cY + cellHeight, cX:cX + cellWidth]))
            roi_copy[cY:cY + cellHeight, cX:cX + cellWidth] = mapVal(cell_mode - j*y_corr - i*x_corr)
            # Draw Cell
            cv2.rectangle(roi_copy, (cX, cY), (cX + cellWidth, cY + cellHeight), (100, 100, 255), 1)

    # Visualize results
    if(verbose):
        plt.imshow(roi_copy)
        plt.show()
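
As an aside, the same loop can also return the discrete representation directly instead of painting it back into the image. A minimal sketch (it reuses mode and mapVal, assumes one block per 50-level as in the quantization above, and omits the tilt correction for brevity):

def count_blocks(roi, gridSize=(23, 11), shift=(0, 5)):
    """Return a (rows, cols) array of block counts, one entry per cell."""
    cellWidth = roi.shape[1] // gridSize[0]
    cellHeight = roi.shape[0] // gridSize[1]
    x_shift, y_shift = shift
    counts = np.zeros((gridSize[1], gridSize[0]), dtype=int)
    for j in range(gridSize[1]):
        for i in range(gridSize[0]):
            cell = roi[j*cellHeight + y_shift:(j+1)*cellHeight + y_shift,
                       i*cellWidth + x_shift:(i+1)*cellWidth + x_shift]
            counts[j, i] = mapVal(mode(cell)) // 50  # 0..5 blocks per cell
    return counts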

So when I try:

path = ""
imName = "d1.png"

d1 = cv2.imread(path+imName, -1)

roi = get_roi(d1, verbose=False)

roi = np.max(roi) - roi

roi[roi<0] = 0
roi[roi>500] = 0

gridWorkspace(roi, verbose=True)

I get this result:

[result image]

Can you please tell me what I can do to improve my segmentation? Thanks in advance.

Tags: opencv, image-processing, image-segmentation, depth, realsense
