# gobot/board-vision/main.py

import cv2
import argparse
import enum
import abc
import numpy as np

# Parse command-line arguments: the only argument is the index of the camera
# that OpenCV should open.
parser = argparse.ArgumentParser()
parser.add_argument("camera", type=int, help="Camera index")
args = parser.parse_args()

cam = cv2.VideoCapture(args.camera)
if not cam.isOpened():
    # Fail fast with a readable message; otherwise the first failed read
    # would only surface later as an AttributeError on a missing frame.
    raise SystemExit(f"Could not open camera with index {args.camera}")

# Single output window, placed near the top-left corner of the screen.
WINDOW_NAME = "Output Go Board Detection"
cv2.namedWindow(WINDOW_NAME)
cv2.moveWindow(WINDOW_NAME, 1, 0)

# Font and colour used for the text overlay drawn on the output image.
font = cv2.FONT_HERSHEY_SIMPLEX
font_color = (127, 255, 0)

class Board_Sizes_ABC(abc.ABC):
    """Base class for board geometries.

    Subclasses supply the relative position of every grid line; this class
    turns that into a pixel position on an arbitrary line segment.
    """

    @abc.abstractmethod
    def get_cell_rel_pos(self, n: int):
        """Return the relative position of grid line ``n`` along a segment.

        The value is used as the interpolation factor between the two end
        points passed to :meth:`get_cell_position_on_line`.
        """

    def get_cell_position_on_line(self, n: int, p0: np.ndarray, p1: np.ndarray):
        """Return the point for grid line ``n`` on the segment ``p0`` -> ``p1``.

        The point is interpolated linearly using ``get_cell_rel_pos(n)``,
        rounded to the nearest integer and returned as an ``int32`` array.
        """
        direction = p1 - p0
        fraction = self.get_cell_rel_pos(n)
        interpolated = p0 + direction*fraction
        return np.round(interpolated).astype(np.int32)

class Board_19x19(Board_Sizes_ABC):
    """Geometry of a board with 19 grid lines (18 cells) per side.

    All lengths share one unit, which is not stated in this file. Only the
    ratios matter, because every position is divided by ``TOTAL_WIDTH``.
    """

    LINE_WIDTH = 1   # width of one grid line
    CELL_SIZE = 21   # gap between two neighbouring grid lines
    PADDING = 11.5   # margin between the board edge and the outermost line
    N = 18           # number of cells per side

    # Full edge length: N cells, N+1 lines and the margin on both sides.
    TOTAL_WIDTH = N*CELL_SIZE + (N+1)*LINE_WIDTH + 2*PADDING

    def __init__(self):
        """Precompute the relative centre position of each of the 19 lines."""
        first_line_centre = self.PADDING + self.LINE_WIDTH*0.5
        pitch = self.LINE_WIDTH + self.CELL_SIZE

        lut = []
        for line_index in range(19):
            lut.append((first_line_centre + pitch*line_index)/self.TOTAL_WIDTH)
        self.rel_lut = lut

    def get_cell_rel_pos(self, n: int):
        """Return the precomputed relative position of grid line ``n``."""
        return self.rel_lut[n]

class VISION_PREPROCESSING_MODE(enum.Enum):
    """States of the preprocessing state machine driven by the main loop.

    The state selects how each frame is binarised before contour detection
    (Canny edges or adaptive threshold) and when to switch between the two.
    """
    # Canny edges. Left for THRES_HOLD once more than 50 frames have passed
    # without exactly one detection.
    CANNY = "CANNY"
    # Adaptive threshold. Counts hold_time down to zero, then moves to THRES.
    THRES_HOLD = "THRES_HOLD"
    # Adaptive threshold. Left for TEST_CANNY_HOLD when detection is lost for
    # more than 50 frames, jitter exceeds 3, or more than 15 double
    # detections have been counted.
    THRES = "THRES"
    # Canny edges. Counts hold_time down to zero, then moves to TEST_CANNY.
    TEST_CANNY_HOLD = "TEST_CANNY_HOLD"
    # Canny edges. Single-frame decision: back to CANNY if detection is recent
    # and stable, otherwise to THRES_HOLD.
    TEST_CANNY = "TEST_CANNY"

# Frames per second of the previous loop iteration (shown in the overlay).
fps = 0

# Number of consecutive frames without exactly one detection.
last_decetion_time = 0
# Remaining frames to stay in one of the *_HOLD preprocessing states.
hold_time = 0
# Counter consulted by the main loop to decide when the double-detection
# count is reset (compared against 300 there).
last_double_detection_time = 0

# Averaged board corners taken from the detection history; None until the
# first board has been detected.
dection = None

# Corner arrays of the most recent single detections (trimmed to 10 entries).
dection_history = []
# Deviation of the history from its mean; only updated once the history
# holds more than 6 entries.
jitter = 0
# Number of square-like quadrilaterals found in the current frame.
num_decetions = 0
# Number of frames counted as "double detection" by the main loop.
num_double_decetions = 0

# Preprocessing starts with Canny edge detection.
preproc_state = VISION_PREPROCESSING_MODE.CANNY

# Board geometry used to place the grid lines between the detected corners.
board_messurements = Board_19x19()

try:
    while True:
        t_start = cv2.getTickCount()
        ret, frame = cam.read()
        width, height = frame.shape[1], frame.shape[0]

        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        frame_edge = cv2.GaussianBlur(frame_gray, (5, 5), 0)
        frame_edge = cv2.adaptiveThreshold(frame_edge, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 9, 2)
        frame_edge_copy = cv2.cvtColor(frame_edge.copy(), cv2.COLOR_GRAY2BGR)


        if preproc_state == VISION_PREPROCESSING_MODE.CANNY or \
            preproc_state == VISION_PREPROCESSING_MODE.TEST_CANNY or \
            preproc_state == VISION_PREPROCESSING_MODE.TEST_CANNY_HOLD:

            frame_proc = cv2.GaussianBlur(frame_gray, (3, 3), 0)
            frame_proc = cv2.Canny(frame_proc, 130, 200)

        elif preproc_state == VISION_PREPROCESSING_MODE.THRES_HOLD or preproc_state == VISION_PREPROCESSING_MODE.THRES:
            frame_proc = cv2.GaussianBlur(frame_gray, (3, 3), 0)
            frame_proc = cv2.adaptiveThreshold(frame_proc, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 19, 3)

        else:
            raise ValueError("Invalid preprocessing mode")

        countours, hierarchy = cv2.findContours(frame_proc, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        countours_simplified = []

        for c in countours:
            esp = cv2.arcLength(c, True)*0.05
            approx = cv2.approxPolyDP(c, esp, True)
            if len(approx) == 4:
                area = cv2.contourArea(approx)

                if area > 1000:
                    lengths = np.array([
                        np.linalg.norm(approx[0] - approx[1]),
                        np.linalg.norm(approx[1] - approx[2]),
                        np.linalg.norm(approx[2] - approx[3]),
                        np.linalg.norm(approx[3] - approx[0])
                    ])

                    avrg_length = np.mean(lengths)
                    lengths_diff = np.abs(lengths)/avrg_length

                    if np.all(lengths_diff < 1.1) and np.all(lengths_diff > 0.90):
                        countours_simplified.append(approx)

        num_decetions = len(countours_simplified)

        if num_decetions > 2:
            num_double_decetions += 1
            num_last_double_detection_time = 0
        else:
            if last_double_detection_time > 300:
                num_double_decetions = 0
            else:
                last_double_detection_time = max(1000, last_double_detection_time)

        if num_decetions == 1:
            last_decetion_time = 0

            dection_history.append(np.squeeze(countours_simplified[0], axis=1))
            dection_history = dection_history[-10:]
        else:
            last_decetion_time += 1

        if len(dection_history) > 0:
            dection_history_np = np.array(dection_history)
            avrage_detection = np.mean(dection_history_np, axis=0)
            dection = avrage_detection.copy()

        if len(dection_history) > 6:
            jitter = 0
            for i in dection_history_np:
                jitter = np.max(np.abs(i - avrage_detection))

        match preproc_state:
            case VISION_PREPROCESSING_MODE.CANNY:
                if last_decetion_time > 50:
                    preproc_state = VISION_PREPROCESSING_MODE.THRES_HOLD
                    hold_time = 100

            case VISION_PREPROCESSING_MODE.THRES_HOLD:
                if hold_time == 0:
                    preproc_state = VISION_PREPROCESSING_MODE.THRES
                else:
                    hold_time -= 1

            case VISION_PREPROCESSING_MODE.THRES:
                if last_decetion_time > 50 or jitter > 3 or num_double_decetions > 15:
                    preproc_state = VISION_PREPROCESSING_MODE.TEST_CANNY_HOLD
                    hold_time = 200

            case VISION_PREPROCESSING_MODE.TEST_CANNY_HOLD:
                if hold_time == 0:
                    preproc_state = VISION_PREPROCESSING_MODE.TEST_CANNY
                else:
                    hold_time -= 1

            case VISION_PREPROCESSING_MODE.TEST_CANNY:
                if last_decetion_time < 10 and jitter < 3:
                    preproc_state = VISION_PREPROCESSING_MODE.CANNY
                else:
                    preproc_state = VISION_PREPROCESSING_MODE.THRES_HOLD
                    hold_time = 100

            case _:
                raise ValueError("Invalid preprocessing mode")

        frame_dection = cv2.drawContours(frame.copy(), countours_simplified, -1, (0, 255, 0), 1)
        frame_classifications = frame.copy()

        if dection is not None:
            N_line_positions = []
            S_line_positions = []
            W_line_positions = []
            E_line_positions = []

            for i in range(19):
                N_line_positions.append(board_messurements.get_cell_position_on_line(i, dection[0], dection[1]))
                E_line_positions.append(board_messurements.get_cell_position_on_line(i, dection[1], dection[2]))
                S_line_positions.append(board_messurements.get_cell_position_on_line(i, dection[2], dection[3]))
                W_line_positions.append(board_messurements.get_cell_position_on_line(i, dection[3], dection[0]))

            N_line_positions = np.array(N_line_positions, dtype=np.int32)
            E_line_positions = np.array(E_line_positions, dtype=np.int32)
            S_line_positions = np.array(list(reversed(S_line_positions)), dtype=np.int32)
            W_line_positions = np.array(list(reversed(W_line_positions)), dtype=np.int32)

            cv2.circle(frame_dection, dection[0].astype(np.int32), 3, (0, 255, 255), -1)
            cv2.circle(frame_dection, dection[1].astype(np.int32), 3, (0, 255, 255), -1)
            cv2.circle(frame_dection, dection[2].astype(np.int32), 3, (0, 255, 255), -1)
            cv2.circle(frame_dection, dection[3].astype(np.int32), 3, (0, 255, 255), -1)

            for p in N_line_positions:
                cv2.circle(frame_dection, p, 2, (0, 0, 255), -1)
            for p in E_line_positions:
                cv2.circle(frame_dection, p, 2, (0, 0, 255), -1)
            for p in S_line_positions:
                cv2.circle(frame_dection, p, 2, (0, 0, 255), -1)
            for p in W_line_positions:
                cv2.circle(frame_dection, p, 2, (0, 0, 255), -1)

            for p0, p1 in zip(N_line_positions, S_line_positions):
                cv2.line(frame_dection, tuple(p0), tuple(p1), (0, 0, 255), 1)

                for i in range(19):
                    p2 = board_messurements.get_cell_position_on_line(i, p0, p1)
                    cv2.circle(frame_dection, p2, 2, (0, 0, 255), -1)

            for p0, p1 in zip(E_line_positions, W_line_positions):
                cv2.line(frame_dection, tuple(p0), tuple(p1), (0, 255, 0), 1)

                for i in range(19):
                    p2 = board_messurements.get_cell_position_on_line(i, p0, p1)
                    cv2.circle(frame_dection, p2, 2, (0, 255, 0), -1)

            MESSURE_WIDTH = 15
            STONE_DECTION_WIDTH = 5
            for x in range(19):
                for y in range(19):
                    mid_point = board_messurements.get_cell_position_on_line(x, N_line_positions[y], S_line_positions[y])
                    subsection = frame[mid_point[1] - MESSURE_WIDTH//2:mid_point[1] + MESSURE_WIDTH//2, mid_point[0] - MESSURE_WIDTH//2:mid_point[0] + MESSURE_WIDTH//2]
                    avrage_color = np.mean(subsection, axis=(0, 1)).astype(np.int32).tolist()

                    cv2.rectangle(frame_classifications, (mid_point[0] - MESSURE_WIDTH//2, mid_point[1] - MESSURE_WIDTH//2), (mid_point[0] + MESSURE_WIDTH//2, mid_point[1] + MESSURE_WIDTH//2), avrage_color, -1)
                    cv2.rectangle(frame_classifications, (mid_point[0] - MESSURE_WIDTH//2, mid_point[1] - MESSURE_WIDTH//2), (mid_point[0] + MESSURE_WIDTH//2, mid_point[1] + MESSURE_WIDTH//2), (0, 0, 255), 1)

                    avrage_stone_color = np.mean(frame_edge, axis=(0, 1)).astype(np.int32).tolist()
                    cv2.rectangle(frame_edge_copy, (mid_point[0] - STONE_DECTION_WIDTH//2, mid_point[1] - STONE_DECTION_WIDTH//2), (mid_point[0] + STONE_DECTION_WIDTH//2, mid_point[1] + STONE_DECTION_WIDTH//2), (avrage_stone_color, avrage_stone_color, avrage_stone_color), -1)
                    cv2.rectangle(frame_edge_copy, (mid_point[0] - STONE_DECTION_WIDTH//2, mid_point[1] - STONE_DECTION_WIDTH//2), (mid_point[0] + STONE_DECTION_WIDTH//2, mid_point[1] + STONE_DECTION_WIDTH//2), (0, 0, 255), 1)


        res = np.concatenate((
            np.concatenate((
                frame,
                frame_classifications,
                frame_edge_copy
            ), axis=1),
            np.concatenate((
                cv2.cvtColor(frame_proc, cv2.COLOR_GRAY2BGR),
                frame_dection,
                frame
            ), axis=1)
        ), axis=0)

        res = cv2.putText(res, f"FPS: {fps:.2f}", (10, 20), font, 0.5, font_color, 1, cv2.LINE_AA)
        res = cv2.putText(res, f"GoBoard Dections: {num_decetions}", (10, 35), font, 0.5, font_color, 1, cv2.LINE_AA)
        res = cv2.putText(res, f"Preprocessing: {preproc_state.value}", (10, 50), font, 0.5, font_color, 1, cv2.LINE_AA)
        res = cv2.putText(res, f"Double Dection: {num_double_decetions}", (10, 65), font, 0.5, font_color, 1, cv2.LINE_AA)
        res = cv2.putText(res, f"Jitter: {jitter:.2f}", (10, 80), font, 0.5, font_color, 1, cv2.LINE_AA)
        res = cv2.putText(res, f"Last Dection Time: {last_decetion_time}", (10, 95), font, 0.5, font_color, 1, cv2.LINE_AA)
        res = cv2.putText(res, f"Last Double Dection Time: {last_double_detection_time}", (10, 110), font, 0.5, font_color, 1, cv2.LINE_AA)
        res = cv2.putText(res, f"Hold Time: {hold_time}", (10, 125), font, 0.5, font_color, 1, cv2.LINE_AA)

        cv2.imshow(WINDOW_NAME, res)

        t_end = cv2.getTickCount()

        fps = cv2.getTickFrequency() / (t_end - t_start)

        k = cv2.waitKey(1)
        if k%256 == 27: # ESC pressed
            print("Escape hit, closing...")
            break


except KeyboardInterrupt:
    print("Exiting")

cam.release()
cv2.destroyAllWindows()