# Files
# gobot/board-vision/main.py
# 2024-09-24 12:14:16 +02:00
#
# 286 lines
# 12 KiB (Stored with Git LFS)
# Python

import cv2
import argparse
import enum
import abc
import numpy as np
# Parse arguments: the only CLI parameter is the integer index of the camera.
parser = argparse.ArgumentParser()
parser.add_argument("camera", type=int, help="Camera index")
args = parser.parse_args()

# cv2.VideoCapture does not raise when the device cannot be opened; it just
# returns an unopened capture object. Fail early with a clear message instead
# of crashing later on the first (empty) frame.
cam = cv2.VideoCapture(args.camera)
if not cam.isOpened():
    parser.error(f"Could not open camera with index {args.camera}")

# Single output window, placed near the top-left corner of the screen.
WINDOW_NAME = "Output Go Board Detection"
cv2.namedWindow(WINDOW_NAME)
cv2.moveWindow(WINDOW_NAME, 1, 0)

# Font and colour (BGR) used for the status text drawn onto the output image.
font = cv2.FONT_HERSHEY_SIMPLEX
font_color = (127, 255, 0)
class Board_Sizes_ABC(abc.ABC):
    """Abstract board geometry: turns an index into a point on a line segment.

    Subclasses provide ``get_cell_rel_pos``; this base class uses that value
    as a scale factor along the vector between two points.
    """

    @abc.abstractmethod
    def get_cell_rel_pos(self, n: int):
        """Return the scale factor for index ``n``.

        The factor is multiplied with the vector ``p1 - p0`` in
        ``get_cell_position_on_line``.
        """

    def get_cell_position_on_line(self, n: int, p0: np.ndarray, p1: np.ndarray):
        """Return the point for index ``n`` on the segment from ``p0`` to ``p1``.

        Args:
            n: Index passed on to ``get_cell_rel_pos``.
            p0: Start point of the segment.
            p1: End point of the segment.

        Returns:
            ``p0 + (p1 - p0) * get_cell_rel_pos(n)``, rounded with
            ``np.round`` and cast to ``np.int32``.
        """
        direction = p1 - p0
        scaled = direction * self.get_cell_rel_pos(n)
        rounded = np.round(p0 + scaled)
        return rounded.astype(np.int32)
class Board_19x19(Board_Sizes_ABC):
    """Geometry of a 19x19 board, precomputed as relative grid-line positions.

    The constants describe one board edge (units are not stated here --
    presumably millimetres; TODO confirm):

    * ``LINE_WIDTH``  - width of one grid line
    * ``CELL_SIZE``   - gap between two neighbouring grid lines
    * ``PADDING``     - margin between the board edge and the outermost line
    * ``N``           - number of cells per edge (so there are ``N + 1`` lines)
    * ``TOTAL_WIDTH`` - full edge length derived from the values above
    """

    LINE_WIDTH = 1
    CELL_SIZE = 21
    PADDING = 11.5
    N = 18
    TOTAL_WIDTH = N*CELL_SIZE + (N+1)*LINE_WIDTH + 2*PADDING

    def __init__(self):
        """Build the lookup table of relative line-centre positions."""
        super().__init__()
        pitch = self.LINE_WIDTH + self.CELL_SIZE
        first_line_centre = self.PADDING + self.LINE_WIDTH*0.5
        # One entry per grid line. The count is derived from N instead of a
        # separate hard-coded 19 so the table cannot drift from the constants.
        self.rel_lut = [
            (first_line_centre + pitch*i)/self.TOTAL_WIDTH
            for i in range(self.N + 1)
        ]

    def get_cell_rel_pos(self, n: int):
        """Return the centre of grid line ``n`` as a fraction of TOTAL_WIDTH.

        Raises:
            IndexError: If ``n`` is outside the lookup table.
        """
        return self.rel_lut[n]
@enum.unique
class VISION_PREPROCESSING_MODE(enum.Enum):
    """States of the preprocessing state machine run by the main loop.

    The CANNY-family states feed Canny edges into contour detection; the
    THRES-family states feed an adaptive threshold image instead. The
    ``*_HOLD`` states are left only after a hold counter has run down.
    """

    # Canny edges; the normal operating mode.
    CANNY = "CANNY"
    # Adaptive threshold, waiting for the hold counter to expire.
    THRES_HOLD = "THRES_HOLD"
    # Adaptive threshold, steady state.
    THRES = "THRES"
    # Canny edges on trial, waiting for the hold counter to expire.
    TEST_CANNY_HOLD = "TEST_CANNY_HOLD"
    # Canny edges on trial; the next step decides between CANNY and THRES_HOLD.
    TEST_CANNY = "TEST_CANNY"
# ---- Mutable state carried from one frame to the next by the main loop ----

# Preprocessing state machine: current mode and the frame countdown used by
# the *_HOLD modes.
preproc_state = VISION_PREPROCESSING_MODE.CANNY
hold_time = 0

# Board geometry used to place the grid lines between detected corners.
board_messurements = Board_19x19()

# Board detection: the averaged corner estimate, the recent raw detections it
# is averaged from, and their deviation from that average.
dection = None
dection_history = []
jitter = 0

# Per-frame candidate count, and frames since exactly one candidate was seen.
num_decetions = 0
last_decetion_time = 0

# Bookkeeping for frames that contain several candidates at once.
num_double_decetions = 0
last_double_detection_time = 0

# Frame rate of the previous loop iteration, shown in the overlay.
fps = 0
try:
while True:
t_start = cv2.getTickCount()
ret, frame = cam.read()
width, height = frame.shape[1], frame.shape[0]
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
frame_edge = cv2.GaussianBlur(frame_gray, (5, 5), 0)
frame_edge = cv2.adaptiveThreshold(frame_edge, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 9, 2)
frame_edge_copy = cv2.cvtColor(frame_edge.copy(), cv2.COLOR_GRAY2BGR)
if preproc_state == VISION_PREPROCESSING_MODE.CANNY or \
preproc_state == VISION_PREPROCESSING_MODE.TEST_CANNY or \
preproc_state == VISION_PREPROCESSING_MODE.TEST_CANNY_HOLD:
frame_proc = cv2.GaussianBlur(frame_gray, (3, 3), 0)
frame_proc = cv2.Canny(frame_proc, 130, 200)
elif preproc_state == VISION_PREPROCESSING_MODE.THRES_HOLD or preproc_state == VISION_PREPROCESSING_MODE.THRES:
frame_proc = cv2.GaussianBlur(frame_gray, (3, 3), 0)
frame_proc = cv2.adaptiveThreshold(frame_proc, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 19, 3)
else:
raise ValueError("Invalid preprocessing mode")
countours, hierarchy = cv2.findContours(frame_proc, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
countours_simplified = []
for c in countours:
esp = cv2.arcLength(c, True)*0.05
approx = cv2.approxPolyDP(c, esp, True)
if len(approx) == 4:
area = cv2.contourArea(approx)
if area > 1000:
lengths = np.array([
np.linalg.norm(approx[0] - approx[1]),
np.linalg.norm(approx[1] - approx[2]),
np.linalg.norm(approx[2] - approx[3]),
np.linalg.norm(approx[3] - approx[0])
])
avrg_length = np.mean(lengths)
lengths_diff = np.abs(lengths)/avrg_length
if np.all(lengths_diff < 1.1) and np.all(lengths_diff > 0.90):
countours_simplified.append(approx)
num_decetions = len(countours_simplified)
if num_decetions > 2:
num_double_decetions += 1
num_last_double_detection_time = 0
else:
if last_double_detection_time > 300:
num_double_decetions = 0
else:
last_double_detection_time = max(1000, last_double_detection_time)
if num_decetions == 1:
last_decetion_time = 0
dection_history.append(np.squeeze(countours_simplified[0], axis=1))
dection_history = dection_history[-10:]
else:
last_decetion_time += 1
if len(dection_history) > 0:
dection_history_np = np.array(dection_history)
avrage_detection = np.mean(dection_history_np, axis=0)
dection = avrage_detection.copy()
if len(dection_history) > 6:
jitter = 0
for i in dection_history_np:
jitter = np.max(np.abs(i - avrage_detection))
match preproc_state:
case VISION_PREPROCESSING_MODE.CANNY:
if last_decetion_time > 50:
preproc_state = VISION_PREPROCESSING_MODE.THRES_HOLD
hold_time = 100
case VISION_PREPROCESSING_MODE.THRES_HOLD:
if hold_time == 0:
preproc_state = VISION_PREPROCESSING_MODE.THRES
else:
hold_time -= 1
case VISION_PREPROCESSING_MODE.THRES:
if last_decetion_time > 50 or jitter > 3 or num_double_decetions > 15:
preproc_state = VISION_PREPROCESSING_MODE.TEST_CANNY_HOLD
hold_time = 200
case VISION_PREPROCESSING_MODE.TEST_CANNY_HOLD:
if hold_time == 0:
preproc_state = VISION_PREPROCESSING_MODE.TEST_CANNY
else:
hold_time -= 1
case VISION_PREPROCESSING_MODE.TEST_CANNY:
if last_decetion_time < 10 and jitter < 3:
preproc_state = VISION_PREPROCESSING_MODE.CANNY
else:
preproc_state = VISION_PREPROCESSING_MODE.THRES_HOLD
hold_time = 100
case _:
raise ValueError("Invalid preprocessing mode")
frame_dection = cv2.drawContours(frame.copy(), countours_simplified, -1, (0, 255, 0), 1)
frame_classifications = frame.copy()
if dection is not None:
N_line_positions = []
S_line_positions = []
W_line_positions = []
E_line_positions = []
for i in range(19):
N_line_positions.append(board_messurements.get_cell_position_on_line(i, dection[0], dection[1]))
E_line_positions.append(board_messurements.get_cell_position_on_line(i, dection[1], dection[2]))
S_line_positions.append(board_messurements.get_cell_position_on_line(i, dection[2], dection[3]))
W_line_positions.append(board_messurements.get_cell_position_on_line(i, dection[3], dection[0]))
N_line_positions = np.array(N_line_positions, dtype=np.int32)
E_line_positions = np.array(E_line_positions, dtype=np.int32)
S_line_positions = np.array(list(reversed(S_line_positions)), dtype=np.int32)
W_line_positions = np.array(list(reversed(W_line_positions)), dtype=np.int32)
cv2.circle(frame_dection, dection[0].astype(np.int32), 3, (0, 255, 255), -1)
cv2.circle(frame_dection, dection[1].astype(np.int32), 3, (0, 255, 255), -1)
cv2.circle(frame_dection, dection[2].astype(np.int32), 3, (0, 255, 255), -1)
cv2.circle(frame_dection, dection[3].astype(np.int32), 3, (0, 255, 255), -1)
for p in N_line_positions:
cv2.circle(frame_dection, p, 2, (0, 0, 255), -1)
for p in E_line_positions:
cv2.circle(frame_dection, p, 2, (0, 0, 255), -1)
for p in S_line_positions:
cv2.circle(frame_dection, p, 2, (0, 0, 255), -1)
for p in W_line_positions:
cv2.circle(frame_dection, p, 2, (0, 0, 255), -1)
for p0, p1 in zip(N_line_positions, S_line_positions):
cv2.line(frame_dection, tuple(p0), tuple(p1), (0, 0, 255), 1)
for i in range(19):
p2 = board_messurements.get_cell_position_on_line(i, p0, p1)
cv2.circle(frame_dection, p2, 2, (0, 0, 255), -1)
for p0, p1 in zip(E_line_positions, W_line_positions):
cv2.line(frame_dection, tuple(p0), tuple(p1), (0, 255, 0), 1)
for i in range(19):
p2 = board_messurements.get_cell_position_on_line(i, p0, p1)
cv2.circle(frame_dection, p2, 2, (0, 255, 0), -1)
MESSURE_WIDTH = 15
STONE_DECTION_WIDTH = 5
for x in range(19):
for y in range(19):
mid_point = board_messurements.get_cell_position_on_line(x, N_line_positions[y], S_line_positions[y])
subsection = frame[mid_point[1] - MESSURE_WIDTH//2:mid_point[1] + MESSURE_WIDTH//2, mid_point[0] - MESSURE_WIDTH//2:mid_point[0] + MESSURE_WIDTH//2]
avrage_color = np.mean(subsection, axis=(0, 1)).astype(np.int32).tolist()
cv2.rectangle(frame_classifications, (mid_point[0] - MESSURE_WIDTH//2, mid_point[1] - MESSURE_WIDTH//2), (mid_point[0] + MESSURE_WIDTH//2, mid_point[1] + MESSURE_WIDTH//2), avrage_color, -1)
cv2.rectangle(frame_classifications, (mid_point[0] - MESSURE_WIDTH//2, mid_point[1] - MESSURE_WIDTH//2), (mid_point[0] + MESSURE_WIDTH//2, mid_point[1] + MESSURE_WIDTH//2), (0, 0, 255), 1)
avrage_stone_color = np.mean(frame_edge, axis=(0, 1)).astype(np.int32).tolist()
cv2.rectangle(frame_edge_copy, (mid_point[0] - STONE_DECTION_WIDTH//2, mid_point[1] - STONE_DECTION_WIDTH//2), (mid_point[0] + STONE_DECTION_WIDTH//2, mid_point[1] + STONE_DECTION_WIDTH//2), (avrage_stone_color, avrage_stone_color, avrage_stone_color), -1)
cv2.rectangle(frame_edge_copy, (mid_point[0] - STONE_DECTION_WIDTH//2, mid_point[1] - STONE_DECTION_WIDTH//2), (mid_point[0] + STONE_DECTION_WIDTH//2, mid_point[1] + STONE_DECTION_WIDTH//2), (0, 0, 255), 1)
res = np.concatenate((
np.concatenate((
frame,
frame_classifications,
frame_edge_copy
), axis=1),
np.concatenate((
cv2.cvtColor(frame_proc, cv2.COLOR_GRAY2BGR),
frame_dection,
frame
), axis=1)
), axis=0)
res = cv2.putText(res, f"FPS: {fps:.2f}", (10, 20), font, 0.5, font_color, 1, cv2.LINE_AA)
res = cv2.putText(res, f"GoBoard Dections: {num_decetions}", (10, 35), font, 0.5, font_color, 1, cv2.LINE_AA)
res = cv2.putText(res, f"Preprocessing: {preproc_state.value}", (10, 50), font, 0.5, font_color, 1, cv2.LINE_AA)
res = cv2.putText(res, f"Double Dection: {num_double_decetions}", (10, 65), font, 0.5, font_color, 1, cv2.LINE_AA)
res = cv2.putText(res, f"Jitter: {jitter:.2f}", (10, 80), font, 0.5, font_color, 1, cv2.LINE_AA)
res = cv2.putText(res, f"Last Dection Time: {last_decetion_time}", (10, 95), font, 0.5, font_color, 1, cv2.LINE_AA)
res = cv2.putText(res, f"Last Double Dection Time: {last_double_detection_time}", (10, 110), font, 0.5, font_color, 1, cv2.LINE_AA)
res = cv2.putText(res, f"Hold Time: {hold_time}", (10, 125), font, 0.5, font_color, 1, cv2.LINE_AA)
cv2.imshow(WINDOW_NAME, res)
t_end = cv2.getTickCount()
fps = cv2.getTickFrequency() / (t_end - t_start)
k = cv2.waitKey(1)
if k%256 == 27: # ESC pressed
print("Escape hit, closing...")
break
except KeyboardInterrupt:
print("Exiting")
cam.release()
cv2.destroyAllWindows()