# ------------------------------------------------------------------------------
# Copyright (c) ETRI. All rights reserved.
# Licensed under the BSD 3-Clause License.
# This file is part of Youtube-Gesture-Dataset, a sub-project of AIR(AI for Robots) project.
# You can refer to details of AIR project at https://aiforrobots.github.io
# Written by Youngwoo Yoon (youngwoo@etri.re.kr)
# ------------------------------------------------------------------------------

import enum
import glob
import os

import cv2
import numpy as np
import tkinter as tk
from tkinter import ttk
from PIL import Image, ImageTk

from data_utils import *
from config import *


# scroll-region size of the canvas that holds the clip summary image
review_img_width = 3000
review_img_height = 1500


class Criteria(enum.Enum):
    """Filtering criteria; values are the indices used in filtering_results."""
    too_short = 0
    many_people = 1
    skeleton_back = 2
    skeleton_missing = 3
    skeleton_side = 4
    skeleton_small = 5
    is_picture = 6


class ReviewApp:
    MODE = 'ALL'  # clip list view filter: 'ALL', 'TRUE', or 'FALSE'
    vid = '-1'  # currently selected video id ('-1' means no video selected)

    def __init__(self):
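        """Build the review UI and start the Tk main loop."""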
        self.win = tk.Tk()
        self.win.geometry("1500x800+100+100")

        self.make_frame()
        self.make_label()
        self.make_filtering_box()
        self.make_img_canvas()
        self.make_view_combobox()

        self.make_vid_treeView()
        self.vid_tree.bind("<Double-1>", self.OnVideoListClick)

        self.make_clip_treeView()
        self.clip_tree.bind("<Double-1>", self.OnClipListClick)
        self.clip_tree.bind("<<TreeviewSelect>>", self.OnClipTreeSelect)
        self.img_canvas.focus_set()

        self.win.mainloop()

    def make_frame(self):
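        """Set up the grid of frames: info/filter rows on top, list and image panes at the bottom."""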
        # main grid
        self.win.rowconfigure(0, weight=1)
        self.win.rowconfigure(1, weight=9)
        self.win.columnconfigure(0, weight=1)

        self.top_frame = tk.Frame(self.win, bg='#e9e9e9')
        self.top_frame.grid(row=0, sticky='nsew')
        self.top_frame.columnconfigure(0, weight=1)
        self.top_frame.columnconfigure(1, weight=12)

        self.top_frame.rowconfigure(0, weight=1)
        self.top_frame.rowconfigure(1, weight=1)
        self.top_frame.rowconfigure(2, weight=1)

        self.bottom_frame = tk.Frame(self.win)
        self.bottom_frame.grid(row=1, sticky='nsew', padx=5, pady=5)

        # bottom frame grid
        self.bottom_frame.columnconfigure(0, weight=1)
        self.bottom_frame.columnconfigure(1, weight=1)
        self.bottom_frame.columnconfigure(2, weight=15)
        self.bottom_frame.rowconfigure(0, weight=1)

        self.img_frame = tk.Frame(self.bottom_frame)
        self.img_frame.grid(row=0, column=2, sticky='nsew', padx=5, pady=5)

    def make_label(self):
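        """Create the labels that show the selected video name and clip interval."""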
        self.tx_vid_name = tk.Label(self.top_frame, bg='#8C8C8C', text='No selected video')
        self.tx_clip_interval = tk.Label(self.top_frame, bg='#8C8C8C', text='No selected clip')
        self.tx_vid_name.grid(row=0, column=0, sticky=(tk.N + tk.S + tk.E + tk.W))
        self.tx_clip_interval.grid(row=1, column=0, sticky=(tk.N + tk.S + tk.E + tk.W))

    def make_view_combobox(self):
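        """Create the ALL/TRUE/FALSE combobox controlling which clips are listed."""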
        self.mode = tk.StringVar()
        self.view_combo = ttk.Combobox(self.top_frame, values=('ALL', 'TRUE', 'FALSE'), textvariable=self.mode)
        self.view_combo.grid(row=2, column=0, sticky=(tk.N + tk.S + tk.E + tk.W), padx=5, pady=5)
        self.view_combo.current(0)
        self.view_combo.bind('<<ComboboxSelected>>', self.OnComboSelected)

    def make_filtering_box(self):
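        """Create the criteria checkboxes, the filtering-ratio labels, and the message label."""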
        self.skeltonoptionFrame = tk.Frame(self.top_frame, bg='#e9e9e9')
        self.skeltonoptionFrame.grid(row=0, column=1, sticky='nsew')
        ratioFrame = tk.Frame(self.top_frame, bg='#e9e9e9')
        ratioFrame.grid(row=1, column=1, sticky='nsew')

        msgFrame = tk.Frame(self.top_frame, bg='#e9e9e9')
        msgFrame.grid(row=2, column=1, sticky='nsew')

        tx_back = tk.Label(ratioFrame, text="looking behind ratio: ", foreground='#3985F8', bg='#e9e9e9')
        tx_back.pack(side=tk.LEFT, padx=5)
        self.tx_ratio_back = tk.Label(ratioFrame, text="None", bg='#e9e9e9')
        self.tx_ratio_back.pack(side=tk.LEFT)

        tx_missing = tk.Label(ratioFrame, text="missing joints ratio: ", foreground='#3985F8', bg='#e9e9e9')
        tx_missing.pack(side=tk.LEFT, padx=10)
        self.tx_ratio_missing = tk.Label(ratioFrame, text="None", bg='#e9e9e9')
        self.tx_ratio_missing.pack(side=tk.LEFT)

        tx_side = tk.Label(ratioFrame, text="looking sideways ratio: ", foreground='#3985F8', bg='#e9e9e9')
        tx_side.pack(side=tk.LEFT, padx=10)
        self.tx_ratio_side = tk.Label(ratioFrame, text="None", bg='#e9e9e9')
        self.tx_ratio_side.pack(side=tk.LEFT)

        tx_small = tk.Label(ratioFrame, text="small person ratio: ", foreground='#3985F8', bg='#e9e9e9')
        tx_small.pack(side=tk.LEFT, padx=10)
        self.tx_ratio_small = tk.Label(ratioFrame, text="None", bg='#e9e9e9')
        self.tx_ratio_small.pack(side=tk.LEFT)

        tx_diff = tk.Label(ratioFrame, text="frame diff: ", foreground='#3985F8', bg='#e9e9e9')
        tx_diff.pack(side=tk.LEFT, padx=10)
        self.tx_frame_diff = tk.Label(ratioFrame, text="None", bg='#e9e9e9')
        self.tx_frame_diff.pack(side=tk.LEFT)

        tx_option = tk.Label(self.skeltonoptionFrame, text='Criteria: ', foreground='#3985F8', bg='#e9e9e9')
        tx_option.pack(side=tk.LEFT, padx=5, pady=5)
        tx_res = tk.Label(msgFrame, text='Message:', foreground='#3985F8', bg='#e9e9e9')
        tx_res.pack(side=tk.LEFT, padx=5)
        self.message = tk.Label(msgFrame, text=' ', bg='#e9e9e9')
        self.message.pack(side=tk.LEFT)

        skeleton_option = ["too short", "many people", "looking behind", "joint missing", "sideways", "small", "picture"]
        self.item = []  # IntVars backing the criteria checkbuttons, indexed by Criteria values
        for _ in skeleton_option:
            self.item.append(tk.IntVar())

        for val, option in enumerate(skeleton_option):
            tk.Checkbutton(self.skeltonoptionFrame,
                           text=option,
                           padx=5,
                           pady=5,
                           bg='#e9e9e9',
                           variable=self.item[val],
                           activebackground="blue").pack(side=tk.LEFT, padx=5, pady=5)

    def make_vid_treeView(self):
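        """Fill the video list with video ids parsed from the mp4 files in VIDEO_PATH."""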
        self.vid_tree = ttk.Treeview(self.bottom_frame)
        self.vid_tree.grid(row=0, column=0, sticky='nsew', padx=5, pady=5)
        self.vid_tree.heading("#0", text="Video List")

        for file in sorted(glob.glob(VIDEO_PATH + "/*.mp4"), key=os.path.getmtime):
            vid = os.path.split(file)[1][-15:-4]  # the 11 characters before '.mp4' are the video id
            self.vid_tree.insert('', 'end', text=vid, values=vid, iid=vid)

    def make_clip_treeView(self):
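        """Create the clip list view; clips marked incorrect (tag 'False') get a gray background."""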
        self.clip_tree = ttk.Treeview(self.bottom_frame)
        self.clip_tree.grid(row=0, column=1, sticky='nsew', padx=5, pady=5)
        self.clip_tree.heading("#0", text="Clip List")
        self.clip_tree.tag_configure('False', background='#E8E8E8')

    def make_img_canvas(self):
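        """Create the scrollable canvas that shows the clip summary image."""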
        self.img_canvas = tk.Canvas(self.img_frame, bg='black')
        self.img_canvas.config(scrollregion=(0, 0, review_img_width, review_img_height))

        hbar = tk.Scrollbar(self.img_frame, orient=tk.HORIZONTAL)
        hbar.pack(side=tk.BOTTOM, fill=tk.X)
        hbar.config(command=self.img_canvas.xview)
        vbar = tk.Scrollbar(self.img_frame, orient=tk.VERTICAL)
        vbar.pack(side=tk.RIGHT, fill=tk.Y)
        vbar.config(command=self.img_canvas.yview)
        self.img_canvas.bind("<MouseWheel>", self._on_mousewheel)

        self.img_canvas.config(xscrollcommand=hbar.set, yscrollcommand=vbar.set)
        self.img_canvas.pack(expand=tk.YES, fill=tk.BOTH)

    def _on_mousewheel(self, event):
        # Windows reports event.delta in multiples of 120 per wheel notch, so scale it down there
        delta = int(event.delta / 120) if abs(event.delta) >= 120 else event.delta
        self.img_canvas.yview_scroll(-delta, "units")

    def OnComboSelected(self, event):
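        """Reload the clip list when the view mode combobox changes."""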
        change_mode = self.view_combo.get()

        if change_mode != self.MODE:
            self.MODE = change_mode
            self.load_clip()

    def OnVideoListClick(self, event):
        """Load the clip, skeleton, and filtering data of the selected video."""
        item = self.vid_tree.identify('item', event.x, event.y)
        vid = self.vid_tree.item(item, "text")
        self.vid = vid

        # reset the UI for the new video
        self.tx_vid_name.configure(text=vid)
        self.tx_clip_interval.configure(text='No selected clip')
        self.img_canvas.delete(tk.ALL)
        self.message.config(text=' ')
        self.tx_ratio_small.config(text='None')
        self.tx_ratio_side.config(text='None')
        self.tx_ratio_missing.config(text='None')
        self.tx_ratio_back.config(text='None')
        self.tx_frame_diff.config(text='None')

        print('Load video:', vid)

        self.clip_data = load_clip_data(vid)
        self.skeleton = SkeletonWrapper(SKELETON_PATH, vid)
        self.video_wrapper = read_video(VIDEO_PATH, vid)
        self.clip_filter_data = load_clip_filtering_aux_info(vid)

        self.load_clip()
        self.win.update()

    def OnClipListClick(self, event):
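        """Debug handler for double-clicks on the clip list; prints the clip index."""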
        item = self.clip_tree.identify('item', event.x, event.y)
        item_index = int(self.clip_tree.item(item, "values")[0])
        print(item_index, 'Double_Click')

    def OnClipTreeSelect(self, event):
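        """Display the selected clip's summary image and its filtering results."""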
        item_index = int(self.clip_tree.item(self.clip_tree.focus(), 'values')[0])
        print('Load clip, idx:', item_index)

        # load clip info and build the summary image
        self.review_clip = self.clip_data[item_index]
        start_frame_no = self.review_clip['clip_info'][0]
        end_frame_no = self.review_clip['clip_info'][1]
        correct_clip = self.review_clip['clip_info'][2]

        image = self.show_clips(clip=self.review_clip, correct_clip=correct_clip)

        # convert from OpenCV's BGR to RGB for PIL/Tk
        b, g, r = cv2.split(image)
        img = cv2.merge((r, g, b))
        im = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=im)

        self.image = imgtk  # keep a reference so the image is not garbage-collected
        self.img_canvas.delete(tk.ALL)
        self.img_canvas.create_image(0, 0, image=imgtk, anchor=tk.NW)

        # load filtering results
        clip_filter_data = self.clip_filter_data[item_index]
        filtering_results = clip_filter_data['filtering_results']
        message = clip_filter_data['message']
        debugging_info = clip_filter_data['debugging_info']

        # check/uncheck the criteria boxes according to the automatic filtering results
        for criterion in Criteria:
            self.item[criterion.value].set(filtering_results[criterion.value])
        self.message.config(text=message)

        self.tx_ratio_back.config(text=debugging_info[0])
        self.tx_ratio_missing.config(text=debugging_info[1])
        self.tx_ratio_side.config(text=debugging_info[2])
        self.tx_ratio_small.config(text=debugging_info[3])
        self.tx_frame_diff.config(text=debugging_info[4])

        self.tx_clip_interval.configure(text=str(start_frame_no) + ' ~ ' + str(end_frame_no) + ' ' + str(correct_clip))

    def load_clip(self):
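        """Rebuild the clip list for the current video according to the view mode (ALL/TRUE/FALSE)."""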
        if self.vid == '-1':
            print('Error: load video first')
            return

        # reset the clip tree and criteria checkboxes
        for i in self.clip_tree.get_children():
            self.clip_tree.delete(i)

        self.tx_clip_interval.configure(text='No selected clip')
        self.img_canvas.delete(tk.ALL)

        for item in self.item:
            item.set(False)

        if self.clip_data and self.skeleton.skeletons != []:
            # list the clips that match the current view mode
            for i, clip in enumerate(self.clip_data):
                start_frame_no = clip['clip_info'][0]
                end_frame_no = clip['clip_info'][1]
                correct_clip = clip['clip_info'][2]

                if (self.MODE == 'ALL' or
                        (self.MODE == 'TRUE' and correct_clip) or
                        (self.MODE == 'FALSE' and not correct_clip)):
                    self.clip_tree.insert('', 'end', text=str(start_frame_no) + ' ~ ' + str(end_frame_no),
                                          values=i, iid=i, tag=str(correct_clip))
        else:
            print('[Error] Data file does not exist')
            self.tx_clip_interval.configure(text="Data file does not exist")

        self.win.update()

    def show_clips(self, clip, correct_clip):
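        """Build a tiled summary image of sampled frames with skeleton overlays and return it as a numpy array."""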
        N_IMAGES_PER_VIEW = 20

        start_frame_no = clip['clip_info'][0]
        end_frame_no = clip['clip_info'][1]
        print(start_frame_no, end_frame_no)  # start and end frame no

        # sample frames evenly over the clip and overlay skeletons
        resized_frames = []
        skip_amount = int(max((end_frame_no - start_frame_no) / N_IMAGES_PER_VIEW, 1))
        self.video_wrapper.set_current_frame(start_frame_no)
        skeleton_chunk = self.skeleton.get(start_frame_no, end_frame_no)
        for i in range(end_frame_no - start_frame_no):
            ret, frame = self.video_wrapper.video.read()
            if not ret:  # stop if the video ends early
                break

            if i % skip_amount == 0:
                # overlay raw skeletons on the frame (thin lines)
                if skeleton_chunk and skeleton_chunk[i]:
                    for person in skeleton_chunk[i]:
                        body_pose = get_skeleton_from_frame(person)
                        frame = draw_skeleton_on_image(frame, body_pose, thickness=5)

                # overlay the selected skeleton (thick lines)
                if correct_clip and clip['frames']:
                    if clip['frames'][i]:
                        body_pose = get_skeleton_from_frame(clip['frames'][i])
                        frame = draw_skeleton_on_image(frame, body_pose, thickness=20)

                resized_frame = cv2.resize(frame, (0, 0), None, .35, .35)
                resized_frames.append(resized_frame)

        # tile the sampled frames into a single summary image
        n_imgs_per_row = 4
        n_rows_per_page = 5
        frame_idx = 0
        page_img = None
        for row_idx in range(n_rows_per_page):
            row_img = None
            for col_idx in range(n_imgs_per_row):
                if frame_idx >= len(resized_frames):
                    break

                if row_img is None:
                    row_img = resized_frames[frame_idx]
                else:
                    row_img = np.hstack((row_img, resized_frames[frame_idx]))
                frame_idx += 1

            if page_img is None:
                page_img = row_img
            elif row_img is not None:
                # pad a shorter last row so it can be stacked below the previous rows
                n_pad = page_img.shape[1] - row_img.shape[1]
                if n_pad > 0:
                    row_img = np.pad(row_img, ((0, 0), (0, n_pad), (0, 0)), mode='constant')
                page_img = np.vstack((page_img, row_img))

        return page_img


if __name__ == '__main__':
    myReviewApp = ReviewApp()