|
| 1 | +# ------------------------------------------------------------------------------ |
| 2 | +# Copyright (c) ETRI. All rights reserved. |
| 3 | +# Licensed under the BSD 3-Clause License. |
| 4 | +# This file is part of Youtube-Gesture-Dataset, a sub-project of AIR(AI for Robots) project. |
| 5 | +# You can refer to details of AIR project at https://aiforrobots.github.io |
| 6 | +# Written by Youngwoo Yoon (youngwoo@etri.re.kr) |
| 7 | +# ------------------------------------------------------------------------------ |
| 8 | + |
| 9 | +import glob |
| 10 | +import logging |
| 11 | +import multiprocessing |
| 12 | +import os |
| 13 | +import re |
| 14 | +import sys |
| 15 | + |
| 16 | +from tqdm import tqdm |
| 17 | + |
| 18 | +from config import * |
| 19 | +from make_ted_dataset import read_subtitle |
| 20 | + |
| 21 | +sys.path.insert(0, '../../../gentle') |
| 22 | +import gentle |
| 23 | + |
| 24 | + |
# prepare gentle
# Leave two cores free for the rest of the system, but never request fewer
# than one worker: on machines with one or two cores the plain subtraction
# would yield zero or a negative thread count.
nthreads = max(1, multiprocessing.cpu_count() - 2)
# Only warnings and above are emitted through the root logger.
logging.getLogger().setLevel(logging.WARNING)
# Filler words handed to the aligner through its `disfluencies` argument.
disfluencies = {'uh', 'um'}
# Created once at import time and shared by every run_gentle() call.
resources = gentle.Resources()
| 30 | + |
| 31 | + |
def run_gentle(video_path, vid, result_path):
    """Align a video's subtitle text to its audio and write the result as JSON.

    The cues returned by ``read_subtitle(vid)`` are concatenated into a single
    transcript, which a ``gentle.ForcedAligner`` aligns against the audio that
    ``gentle.resampled(video_path)`` provides. The alignment result is written
    to ``result_path``.

    Args:
        video_path: Path of the media file handed to ``gentle.resampled``.
        vid: Video identifier passed to ``read_subtitle``.
        result_path: Destination file for the alignment JSON (UTF-8 encoded).
    """
    vtt_subtitle = read_subtitle(vid)

    # Each cue text is followed by one space (the last one included), then
    # line breaks inside cues are flattened to spaces as well.
    transcript = ''.join(sub.text + ' ' for sub in vtt_subtitle)
    transcript = transcript.replace('\n', ' ')  # remove newline characters

    # align
    with gentle.resampled(video_path) as wav_file:
        aligner = gentle.ForcedAligner(resources, transcript, nthreads=nthreads,
                                       disfluency=False, conservative=False,
                                       disfluencies=disfluencies)
        result = aligner.transcribe(wav_file, logging=logging)

    # write results
    # Serialize before opening the file so that a failure in to_json() does
    # not leave an empty or truncated result file behind.
    result_json = result.to_json(indent=2)
    with open(result_path, 'w', encoding="utf-8") as fh:
        fh.write(result_json)
| 48 | + |
| 49 | + |
def main():
    """Run forced alignment for every ``*.mp4`` file found under VIDEO_PATH.

    Videos are processed in order of modification time (oldest first). A video
    is skipped when its ``<vid>_align_results.json`` file already exists and is
    not empty, so an interrupted run can be resumed.
    """
    videos = sorted(glob.glob(VIDEO_PATH + "/*.mp4"), key=os.path.getmtime)
    n_total = len(videos)

    # Wrap the list itself (not the enumerate object) so tqdm can read its
    # length and display the total and remaining time.
    for i, file_path in enumerate(tqdm(videos)):
        # The id is the last 11 characters of the file name before the
        # 4-character ".mp4" extension.
        # NOTE(review): presumably the YouTube video id - confirm.
        vid = os.path.split(file_path)[1][-15:-4]
        print('{}/{} - {}'.format(i+1, n_total, vid))

        result_path = VIDEO_PATH + '/' + vid + '_align_results.json'
        if os.path.exists(result_path) and os.path.getsize(result_path):  # existing and not empty
            print('JSON file already exists ({})'.format(vid))
            continue

        run_gentle(file_path, vid, result_path)
| 61 | + |
| 62 | + |
# Run the batch alignment only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments