diff --git a/src/SumMeVideo.py b/src/SumMeVideo.py index d86f5c16a6a80b2df3402e794d15d3c61fac5035..3857203f5d54e060b2d40180a371ce9ea29c212a 100644 --- a/src/SumMeVideo.py +++ b/src/SumMeVideo.py @@ -1,6 +1,7 @@ from scipy.io import loadmat import os from moviepy.editor import VideoFileClip +import numpy as np class SumMeVideo(VideoFileClip): @@ -13,7 +14,21 @@ class SumMeVideo(VideoFileClip): def get_gt(self): video_gt = loadmat(self.gt_path) - return video_gt['gt_score'] + video_gt = video_gt['user_score'] # (n_frames,n_annotator) + return self.bin_classify_user_score(video_gt) + + """ + :param user scores + :returns binary nparray + """ + + def bin_classify_user_score(self, video_gt): + user_scores = video_gt + result = [] + for user in range(user_scores.shape[1]): + print(user_scores[:, user].shape) + result.append([int(item > 0.5) for item in user_scores[:, user]]) + return np.asarray(result).T # (n_frames,n_annotator) def get_frames(self): return list(self.video_clip.iter_frames(with_times=False)) diff --git a/src/TVSumVideo.py b/src/TVSumVideo.py index 29b849df5b21d01d2f6b254b8cbefb57b0f1f305..132a9beb6c91777959a2a0e0ef070dedb4cb4b90 100644 --- a/src/TVSumVideo.py +++ b/src/TVSumVideo.py @@ -21,7 +21,8 @@ class TVSumVideo(VideoFileClip): for i in range(len(sub_gt)): users_gt.append(sub_gt.iloc[i, -1].split(",")) users_gt = np.array(users_gt) - return self.__avg_array(users_gt) + avg_gt = self.__avg_array(users_gt) + return np.expand_dims(avg_gt, axis=1) def __avg_array(self, users_gt): users_gt = users_gt.astype(int) diff --git a/src/features.pickle b/src/features.pickle new file mode 100644 index 0000000000000000000000000000000000000000..cb2b97355b07b3ed2fbe1b642697f90930b27880 Binary files /dev/null and b/src/features.pickle differ diff --git a/src/utils.py b/src/utils.py index 1d00346e8e62bc1eebba514ecc7cfe22ac338335..1b9c91ce49a8ce8ed854d81d907fa0531051c4dc 100644 --- a/src/utils.py +++ b/src/utils.py @@ -54,27 +54,42 @@ def resize_image(img_path, width, height): return resized -def numbers_to_classes(array, n_classes): - # TODO - return 0 - - -def sample_from_video_with_gt(video_frames, video_gt, duration, fps, n_samples=2): +def sample_from_video_with_gt(video_frames, user_scores, duration, fps, n_samples=2): sampled_frames = [] sampled_gt = [] - source_array = np.arange(fps); + source_array = np.arange(fps) for i in range(duration): sorted_samples = make_sorted_sample(source_array, n_samples) - for idx in sorted_samples: - sampled_frames.append(video_frames[i * fps:(i + 1) * fps][idx - 1]) - sampled_gt.append(video_gt[i * fps:(i + 1) * fps][idx - 1]) + start = i * fps + end = (i + 1) * fps + time_span = (start, end) + sampled_frames.extend(select_frames_from_video(sorted_samples, video_frames, time_span)) + sampled_gt.extend(select_user_scores(sorted_samples, user_scores, time_span)) return sampled_frames, sampled_gt +def select_frames_from_video(samples, video_frames, time_span): + sampled_frames = [] + for idx in samples: + sampled_frames.append(video_frames[time_span[0]:time_span[1]][idx - 1]) + return sampled_frames + + +def select_user_scores(samples, user_scores, time_span): + sampled_gt = [] + for idx in samples: + gt = [user_scores[time_span[0]:time_span[1], user][idx - 1] for user in range(user_scores.shape[1])] + sampled_gt.append(np.asarray(gt)) + return sampled_gt + + def make_sorted_sample(source_array, n_samples): sorted_samples = np.sort(np.random.choice(source_array, n_samples, replace=False)) return sorted_samples def drop_file_extension(file_name): - return file_name.split('.')[0] + if file_name is None: + raise ValueError + file_name = file_name.split('.')[:-1] + return '.'.join(file_name) \ No newline at end of file diff --git a/src/video2frames.py b/src/video2frames.py index b456fb98c1f3b67840210668b96abd4bea8c558c..891b12358e2e9fc76fd6f906aa19c9d2bfaebd99 100644 --- a/src/video2frames.py +++ b/src/video2frames.py @@ -17,9 +17,9 @@ def create_video_obj(dataset, video_name, video_path, gt_dir): def arg_parser(): parser = argparse.ArgumentParser(description='Extract Features') - parser.add_argument('--dataset', default='TVSum', type=str, help='SumMe or TVSum') - parser.add_argument('--videos_dir', metavar='DIR', default='../data/TVSum/video', help='path input videos') - parser.add_argument('--gt', metavar='GT_Dir', default='../data/TVSum/data', help='path ground truth') + parser.add_argument('--dataset', default='SumMe', type=str, help='SumMe or TVSum') + parser.add_argument('--videos_dir', metavar='DIR', default='../data/SumMe/videos/test', help='path input videos') + parser.add_argument('--gt', metavar='GT_Dir', default='../data/SumMe/GT', help='path ground truth') parser.add_argument('--fps', default=2, type=int, help='Frames per second for the extraction') parser.add_argument('--model_arch', default='googlenet', help='pretrained model architecture e.g. resnet50 or alexnet')