src.utils.dataset_utils
import cv2
import numpy as np
import glob
import matplotlib.pyplot as plt
import keras


class VideoDataLoader:
    """
    A class to load and process videos into grayscale frame batches for model training.

    Attributes:
        folder_path (str): Directory containing video files.
        max_frames (int): Maximum number of frames to extract per video.
        num_vids (int): Number of videos to process for a batch.
        video_files (list): List of .mp4 video file paths in the folder.
    """

    def __init__(self, folder_path, max_frames, num_vids):
        """
        Initializes the loader with folder path and limits.

        Args:
            folder_path (str): Path to the folder containing .mp4 files.
            max_frames (int): Number of frames to extract per video.
            num_vids (int): Number of videos to load for each batch.
        """
        self.folder_path = folder_path
        self.max_frames = max_frames
        self.num_vids = num_vids
        self.video_files = glob.glob(f"{folder_path}/*.mp4")  # MP4 format

    def process_video(self, video_path):
        """
        Loads a single video and converts it into a fixed number of grayscale frames.

        Args:
            video_path (str): Path to the video file.

        Returns:
            np.ndarray: Array of shape (max_frames, 256, 256) representing grayscale frames.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []

        while len(frames) < self.max_frames:
            ret, frame = cap.read()
            if not ret:
                break  # End of video

            # Resize frame and convert to grayscale by averaging across color channels
            frame = cv2.resize(frame, (256, 256))
            gray_frame = np.mean(frame, axis=2).astype(np.uint8)
            frames.append(gray_frame)

        cap.release()

        # Pad with the last frame if the video has fewer frames than max_frames
        if len(frames) < self.max_frames:
            last_frame = frames[-1] if frames else np.zeros((256, 256), dtype=np.uint8)
            frames.extend([last_frame] * (self.max_frames - len(frames)))

        return np.array(frames)

    def get_batch(self):
        """
        Retrieves a batch of videos, each with max_frames grayscale frames.

        Returns:
            np.ndarray: Array of shape (num_vids, max_frames, 256, 256).
        """
        batch_videos = []
        for video_file in self.video_files[:self.num_vids]:
            video_data = self.process_video(video_file)
            batch_videos.append(video_data)

        return np.array(batch_videos)

    def get_stats(self):
        """
        Calculates statistics about the videos in the dataset.

        Returns:
            dict: Dictionary with total, average, min, and max frame counts across all videos.
        """
        frame_counts = []

        for video_file in self.video_files:
            cap = cv2.VideoCapture(video_file)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_counts.append(frame_count)
            cap.release()

        avg_frames = np.mean(frame_counts) if frame_counts else 0
        min_frames = np.min(frame_counts) if frame_counts else 0
        max_frames = np.max(frame_counts) if frame_counts else 0

        return {
            "total_videos": len(self.video_files),
            "average_frames": avg_frames,
            "min_frames": min_frames,
            "max_frames": max_frames
        }


def create_dataset(batch):
    """
    Prepares training data from a batch of grayscale video frames.

    Args:
        batch (np.ndarray): Input array of shape (num_videos, num_frames, height, width).

    Returns:
        tuple: ((X0, X1), Y)
            - X0: Repeated first frame of each video across time (shape: (num_videos*num_frames, height, width, 1))
            - X1: All frames from the video as the input sequence
            - Y: Target output frames (same as X1, for reconstruction)
    """
    num_vids, num_frames, h, w = batch.shape

    # Extract the first frame of each video and repeat it across all frames
    first_frames = batch[:, 0:1, :, :]
    X0 = np.repeat(first_frames, num_frames, axis=1)  # shape: (num_vids, num_frames, h, w)

    # Input frames (could be motion or target-dependent)
    X1 = batch.copy()

    # Ground truth (target) frames
    Y = batch.copy()

    # Reshape for model input: (num_vids*num_frames, h, w, 1) and normalize to [0, 1]
    X0 = X0.reshape(-1, h, w, 1) / 255
    X1 = X1.reshape(-1, h, w, 1) / 255
    Y = Y.reshape(-1, h, w, 1) / 255

    return (X0, X1), Y
class VideoDataLoader
A class to load and process videos into grayscale frame batches for model training.
Attributes:
    folder_path (str): Directory containing video files.
    max_frames (int): Maximum number of frames to extract per video.
    num_vids (int): Number of videos to process for a batch.
    video_files (list): List of .mp4 video file paths in the folder.
def __init__(self, folder_path, max_frames, num_vids)
Initializes the loader with folder path and limits.
Args:
    folder_path (str): Path to the folder containing .mp4 files.
    max_frames (int): Number of frames to extract per video.
    num_vids (int): Number of videos to load for each batch.
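A minimal usage sketch: the folder path, frame limit, and video count below are purely illustrative placeholders, not values used by the module itself.

from src.utils.dataset_utils import VideoDataLoader

# Hypothetical folder of .mp4 clips and example limits.
loader = VideoDataLoader(folder_path="./data/videos", max_frames=20, num_vids=4)
print(len(loader.video_files))  # number of .mp4 files discovered by glob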
def process_video(self, video_path)
Loads a single video and converts it into a fixed number of grayscale frames.
Args:
    video_path (str): Path to the video file.
Returns:
    np.ndarray: Array of shape (max_frames, 256, 256) representing grayscale frames.
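Continuing the sketch above with the same hypothetical loader, a single clip can be processed directly; the file name is illustrative only.

# Hypothetical clip inside the folder passed to the loader.
frames = loader.process_video("./data/videos/clip_000.mp4")
print(frames.shape, frames.dtype)  # (20, 256, 256) uint8 when max_frames=20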
def get_batch(self)
Retrieves a batch of videos, each with max_frames grayscale frames.
Returns:
    np.ndarray: Array of shape (num_vids, max_frames, 256, 256).
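Again assuming the hypothetical loader from the earlier sketch (and at least num_vids clips in the folder), get_batch stacks the processed videos into one array:

batch = loader.get_batch()
print(batch.shape)  # (4, 20, 256, 256) with num_vids=4 and max_frames=20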
def get_stats(self)
Calculates statistics about the videos in the dataset.
Returns:
    dict: Dictionary with total, average, min, and max frame counts across all videos.
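A short sketch of reading the returned statistics, using the same hypothetical loader:

stats = loader.get_stats()
print(stats["total_videos"], stats["average_frames"], stats["min_frames"], stats["max_frames"])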
def create_dataset(batch)
Prepares training data from a batch of grayscale video frames.
Args:
    batch (np.ndarray): Input array of shape (num_videos, num_frames, height, width).
Returns:
    tuple: ((X0, X1), Y)
        - X0: First frame of each video repeated across time (shape: (num_videos*num_frames, height, width, 1))
        - X1: All frames of each video, used as the input sequence (same shape as X0)
        - Y: Target output frames (same as X1, for reconstruction)
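Putting the pieces together, an end-to-end sketch using the hypothetical loader from above; the commented fit call assumes some two-input Keras model defined elsewhere, which this module does not provide.

from src.utils.dataset_utils import create_dataset

(X0, X1), Y = create_dataset(loader.get_batch())
print(X0.shape, X1.shape, Y.shape)  # each (num_vids * max_frames, 256, 256, 1), values in [0, 1]

# Assuming `model` is a two-input Keras model built elsewhere:
# model.fit([X0, X1], Y, batch_size=8, epochs=10)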