src.utils.dataset_utils

  1import cv2
  2import numpy as np
  3import glob
  4import matplotlib.pyplot as plt
  5import keras
  6
  7class VideoDataLoader:
  8    """
  9    A class to load and process videos into grayscale frame batches for model training.
 10
 11    Attributes:
 12        folder_path (str): Directory containing video files.
 13        max_frames (int): Maximum number of frames to extract per video.
 14        num_vids (int): Number of videos to process for a batch.
 15        video_files (list): List of .mp4 video file paths in the folder.
 16    """
 17    
 18    def __init__(self, folder_path, max_frames, num_vids):
 19        """
 20        Initializes the loader with folder path and limits.
 21
 22        Args:
 23            folder_path (str): Path to the folder containing .mp4 files.
 24            max_frames (int): Number of frames to extract per video.
 25            num_vids (int): Number of videos to load for each batch.
 26        """
 27        self.folder_path = folder_path
 28        self.max_frames = max_frames
 29        self.num_vids = num_vids
 30        self.video_files = glob.glob(f"{folder_path}/*.mp4")  # MP4 format
 31
 32    def process_video(self, video_path):
 33        """
 34        Loads a single video and converts it into a fixed number of grayscale frames.
 35
 36        Args:
 37            video_path (str): Path to the video file.
 38
 39        Returns:
 40            np.ndarray: Array of shape (max_frames, 256, 256) representing grayscale frames.
 41        """
 42        cap = cv2.VideoCapture(video_path)
 43        frames = []
 44
 45        while len(frames) < self.max_frames:
 46            ret, frame = cap.read()
 47            if not ret:
 48                break  # End of video
 49
 50            # Resize frame and convert to grayscale by averaging across color channels
 51            frame = cv2.resize(frame, (256, 256))
 52            gray_frame = np.mean(frame, axis=2).astype(np.uint8)
 53            frames.append(gray_frame)
 54
 55        cap.release()
 56
 57        # Pad with last frame if video has fewer frames than max_frames
 58        if len(frames) < self.max_frames:
 59            last_frame = frames[-1] if frames else np.zeros((256, 256), dtype=np.uint8)
 60            frames.extend([last_frame] * (self.max_frames - len(frames)))
 61
 62        return np.array(frames)
 63
 64    def get_batch(self):
 65        """
 66        Retrieves a batch of videos, each with max_frames grayscale frames.
 67
 68        Returns:
 69            np.ndarray: Array of shape (num_vids, max_frames, 256, 256).
 70        """
 71        batch_videos = []
 72        for video_file in self.video_files[:self.num_vids]:
 73            video_data = self.process_video(video_file)
 74            batch_videos.append(video_data)
 75
 76        return np.array(batch_videos)
 77
 78    def get_stats(self):
 79        """
 80        Calculates statistics about the videos in the dataset.
 81
 82        Returns:
 83            dict: Dictionary with total, average, min, and max frame counts across all videos.
 84        """
 85        frame_counts = []
 86
 87        for video_file in self.video_files:
 88            cap = cv2.VideoCapture(video_file)
 89            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 90            frame_counts.append(frame_count)
 91            cap.release()
 92
 93        avg_frames = np.mean(frame_counts) if frame_counts else 0
 94        min_frames = np.min(frame_counts) if frame_counts else 0
 95        max_frames = np.max(frame_counts) if frame_counts else 0
 96
 97        return {
 98            "total_videos": len(self.video_files),
 99            "average_frames": avg_frames,
100            "min_frames": min_frames,
101            "max_frames": max_frames
102        }
103
104
def create_dataset(batch):
    """
    Prepares training data from a batch of grayscale video frames.

    Args:
        batch (np.ndarray): Input array of shape (num_videos, num_frames, height, width).

    Returns:
        tuple: ((X0, X1), Y), each of shape (num_videos*num_frames, height, width, 1)
        with values divided by 255.
            - X0: First frame of each video, repeated once per frame of that video
            - X1: All frames from the video as input sequence
            - Y: Target output frames (same values as X1, stored in a separate array)

    Raises:
        ValueError: If ``batch`` is not 4-dimensional.
    """
    batch = np.asarray(batch)
    if batch.ndim != 4:
        raise ValueError(
            "batch must have shape (num_videos, num_frames, height, width), "
            f"got shape {batch.shape}"
        )
    num_vids, num_frames, h, w = batch.shape

    # Extract the first frame of each video and repeat it across all frames
    first_frames = batch[:, 0:1, :, :]
    X0 = np.repeat(first_frames, num_frames, axis=1)  # shape: (num_vids, num_frames, h, w)

    # Reshape for model input: (num_vids*num_frames, h, w, 1) and normalize to [0, 1].
    # The division already allocates a new array, so the input batch is never
    # modified and no explicit copy of it is needed beforehand.
    X0 = X0.reshape(-1, h, w, 1) / 255
    X1 = batch.reshape(-1, h, w, 1) / 255

    # Ground truth (target) frames: same values as X1, but an independent array
    # so that modifying one in place does not affect the other.
    Y = X1.copy()

    return (X0, X1), Y
class VideoDataLoader:
  8class VideoDataLoader:
  9    """
 10    A class to load and process videos into grayscale frame batches for model training.
 11
 12    Attributes:
 13        folder_path (str): Directory containing video files.
 14        max_frames (int): Maximum number of frames to extract per video.
 15        num_vids (int): Number of videos to process for a batch.
 16        video_files (list): List of .mp4 video file paths in the folder.
 17    """
 18    
 19    def __init__(self, folder_path, max_frames, num_vids):
 20        """
 21        Initializes the loader with folder path and limits.
 22
 23        Args:
 24            folder_path (str): Path to the folder containing .mp4 files.
 25            max_frames (int): Number of frames to extract per video.
 26            num_vids (int): Number of videos to load for each batch.
 27        """
 28        self.folder_path = folder_path
 29        self.max_frames = max_frames
 30        self.num_vids = num_vids
 31        self.video_files = glob.glob(f"{folder_path}/*.mp4")  # MP4 format
 32
 33    def process_video(self, video_path):
 34        """
 35        Loads a single video and converts it into a fixed number of grayscale frames.
 36
 37        Args:
 38            video_path (str): Path to the video file.
 39
 40        Returns:
 41            np.ndarray: Array of shape (max_frames, 256, 256) representing grayscale frames.
 42        """
 43        cap = cv2.VideoCapture(video_path)
 44        frames = []
 45
 46        while len(frames) < self.max_frames:
 47            ret, frame = cap.read()
 48            if not ret:
 49                break  # End of video
 50
 51            # Resize frame and convert to grayscale by averaging across color channels
 52            frame = cv2.resize(frame, (256, 256))
 53            gray_frame = np.mean(frame, axis=2).astype(np.uint8)
 54            frames.append(gray_frame)
 55
 56        cap.release()
 57
 58        # Pad with last frame if video has fewer frames than max_frames
 59        if len(frames) < self.max_frames:
 60            last_frame = frames[-1] if frames else np.zeros((256, 256), dtype=np.uint8)
 61            frames.extend([last_frame] * (self.max_frames - len(frames)))
 62
 63        return np.array(frames)
 64
 65    def get_batch(self):
 66        """
 67        Retrieves a batch of videos, each with max_frames grayscale frames.
 68
 69        Returns:
 70            np.ndarray: Array of shape (num_vids, max_frames, 256, 256).
 71        """
 72        batch_videos = []
 73        for video_file in self.video_files[:self.num_vids]:
 74            video_data = self.process_video(video_file)
 75            batch_videos.append(video_data)
 76
 77        return np.array(batch_videos)
 78
 79    def get_stats(self):
 80        """
 81        Calculates statistics about the videos in the dataset.
 82
 83        Returns:
 84            dict: Dictionary with total, average, min, and max frame counts across all videos.
 85        """
 86        frame_counts = []
 87
 88        for video_file in self.video_files:
 89            cap = cv2.VideoCapture(video_file)
 90            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 91            frame_counts.append(frame_count)
 92            cap.release()
 93
 94        avg_frames = np.mean(frame_counts) if frame_counts else 0
 95        min_frames = np.min(frame_counts) if frame_counts else 0
 96        max_frames = np.max(frame_counts) if frame_counts else 0
 97
 98        return {
 99            "total_videos": len(self.video_files),
100            "average_frames": avg_frames,
101            "min_frames": min_frames,
102            "max_frames": max_frames
103        }

A class to load and process videos into grayscale frame batches for model training.

Attributes:

- folder_path (str): Directory containing video files.
- max_frames (int): Number of frames extracted per video; videos with fewer frames are padded by repeating their last frame.
- num_vids (int): Number of videos to process for a batch.
- video_files (list): List of .mp4 video file paths in the folder.

VideoDataLoader(folder_path, max_frames, num_vids)
19    def __init__(self, folder_path, max_frames, num_vids):
20        """
21        Initializes the loader with folder path and limits.
22
23        Args:
24            folder_path (str): Path to the folder containing .mp4 files.
25            max_frames (int): Number of frames to extract per video.
26            num_vids (int): Number of videos to load for each batch.
27        """
28        self.folder_path = folder_path
29        self.max_frames = max_frames
30        self.num_vids = num_vids
31        self.video_files = glob.glob(f"{folder_path}/*.mp4")  # MP4 format

Initializes the loader with folder path and limits.

Args:

- folder_path (str): Path to the folder containing .mp4 files.
- max_frames (int): Number of frames to extract per video.
- num_vids (int): Number of videos to load for each batch.

folder_path
max_frames
num_vids
video_files
def process_video(self, video_path):
33    def process_video(self, video_path):
34        """
35        Loads a single video and converts it into a fixed number of grayscale frames.
36
37        Args:
38            video_path (str): Path to the video file.
39
40        Returns:
41            np.ndarray: Array of shape (max_frames, 256, 256) representing grayscale frames.
42        """
43        cap = cv2.VideoCapture(video_path)
44        frames = []
45
46        while len(frames) < self.max_frames:
47            ret, frame = cap.read()
48            if not ret:
49                break  # End of video
50
51            # Resize frame and convert to grayscale by averaging across color channels
52            frame = cv2.resize(frame, (256, 256))
53            gray_frame = np.mean(frame, axis=2).astype(np.uint8)
54            frames.append(gray_frame)
55
56        cap.release()
57
58        # Pad with last frame if video has fewer frames than max_frames
59        if len(frames) < self.max_frames:
60            last_frame = frames[-1] if frames else np.zeros((256, 256), dtype=np.uint8)
61            frames.extend([last_frame] * (self.max_frames - len(frames)))
62
63        return np.array(frames)

Loads a single video and converts it into a fixed number of grayscale frames.

Args: video_path (str): Path to the video file.

Returns: np.ndarray: Array of shape (max_frames, 256, 256) representing grayscale frames.

def get_batch(self):
65    def get_batch(self):
66        """
67        Retrieves a batch of videos, each with max_frames grayscale frames.
68
69        Returns:
70            np.ndarray: Array of shape (num_vids, max_frames, 256, 256).
71        """
72        batch_videos = []
73        for video_file in self.video_files[:self.num_vids]:
74            video_data = self.process_video(video_file)
75            batch_videos.append(video_data)
76
77        return np.array(batch_videos)

Retrieves a batch of videos, each with max_frames grayscale frames.

Returns: np.ndarray: Array of shape (num_vids, max_frames, 256, 256).

def get_stats(self):
 79    def get_stats(self):
 80        """
 81        Calculates statistics about the videos in the dataset.
 82
 83        Returns:
 84            dict: Dictionary with total, average, min, and max frame counts across all videos.
 85        """
 86        frame_counts = []
 87
 88        for video_file in self.video_files:
 89            cap = cv2.VideoCapture(video_file)
 90            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 91            frame_counts.append(frame_count)
 92            cap.release()
 93
 94        avg_frames = np.mean(frame_counts) if frame_counts else 0
 95        min_frames = np.min(frame_counts) if frame_counts else 0
 96        max_frames = np.max(frame_counts) if frame_counts else 0
 97
 98        return {
 99            "total_videos": len(self.video_files),
100            "average_frames": avg_frames,
101            "min_frames": min_frames,
102            "max_frames": max_frames
103        }

Calculates statistics about the videos in the dataset.

Returns: dict: Dictionary with total, average, min, and max frame counts across all videos.

def create_dataset(batch):
def create_dataset(batch):
    """
    Prepares training data from a batch of grayscale video frames.

    Args:
        batch (np.ndarray): Input array of shape (num_videos, num_frames, height, width).

    Returns:
        tuple: ((X0, X1), Y), each of shape (num_videos*num_frames, height, width, 1)
        with values divided by 255.
            - X0: First frame of each video, repeated once per frame of that video
            - X1: All frames from the video as input sequence
            - Y: Target output frames (same values as X1, stored in a separate array)

    Raises:
        ValueError: If ``batch`` is not 4-dimensional.
    """
    batch = np.asarray(batch)
    if batch.ndim != 4:
        raise ValueError(
            "batch must have shape (num_videos, num_frames, height, width), "
            f"got shape {batch.shape}"
        )
    num_vids, num_frames, h, w = batch.shape

    # Extract the first frame of each video and repeat it across all frames
    first_frames = batch[:, 0:1, :, :]
    X0 = np.repeat(first_frames, num_frames, axis=1)  # shape: (num_vids, num_frames, h, w)

    # Reshape for model input: (num_vids*num_frames, h, w, 1) and normalize to [0, 1].
    # The division already allocates a new array, so the input batch is never
    # modified and no explicit copy of it is needed beforehand.
    X0 = X0.reshape(-1, h, w, 1) / 255
    X1 = batch.reshape(-1, h, w, 1) / 255

    # Ground truth (target) frames: same values as X1, but an independent array
    # so that modifying one in place does not affect the other.
    Y = X1.copy()

    return (X0, X1), Y

Prepares training data from a batch of grayscale video frames.

Args: batch (np.ndarray): Input array of shape (num_videos, num_frames, height, width).

Returns: tuple: ((X0, X1), Y)

- X0: Repeated first frame of each video across time (shape: (num_videos*num_frames, height, width, 1))
- X1: All frames from the video as input sequence
- Y: Target output frames (same as X1 for reconstruction)