processing

`audio` ¶

`convert_audio(src_file, dst_file, target_sr=None, normalize=False)` ¶

Convert an audio file to a different sample rate and save it to a new file.

Parameters:

Name	Type	Description	Default
`src_file`	`str`	Path to the source audio file.	required
`dst_file`	`str`	Path to the destination audio file.	required
`target_sr`	`int`	Target sample rate for the output audio file.	`None`
`normalize`	`bool`	If True, normalize the audio waveform before conversion.	`False`

Returns:

Name	Type	Description
`AudioSegment`	`AudioSegment`	The converted audio waveform as an AudioSegment object.

Source code in aimet_ml/processing/audio.py

def convert_audio(
    src_file: str, dst_file: str, target_sr: Optional[int] = None, normalize: bool = False
) -> AudioSegment:
    """
    Convert an audio file and save the result to a new file.

    The source is loaded with ``read_audio`` (optionally resampled and normalized) and exported
    in the format named by the file extension of ``dst_file``.

    Args:
        src_file (str): Path to the source audio file.
        dst_file (str): Path to the destination audio file. Its extension selects the output format.
        target_sr (int, optional): Target sample rate for the output audio file.
        normalize (bool, optional): If True, normalize the audio waveform before conversion.

    Returns:
        AudioSegment: The converted audio waveform as an AudioSegment object.

    Raises:
        ValueError: If the file name in ``dst_file`` has no extension to derive the output format from.
    """
    import os

    # Look at the final path component only, so a dot in a directory name
    # (e.g. "out.v2/clip") is never mistaken for the start of the extension.
    _, dot, output_format = os.path.basename(dst_file).rpartition(".")
    if not dot or not output_format:
        # Fail before doing any decoding work instead of handing a meaningless format to the exporter.
        raise ValueError(f"Cannot infer the output format: '{dst_file}' has no file extension.")

    audio = read_audio(src_file, target_sr, normalize)
    audio.export(dst_file, format=output_format)
    return audio

`load_audio(file_path, target_sr=None, normalize=False)` ¶

Load an audio file and return the waveform as a NumPy array together with its sample rate (the target sample rate when one is given).

Parameters:

Name	Type	Description	Default
`file_path`	`str`	Path to the audio file.	required
`target_sr`	`int`	Target sample rate for the audio waveform.	`None`
`normalize`	`bool`	If True, normalize the audio waveform.	`False`

Returns:

Type	Description
`Tuple[ndarray, int]`	A tuple containing the waveform as a NumPy array and the sample rate of the returned waveform.

Source code in aimet_ml/processing/audio.py

def load_audio(file_path: str, target_sr: Optional[int] = None, normalize: bool = False) -> Tuple[np.ndarray, int]:
    """
    Load an audio file and return the waveform as a float32 NumPy array with its sample rate.

    The file is read with ``read_audio`` and its integer samples are divided by the full-scale
    magnitude of the segment's bit depth.

    Args:
        file_path (str): Path to the audio file.
        target_sr (int, optional): Target sample rate for the audio waveform.
        normalize (bool, optional): If True, normalize the audio waveform.

    Returns:
        Tuple[np.ndarray, int]: A tuple containing the waveform as a one-dimensional float32 NumPy array and
            the frame rate of the loaded audio.
    """
    audio = read_audio(file_path, target_sr, normalize)

    # Derive the divisor from the segment's own sample width (in bytes) rather than assuming
    # 16-bit audio; for 2-byte samples this is exactly 32768.0.
    full_scale = float(1 << (8 * audio.sample_width - 1))

    # NOTE(review): the sample array is used as returned by the segment, with no per-channel
    # reshaping; multi-channel audio presumably arrives interleaved — confirm.
    waveform = np.asarray(audio.get_array_of_samples(), dtype=np.float32) / full_scale
    return waveform, audio.frame_rate

`read_audio(file_path, target_sr=None, normalize=False)` ¶

Read an audio file and return the waveform as an AudioSegment object with the target sample rate.

Parameters:

Name	Type	Description	Default
`file_path`	`str`	Path to the audio file.	required
`target_sr`	`int`	Target sample rate for the audio waveform.	`None`
`normalize`	`bool`	If True, normalize the audio waveform.	`False`

Returns:

Name	Type	Description
`AudioSegment`	`AudioSegment`	Audio waveform as an AudioSegment object.

Source code in aimet_ml/processing/audio.py

def read_audio(file_path: str, target_sr: Optional[int] = None, normalize: bool = False) -> AudioSegment:
    """
    Read an audio file into an AudioSegment, optionally resampling and normalizing it.

    Args:
        file_path (str): Path to the audio file.
        target_sr (int, optional): Target sample rate for the audio waveform. A falsy value
            (``None`` or ``0``) leaves the frame rate untouched.
        normalize (bool, optional): If True, normalize the audio waveform.

    Returns:
        AudioSegment: Audio waveform as an AudioSegment object.
    """
    segment = AudioSegment.from_file(file_path)
    # Resampling happens first, normalization second.
    resampled = segment.set_frame_rate(target_sr) if target_sr else segment
    return effects.normalize(resampled) if normalize else resampled

`text` ¶

`clean_repeated_tokens(tokens)` ¶

Remove sequences of repeated tokens from a list.

Parameters:

Name	Type	Description	Default
`tokens`	`list[str]`	List of tokens to clean.	required

Returns:

Type	Description
`List[str]`	List of tokens with repeated sequences removed.

Source code in aimet_ml/processing/text.py

def clean_repeated_tokens(tokens: List[str]) -> List[str]:
    """
    Collapse immediately repeated runs of tokens, keeping the first occurrence of each run.

    Window sizes are tried from half the input length down to one. Two adjacent windows count as a
    repeat when their concatenated text is equal. The input list is not modified.

    Args:
        tokens (list[str]): List of tokens to clean.

    Returns:
        list[str]: List of tokens with repeated sequences removed.
    """
    cleaned = tokens.copy()
    # The largest window is fixed from the original length, even though the list may shrink.
    for window in range(len(cleaned) // 2, 0, -1):
        start = 0
        while start + window < len(cleaned):
            middle = start + window
            end = middle + window
            if "".join(cleaned[start:middle]) == "".join(cleaned[middle:end]):
                # Drop the duplicate and re-check the same position, so runs repeated
                # three or more times collapse to a single copy.
                del cleaned[middle:end]
            else:
                start += 1
    return cleaned

`exclude_keywords(text, keywords)` ¶

Check that none of the given keywords are present in the text.

Parameters:

Name	Type	Description	Default
`text`	`str`	The text to search for keywords.	required
`keywords`	`list[str]`	List of keywords to check for.	required

Returns:

Name	Type	Description
`bool`	`bool`	False if any keyword is present in the text, True otherwise.

Source code in aimet_ml/processing/text.py

def exclude_keywords(text: str, keywords: List[str]) -> bool:
    """
    Check that none of the given keywords occur in the text.

    Each keyword is matched as a plain substring of ``text``.

    Args:
        text (str): The text to search for keywords.
        keywords (list[str]): List of keywords to check for.

    Returns:
        bool: False if any keyword is present in the text, True otherwise.
    """
    return not any(keyword in text for keyword in keywords)

`include_keywords(text, keywords)` ¶

Check if any of the given keywords are present in the text.

Parameters:

Name	Type	Description	Default
`text`	`str`	The text to search for keywords.	required
`keywords`	`list[str]`	List of keywords to check for.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if any keyword is present in the text, False otherwise.

Source code in aimet_ml/processing/text.py

def include_keywords(text: str, keywords: List[str]) -> bool:
    """
    Check whether at least one of the given keywords occurs in the text.

    Each keyword is matched as a plain substring of ``text``.

    Args:
        text (str): The text to search for keywords.
        keywords (list[str]): List of keywords to check for.

    Returns:
        bool: True if any keyword is present in the text, False otherwise.
    """
    hits = (keyword in text for keyword in keywords)
    return any(hits)

`trim_tokens(tokenizer, text, max_len)` ¶

Trims a list of tokens generated by a tokenizer to ensure it doesn't exceed a maximum length.

Parameters:

Name	Type	Description	Default
`tokenizer`	`PreTrainedTokenizer`	The tokenizer used to tokenize the input text.	required
`text`	`str`	The input text to tokenize and trim.	required
`max_len`	`int`	The maximum allowed length for the list of tokens.	required

Returns:

Type	Description
`Tuple[str, int]`	A tuple containing the trimmed text and the number of tokens in the trimmed list.

Source code in aimet_ml/processing/text.py

def trim_tokens(tokenizer: PreTrainedTokenizer, text: str, max_len: int) -> Tuple[str, int]:
    """
    Tokenize the text, keep at most ``max_len`` leading tokens, and convert them back to a string.

    Args:
        tokenizer (PreTrainedTokenizer): The tokenizer used to tokenize the input text.
        text (str): The input text to tokenize and trim.
        max_len (int): The maximum allowed length for the list of tokens.

    Returns:
        Tuple[str, int]: A tuple containing the trimmed text and the number of tokens in the trimmed list.
    """
    pieces = tokenizer.tokenize(text)
    # Only slice when the token list is actually too long.
    kept = pieces[:max_len] if len(pieces) > max_len else pieces
    trimmed_text = tokenizer.convert_tokens_to_string(kept)
    return trimmed_text, len(kept)

`video` ¶

`convert_video(src_file, dst_file, target_fps)` ¶

Convert a video to a different frame rate and save to a new file.

Parameters:

Name	Type	Description	Default
`src_file`	`str`	Path to the source video file.	required
`dst_file`	`str`	Path to the output video file.	required
`target_fps`	`int`	The target frames per second for the output video.	required

Source code in aimet_ml/processing/video.py

def convert_video(src_file: str, dst_file: str, target_fps: int) -> None:
    """
    Convert a video to a different frame rate and save to a new file.

    The video stream is passed through the ``fps`` filter. The audio track, when the source has
    one, is re-encoded as AAC. An existing ``dst_file`` is overwritten.

    Args:
        src_file (str): Path to the source video file.
        dst_file (str): Path to the output video file.
        target_fps (int): The target frames per second for the output video.
    """
    # Mapping an audio stream that does not exist makes ffmpeg abort, so probe the source
    # first and only forward audio when it is really there.
    has_audio = any(stream.get("codec_type") == "audio" for stream in ffmpeg.probe(src_file)["streams"])

    input_vid = ffmpeg.input(src_file)
    streams = [input_vid.video.filter("fps", target_fps)]
    output_options = {"loglevel": "quiet", "max_muxing_queue_size": 1024}
    if has_audio:
        streams.append(input_vid.audio)
        output_options["acodec"] = "aac"

    ffmpeg.output(*streams, dst_file, **output_options).overwrite_output().run()

`is_video(file_path)` ¶

Check if a given file contains video streams.

Parameters:

Name	Type	Description	Default
`file_path`	`str`	The path to the input file.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if the file contains video streams, False otherwise.

Source code in aimet_ml/processing/video.py

def is_video(file_path: str) -> bool:
    """
    Check if a given file contains video streams.

    The file is probed with ffmpeg and each reported stream's ``codec_type`` is inspected.

    Args:
        file_path (str): The path to the input file.

    Returns:
        bool: True if the file contains video streams, False otherwise.
    """
    stream_infos = ffmpeg.probe(file_path)["streams"]
    return any(info["codec_type"] == "video" for info in stream_infos)

`load_video(file_path)` ¶

Load frames from a video file.

Parameters:

Name	Type	Description	Default
`file_path`	`str`	The path to the video file.	required

Returns:

Name	Type	Description
`tuple`	`tuple`	A tuple containing a list of frames and the frames per second (fps).

Source code in aimet_ml/processing/video.py

def load_video(file_path: str) -> tuple:
    """
    Load all frames from a video file into memory.

    Frames are read sequentially until the capture reports that no further frame is available.
    No check is made that the file was opened successfully.
    NOTE(review): an unreadable file presumably yields an empty frame list — confirm.

    Args:
        file_path (str): The path to the video file.

    Returns:
        tuple: A tuple containing a list of frames and the frames per second (fps).
    """
    frames: List[np.ndarray] = []
    cap = cv2.VideoCapture(file_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # Release the capture handle even if reading or buffering a frame raises.
        cap.release()
    return frames, fps

processing

audio ¶

convert_audio(src_file, dst_file, target_sr=None, normalize=False) ¶

load_audio(file_path, target_sr=None, normalize=False) ¶

read_audio(file_path, target_sr=None, normalize=False) ¶

text ¶

clean_repeated_tokens(tokens) ¶

exclude_keywords(text, keywords) ¶

include_keywords(text, keywords) ¶

trim_tokens(tokenizer, text, max_len) ¶

video ¶

convert_video(src_file, dst_file, target_fps) ¶

is_video(file_path) ¶

load_video(file_path) ¶

`audio` ¶

`convert_audio(src_file, dst_file, target_sr=None, normalize=False)` ¶

`load_audio(file_path, target_sr=None, normalize=False)` ¶

`read_audio(file_path, target_sr=None, normalize=False)` ¶

`text` ¶

`clean_repeated_tokens(tokens)` ¶

`exclude_keywords(text, keywords)` ¶

`include_keywords(text, keywords)` ¶

`trim_tokens(tokenizer, text, max_len)` ¶

`video` ¶

`convert_video(src_file, dst_file, target_fps)` ¶

`is_video(file_path)` ¶

`load_video(file_path)` ¶