Skip to content

processing

audio

convert_audio(src_file, dst_file, target_sr=None, normalize=False)

Convert an audio file to a different sample rate and save it to a new file.

Parameters:

Name Type Description Default
src_file str

Path to the source audio file.

required
dst_file str

Path to the destination audio file.

required
target_sr int

Target sample rate for the output audio file.

None
normalize bool

If True, normalize the audio waveform before conversion.

False

Returns:

Name Type Description
AudioSegment AudioSegment

The converted audio waveform as an AudioSegment object.

Source code in aimet_ml/processing/audio.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def convert_audio(
    src_file: str, dst_file: str, target_sr: Optional[int] = None, normalize: bool = False
) -> AudioSegment:
    """
    Re-encode an audio file into a new file, optionally resampling and normalizing it.

    The export format is whatever follows the last "." in ``dst_file``.

    Args:
        src_file (str): Path of the audio file to read.
        dst_file (str): Path the converted audio is written to.
        target_sr (int, optional): Sample rate to resample to; the source rate is kept when omitted.
        normalize (bool, optional): Whether to normalize the waveform before it is exported.

    Returns:
        AudioSegment: The segment that was exported to ``dst_file``.
    """
    converted = read_audio(src_file, target_sr, normalize)
    # The text after the final dot names the container/codec passed to the exporter.
    extension = dst_file.rpartition(".")[2]
    converted.export(dst_file, format=extension)
    return converted

load_audio(file_path, target_sr=None, normalize=False)

Load an audio file and return the waveform as a NumPy array and the target sample rate.

Parameters:

Name Type Description Default
file_path str

Path to the audio file.

required
target_sr int

Target sample rate for the audio waveform.

None
normalize bool

If True, normalize the audio waveform.

False

Returns:

Type Description
Tuple[ndarray, int]

Tuple[np.ndarray, int]: A tuple containing the waveform as a NumPy array and the target sample rate.

Source code in aimet_ml/processing/audio.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def load_audio(file_path: str, target_sr: Optional[int] = None, normalize: bool = False) -> Tuple[np.ndarray, int]:
    """
    Load an audio file and return its samples as a float32 NumPy array plus its sample rate.

    Samples are divided by the full-scale magnitude implied by the segment's sample width
    (``2 ** (bits - 1)``). For 16-bit audio this is 32768, matching the previous behaviour,
    while wider sample formats are no longer scaled by the wrong constant.

    Args:
        file_path (str): Path to the audio file.
        target_sr (int, optional): Target sample rate for the audio waveform.
        normalize (bool, optional): If True, normalize the audio waveform.

    Returns:
        Tuple[np.ndarray, int]: The scaled waveform and the frame rate of the loaded segment.
    """
    audio = read_audio(file_path, target_sr, normalize)
    # Full-scale magnitude of signed PCM at this width, e.g. 32768 for 2-byte samples.
    full_scale = float(1 << (8 * audio.sample_width - 1))
    # NOTE(review): the sample array is used as returned (1-D); multi-channel input is
    # presumably interleaved and is not reshaped here - confirm against callers.
    waveform = np.asarray(audio.get_array_of_samples(), dtype=np.float32) / full_scale
    return waveform, audio.frame_rate

read_audio(file_path, target_sr=None, normalize=False)

Read an audio file and return the waveform as an AudioSegment object with the target sample rate.

Parameters:

Name Type Description Default
file_path str

Path to the audio file.

required
target_sr int

Target sample rate for the audio waveform.

None
normalize bool

If True, normalize the audio waveform.

False

Returns:

Name Type Description
AudioSegment AudioSegment

Audio waveform as an AudioSegment object.

Source code in aimet_ml/processing/audio.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def read_audio(file_path: str, target_sr: Optional[int] = None, normalize: bool = False) -> AudioSegment:
    """
    Open an audio file as an AudioSegment, optionally resampling and normalizing it.

    Args:
        file_path (str): Path of the audio file to open.
        target_sr (int, optional): Sample rate to resample to; skipped when falsy.
        normalize (bool, optional): Whether to run the normalize effect on the segment.

    Returns:
        AudioSegment: The loaded (and possibly resampled / normalized) segment.
    """
    segment = AudioSegment.from_file(file_path)
    # Resampling happens before normalization, mirroring the order of the two options.
    resampled = segment.set_frame_rate(target_sr) if target_sr else segment
    return effects.normalize(resampled) if normalize else resampled

text

clean_repeated_tokens(tokens)

Remove sequences of repeated tokens from a list.

Parameters:

Name Type Description Default
tokens list[str]

List of tokens to clean.

required

Returns:

Type Description
List[str]

list[str]: List of tokens with repeated sequences removed.

Source code in aimet_ml/processing/text.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def clean_repeated_tokens(tokens: List[str]) -> List[str]:
    """
    Collapse back-to-back repeated runs of tokens into a single occurrence.

    Window sizes are tried from half the input length down to one. Two adjacent
    windows count as a repeat when their concatenated text is equal, in which
    case the second window is dropped. The input list is left untouched.

    Args:
        tokens (list[str]): Tokens to clean.

    Returns:
        list[str]: A new list with the repeated runs removed.
    """
    remaining = tokens.copy()
    # The largest window is fixed from the initial length, then shrinks by one each pass.
    for width in range(len(remaining) // 2, 0, -1):
        start = 0
        while start + width < len(remaining):
            middle = start + width
            end = middle + width
            first_run = "".join(remaining[start:middle])
            second_run = "".join(remaining[middle:end])
            if first_run == second_run:
                # Drop the duplicate and re-check the same position for further repeats.
                del remaining[middle:end]
            else:
                start += 1
    return remaining

exclude_keywords(text, keywords)

Check that none of the given keywords are present in the text.

Parameters:

Name Type Description Default
text str

The text to search for keywords.

required
keywords list[str]

List of keywords to check for.

required

Returns:

Name Type Description
bool bool

False if any keyword is present in the text, True otherwise.

Source code in aimet_ml/processing/text.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def exclude_keywords(text: str, keywords: List[str]) -> bool:
    """
    Tell whether the text is free of every given keyword.

    Args:
        text (str): Text to scan.
        keywords (list[str]): Substrings that must not occur in the text.

    Returns:
        bool: True when no keyword occurs in the text, False as soon as one does.
    """
    return not any(keyword in text for keyword in keywords)

include_keywords(text, keywords)

Check if any of the given keywords are present in the text.

Parameters:

Name Type Description Default
text str

The text to search for keywords.

required
keywords list[str]

List of keywords to check for.

required

Returns:

Name Type Description
bool bool

True if any keyword is present in the text, False otherwise.

Source code in aimet_ml/processing/text.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
def include_keywords(text: str, keywords: List[str]) -> bool:
    """
    Tell whether the text contains at least one of the given keywords.

    Args:
        text (str): Text to scan.
        keywords (list[str]): Substrings to look for.

    Returns:
        bool: True as soon as one keyword occurs in the text, False when none do.
    """
    return any(keyword in text for keyword in keywords)

trim_tokens(tokenizer, text, max_len)

Trims a list of tokens generated by a tokenizer to ensure it doesn't exceed a maximum length.

Parameters:

Name Type Description Default
tokenizer PreTrainedTokenizer

The tokenizer used to tokenize the input text.

required
text str

The input text to tokenize and trim.

required
max_len int

The maximum allowed length for the list of tokens.

required

Returns:

Type Description
Tuple[str, int]

Tuple[str, int]: A tuple containing the trimmed text and the number of tokens in the trimmed list.

Source code in aimet_ml/processing/text.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def trim_tokens(tokenizer: PreTrainedTokenizer, text: str, max_len: int) -> Tuple[str, int]:
    """
    Tokenize the text and keep at most ``max_len`` leading tokens.

    Args:
        tokenizer (PreTrainedTokenizer): Tokenizer used both to split the text and to rebuild it.
        text (str): Text to tokenize and trim.
        max_len (int): Upper bound on the number of tokens kept.

    Returns:
        Tuple[str, int]: The text rebuilt from the kept tokens, and how many tokens were kept.
    """
    pieces = tokenizer.tokenize(text)
    # Only slice when the limit is actually exceeded; otherwise keep the tokens as produced.
    kept = pieces[:max_len] if len(pieces) > max_len else pieces
    return tokenizer.convert_tokens_to_string(kept), len(kept)

video

convert_video(src_file, dst_file, target_fps)

Convert a video to a different frame rate and save to a new file.

Parameters:

Name Type Description Default
src_file str

Path to the source video file.

required
dst_file str

Path to the output video file.

required
target_fps int

The target frames per second for the output video.

required
Source code in aimet_ml/processing/video.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def convert_video(src_file: str, dst_file: str, target_fps: int) -> None:
    """
    Re-encode a video at a new frame rate and write it to a new file.

    The video stream goes through the "fps" filter, the audio stream is encoded
    as AAC, and an existing destination file is overwritten.

    Args:
        src_file (str): Path of the video to read.
        dst_file (str): Path the converted video is written to.
        target_fps (int): Frames per second of the output video.
    """
    source = ffmpeg.input(src_file)
    resampled_video = source.video.filter("fps", target_fps)
    audio_track = source.audio

    output_options = {
        "acodec": "aac",
        "loglevel": "quiet",
        "max_muxing_queue_size": 1024,
    }
    job = ffmpeg.output(resampled_video, audio_track, dst_file, **output_options)
    job.overwrite_output().run()

is_video(file_path)

Check if a given file contains video streams.

Parameters:

Name Type Description Default
file_path str

The path to the input file.

required

Returns:

Name Type Description
bool bool

True if the file contains video streams, False otherwise.

Source code in aimet_ml/processing/video.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def is_video(file_path: str) -> bool:
    """
    Report whether a media file has at least one video stream.

    Args:
        file_path (str): Path of the file to probe.

    Returns:
        bool: True if any probed stream has codec type "video", False otherwise.
    """
    stream_infos = ffmpeg.probe(file_path)["streams"]
    return any(info["codec_type"] == "video" for info in stream_infos)

load_video(file_path)

Load frames from a video file.

Parameters:

Name Type Description Default
file_path str

The path to the video file.

required

Returns:

Name Type Description
tuple tuple

A tuple containing a list of frames and the frames per second (fps).

Source code in aimet_ml/processing/video.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def load_video(file_path: str) -> tuple:
    """
    Read every frame of a video file into memory.

    Args:
        file_path (str): Path of the video file to read.

    Returns:
        tuple: ``(frames, fps)`` - the list of decoded frames and the frame rate
        reported by the capture.
    """
    capture = cv2.VideoCapture(file_path)
    fps = capture.get(cv2.CAP_PROP_FPS)

    frames: List[np.ndarray] = []
    # Prime the loop with the first read, then keep reading until a read fails.
    ok, frame = capture.read()
    while ok:
        frames.append(frame)
        ok, frame = capture.read()

    capture.release()
    return frames, fps