Skip to content

Audio File Operations

The audio file module provides functionality for reading and writing audio files in various formats.

Object-Oriented API

AudioFile Class

The AudioFile class provides high-level audio file operations with automatic resource management.

import coremusic as cm

# Context manager usage (recommended)
with cm.AudioFile("audio.wav") as audio:
    print(f"Duration: {audio.duration:.2f}s")
    data, count = audio.read_packets(0, 1000)

# Explicit management
audio = cm.AudioFile("audio.wav")
audio.open()
try:
    # read_packets returns a (audio_data_bytes, packets_read) tuple
    data, count = audio.read_packets(0, 1000)
finally:
    audio.close()

Class Reference

coremusic.audio.AudioFile

Bases: CoreAudioObject

High-level audio file operations with automatic resource management

Source code in src/coremusic/audio/core.py
class AudioFile(capi.CoreAudioObject):
    """High-level audio file operations with automatic resource management.

    The file is opened lazily: every accessor opens it on first use, so an
    explicit :meth:`open` call is optional. :meth:`close` also disposes the
    object (it calls :meth:`dispose`).
    """

    def __init__(self, path: str | Path, *, writable: bool = False):
        """Create a handle for the audio file at ``path`` without opening it.

        Args:
            path: Location of the audio file; stored as a string.
            writable: When True, the file is opened with read/write
                permissions instead of read-only.
        """
        super().__init__()
        self._path = str(path)
        self._format: AudioFormat | None = None  # cached by the ``format`` property
        self._is_open = False
        self._writable = writable

    def open(self) -> "AudioFile":
        """Open the audio file if it is not already open.

        Returns:
            This instance, so the call can be chained.

        Raises:
            AudioFileError: If the underlying open call fails.
        """
        self._ensure_not_disposed()
        if not self._is_open:
            try:
                permissions = 3 if self._writable else 1  # READ_WRITE or READ
                file_id = capi.audio_file_open_url(self._path, permissions)
                self._set_object_id(file_id)
                self._is_open = True
            except Exception as e:
                raise AudioFileError(
                    f"Failed to open file {self._path}: {e}"
                ) from e
        return self

    def close(self) -> None:
        """Close the audio file and dispose of this object.

        Does nothing when the file is not open.

        Raises:
            AudioFileError: If the underlying close call fails. The object is
                still marked closed and disposed in that case.
        """
        if self._is_open:
            try:
                capi.audio_file_close(self.object_id)
            except Exception as e:
                raise AudioFileError(f"Failed to close file: {e}") from e
            finally:
                # Clear the flag first so dispose() does not close a second time.
                self._is_open = False
                self.dispose()

    def __enter__(self) -> "AudioFile":
        """Open the file on entering a ``with`` block and return this instance."""
        self.open()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Close the file on leaving a ``with`` block; exceptions propagate."""
        self.close()

    def _ensure_open(self) -> None:
        """Verify the object is not disposed and open the file on demand."""
        self._ensure_not_disposed()
        if not self._is_open:
            self.open()

    @property
    def format(self) -> AudioFormat:
        """Get the audio format of the file.

        The format is read from the file once and cached afterwards.

        Raises:
            AudioFileError: If the format property cannot be read or parsed.
        """
        self._ensure_open()

        if self._format is None:
            try:
                format_data = capi.audio_file_get_property(
                    self.object_id, capi.get_audio_file_property_data_format()
                )
                self._format = AudioFormat.from_asbd_bytes(format_data)
            except Exception as e:
                raise AudioFileError(f"Failed to get format: {e}") from e

        return self._format

    def read_packets(self, start_packet: int, packet_count: int) -> tuple[bytes, int]:
        """Read audio packets from the file.

        Args:
            start_packet: Starting packet index (must be non-negative)
            packet_count: Number of packets to read (must be positive)

        Returns:
            Tuple of (audio_data_bytes, packets_read)

        Raises:
            ValueError: If start_packet < 0 or packet_count <= 0
            AudioFileError: If reading fails

        Example::

            from coremusic.audio import AudioFile

            # Read audio data in chunks
            with AudioFile("audio.wav") as audio:
                chunk_size = 4096
                offset = 0

                while True:
                    data, packets_read = audio.read_packets(offset, chunk_size)
                    if packets_read == 0:
                        break
                    # Process data...
                    offset += packets_read
        """
        if start_packet < 0:
            raise ValueError(f"start_packet must be non-negative, got {start_packet}")
        if packet_count <= 0:
            raise ValueError(f"packet_count must be positive, got {packet_count}")

        self._ensure_open()

        try:
            return capi.audio_file_read_packets(
                self.object_id, start_packet, packet_count
            )
        except Exception as e:
            raise AudioFileError(f"Failed to read packets: {e}") from e

    def read_as_numpy(
        self, start_packet: int = 0, packet_count: int | None = None
    ) -> "NDArray[Any]":
        """
        Read audio data from the file as a NumPy array.

        Args:
            start_packet: Starting packet index (default: 0)
            packet_count: Number of packets to read (default: all remaining packets)

        Returns:
            NumPy array with shape (frames, channels) for multi-channel audio,
            or (frames,) for mono audio. The dtype is determined by the audio format.
            When ``packet_count`` is omitted and ``start_packet`` is at or past
            the end of the file, an empty array is returned.

        Raises:
            ImportError: If NumPy is not available
            ValueError: If start_packet < 0 or packet_count <= 0
            AudioFileError: If reading fails

        Example:

            with AudioFile("audio.wav") as audio:
                data = audio.read_as_numpy()
                print(f"Shape: {data.shape}, dtype: {data.dtype}")

            # output: Shape: (44100, 2), dtype: int16
        """
        if not NUMPY_AVAILABLE:
            raise ImportError(
                "NumPy is not available. Install numpy to use this feature."
            )

        if start_packet < 0:
            raise ValueError(f"start_packet must be non-negative, got {start_packet}")
        if packet_count is not None and packet_count <= 0:
            raise ValueError(f"packet_count must be positive, got {packet_count}")

        self._ensure_open()

        try:
            fmt = self.format

            # If packet_count not specified, read all remaining packets
            if packet_count is None:
                packet_count_data = capi.audio_file_get_property(
                    self.object_id,
                    capi.get_audio_file_property_audio_data_packet_count(),
                )
                if len(packet_count_data) < 8:
                    raise AudioFileError("Cannot determine packet count")
                total_packets = struct.unpack("<Q", packet_count_data[:8])[0]
                packet_count = total_packets - start_packet

            if packet_count > 0:
                data_bytes, _ = capi.audio_file_read_packets(
                    self.object_id, start_packet, packet_count
                )
            else:
                # Nothing left to read: never hand a zero or negative count
                # to the C layer, just produce an empty result.
                data_bytes = b""

            audio_data = np.frombuffer(data_bytes, dtype=fmt.to_numpy_dtype())

            # Audio data is typically interleaved: L R L R L R ...
            channels = fmt.channels_per_frame
            if channels > 1:
                num_frames = len(audio_data) // channels
                # Drop any trailing partial frame before reshaping.
                audio_data = audio_data[: num_frames * channels].reshape(
                    num_frames, channels
                )

            return audio_data

        except (ImportError, AudioFileError):
            raise
        except Exception as e:
            raise AudioFileError(f"Failed to read as NumPy array: {e}") from e

    def get_property(self, property_id: int) -> bytes:
        """Get a property from the audio file.

        Args:
            property_id: Identifier of the property to read.

        Returns:
            The raw property bytes returned by the C layer.

        Raises:
            AudioFileError: If the property cannot be read.
        """
        self._ensure_open()

        try:
            return capi.audio_file_get_property(self.object_id, property_id)
        except Exception as e:
            raise AudioFileError(f"Failed to get property: {e}") from e

    def set_property(self, property_id: int, data: bytes) -> None:
        """Set a property on the audio file.

        The file must have been opened with writable=True.

        Args:
            property_id: Identifier of the property to write.
            data: Raw property bytes.

        Raises:
            AudioFileError: If the file is not writable or the write fails.
        """
        self._ensure_not_disposed()
        if not self._writable:
            # Same guard as set_metadata: fail before touching the file.
            raise AudioFileError(
                "File not opened for writing. Use AudioFile(path, writable=True)."
            )
        if not self._is_open:
            self.open()

        try:
            capi.audio_file_set_property(self.object_id, property_id, data)
        except Exception as e:
            raise AudioFileError(f"Failed to set property: {e}") from e

    @property
    def metadata(self) -> dict[str, Any] | None:
        """Read the info dictionary metadata from the audio file.

        Returns a dict with string keys and string/number/bytes values,
        or None if the file format does not support metadata.
        """
        self._ensure_open()

        try:
            return capi.audio_file_read_info_dictionary(self.object_id)
        except Exception:
            # Best effort: any failure to read the dictionary is reported as None.
            return None

    def set_metadata(self, tags: dict[str, Any]) -> None:
        """Write metadata tags to the audio file.

        The file must have been opened with writable=True.
        Keys should be strings. Values can be str, int, or float.

        Common keys: 'title', 'artist', 'album', 'genre', 'year',
        'track number', 'comments', 'approximate duration in seconds'.

        Raises:
            AudioFileError: If the file is not writable or the write fails.
        """
        self._ensure_not_disposed()
        if not self._writable:
            # Reject read-only handles before opening the file.
            raise AudioFileError(
                "File not opened for writing. Use AudioFile(path, writable=True)."
            )
        if not self._is_open:
            self.open()

        try:
            capi.audio_file_write_info_dictionary(self.object_id, tags)
        except Exception as e:
            raise AudioFileError(f"Failed to write metadata: {e}") from e

    @property
    def duration(self) -> float:
        """Duration in seconds.

        Uses the estimated-duration property when it yields at least 8 bytes;
        otherwise falls back to packet count * frames per packet / sample
        rate. Returns 0.0 when neither approach works.
        """
        self._ensure_open()

        try:
            duration_data = capi.audio_file_get_property(
                self.object_id, capi.get_audio_file_property_estimated_duration()
            )
            if len(duration_data) >= 8:
                # Duration is a Float64 (double)
                duration: float = struct.unpack("<d", duration_data[:8])[0]
                return duration

            # Fallback: calculate from packet count and sample rate
            packet_count_data = capi.audio_file_get_property(
                self.object_id,
                capi.get_audio_file_property_audio_data_packet_count(),
            )
            if len(packet_count_data) >= 8:
                packet_count = struct.unpack("<Q", packet_count_data[:8])[0]
                fmt = self.format
                if fmt.sample_rate > 0:
                    calculated: float = (
                        packet_count * fmt.frames_per_packet / fmt.sample_rate
                    )
                    return calculated
            return 0.0
        except Exception:
            # If all methods fail, return 0.0
            return 0.0

    def __repr__(self) -> str:
        status = "open" if self._is_open else "closed"
        return f"AudioFile({self._path}, {status})"

    def dispose(self) -> None:
        """Dispose of the audio file, closing it first if it is still open."""
        if not self.is_disposed:
            if self._is_open:
                try:
                    capi.audio_file_close(self.object_id)
                except Exception:
                    pass  # Best effort cleanup
                finally:
                    self._is_open = False
            super().dispose()

_path = str(path) instance-attribute

_format = None instance-attribute

_is_open = False instance-attribute

_writable = writable instance-attribute

format property

Get the audio format of the file

metadata property

Read the info dictionary metadata from the audio file.

Returns a dict with string keys and string/number/bytes values, or None if the file format does not support metadata.

duration property

Duration in seconds

__init__(path, *, writable=False)

Source code in src/coremusic/audio/core.py
def __init__(self, path: str | Path, *, writable: bool = False):
    """Create a handle for the audio file at ``path`` without opening it.

    Args:
        path: Location of the audio file; stored as a string.
        writable: When True, the file is later opened with read/write
            permissions instead of read-only.
    """
    super().__init__()
    self._path = str(path)
    # Parsed format; stays None until it is first read from the file.
    self._format: AudioFormat | None = None
    self._is_open = False
    self._writable = writable

open()

Open the audio file

Source code in src/coremusic/audio/core.py
def open(self) -> "AudioFile":
    """Open the audio file if it is not already open.

    Returns:
        This instance, so the call can be chained.

    Raises:
        AudioFileError: If the underlying open call fails; the original
            exception is attached as ``__cause__``.
    """
    self._ensure_not_disposed()
    if not self._is_open:
        try:
            permissions = 3 if self._writable else 1  # READ_WRITE or READ
            file_id = capi.audio_file_open_url(self._path, permissions)
            self._set_object_id(file_id)
            self._is_open = True
        except Exception as e:
            raise AudioFileError(f"Failed to open file {self._path}: {e}") from e
    return self

close()

Close the audio file

Source code in src/coremusic/audio/core.py
def close(self) -> None:
    """Close the audio file and dispose of this object.

    Does nothing when the file is not open.

    Raises:
        AudioFileError: If the underlying close call fails; the original
            exception is attached as ``__cause__``. The object is still
            marked closed and disposed in that case.
    """
    if self._is_open:
        try:
            capi.audio_file_close(self.object_id)
        except Exception as e:
            raise AudioFileError(f"Failed to close file: {e}") from e
        finally:
            # Clear the flag first so dispose() does not close a second time.
            self._is_open = False
            self.dispose()

__enter__()

Source code in src/coremusic/audio/core.py
def __enter__(self) -> "AudioFile":
    """Open the file on entering a ``with`` block and return this instance."""
    self.open()
    return self

__exit__(exc_type, exc_val, exc_tb)

Source code in src/coremusic/audio/core.py
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
    """Close the file on leaving a ``with`` block.

    Returns None, so an exception raised inside the block is not suppressed.
    """
    self.close()

read_packets(start_packet, packet_count)

Read audio packets from the file.

Parameters:

Name Type Description Default
start_packet int

Starting packet index (must be non-negative)

required
packet_count int

Number of packets to read (must be positive)

required

Returns:

Type Description
tuple[bytes, int]

Tuple of (audio_data_bytes, packets_read)

Raises:

Type Description
ValueError

If start_packet < 0 or packet_count <= 0

AudioFileError

If reading fails

Example:

from coremusic.audio import AudioFile

# Read audio data in chunks
with AudioFile("audio.wav") as audio:
    chunk_size = 4096
    offset = 0

    while True:
        data, packets_read = audio.read_packets(offset, chunk_size)
        if packets_read == 0:
            break
        # Process data...
        offset += packets_read
Source code in src/coremusic/audio/core.py
def read_packets(self, start_packet: int, packet_count: int) -> tuple[bytes, int]:
    """Read audio packets from the file.

    Arguments are validated before the file is touched; the file is opened
    on demand if it is not open yet.

    Args:
        start_packet: Starting packet index (must be non-negative)
        packet_count: Number of packets to read (must be positive)

    Returns:
        Tuple of (audio_data_bytes, packets_read)

    Raises:
        ValueError: If start_packet < 0 or packet_count <= 0
        AudioFileError: If reading fails; the original exception is attached
            as ``__cause__``

    Example::

        from coremusic.audio import AudioFile

        # Read audio data in chunks
        with AudioFile("audio.wav") as audio:
            chunk_size = 4096
            offset = 0

            while True:
                data, packets_read = audio.read_packets(offset, chunk_size)
                if packets_read == 0:
                    break
                # Process data...
                offset += packets_read
    """
    if start_packet < 0:
        raise ValueError(f"start_packet must be non-negative, got {start_packet}")
    if packet_count <= 0:
        raise ValueError(f"packet_count must be positive, got {packet_count}")

    self._ensure_not_disposed()
    if not self._is_open:
        self.open()

    try:
        return capi.audio_file_read_packets(
            self.object_id, start_packet, packet_count
        )
    except Exception as e:
        raise AudioFileError(f"Failed to read packets: {e}") from e

read_as_numpy(start_packet=0, packet_count=None)

Read audio data from the file as a NumPy array.

Parameters:

Name Type Description Default
start_packet int

Starting packet index (default: 0)

0
packet_count int | None

Number of packets to read (default: all remaining packets)

None

Returns:

Type Description
'NDArray[Any]'

NumPy array with shape (frames, channels) for multi-channel audio, or (frames,) for mono audio. The dtype is determined by the audio format.

Raises:

Type Description
ImportError

If NumPy is not available

AudioFileError

If reading fails

Example:

with AudioFile("audio.wav") as audio:
    data = audio.read_as_numpy()
    print(f"Shape: {data.shape}, dtype: {data.dtype}")

# output: Shape: (44100, 2), dtype: int16
Source code in src/coremusic/audio/core.py
def read_as_numpy(
    self, start_packet: int = 0, packet_count: int | None = None
) -> "NDArray[Any]":
    """
    Read audio data from the file as a NumPy array.

    Args:
        start_packet: Starting packet index (default: 0)
        packet_count: Number of packets to read (default: all remaining packets)

    Returns:
        NumPy array with shape (frames, channels) for multi-channel audio,
        or (frames,) for mono audio. The dtype is determined by the audio format.
        When ``packet_count`` is omitted and ``start_packet`` is at or past
        the end of the file, an empty array is returned.

    Raises:
        ImportError: If NumPy is not available
        ValueError: If start_packet < 0 or packet_count <= 0
        AudioFileError: If reading fails

    Example:

        with AudioFile("audio.wav") as audio:
            data = audio.read_as_numpy()
            print(f"Shape: {data.shape}, dtype: {data.dtype}")

        # output: Shape: (44100, 2), dtype: int16
    """
    if not NUMPY_AVAILABLE:
        raise ImportError(
            "NumPy is not available. Install numpy to use this feature."
        )

    if start_packet < 0:
        raise ValueError(f"start_packet must be non-negative, got {start_packet}")
    if packet_count is not None and packet_count <= 0:
        raise ValueError(f"packet_count must be positive, got {packet_count}")

    self._ensure_not_disposed()
    if not self._is_open:
        self.open()

    try:
        fmt = self.format

        # If packet_count not specified, read all remaining packets
        if packet_count is None:
            packet_count_data = capi.audio_file_get_property(
                self.object_id,
                capi.get_audio_file_property_audio_data_packet_count(),
            )
            if len(packet_count_data) < 8:
                raise AudioFileError("Cannot determine packet count")
            total_packets = struct.unpack("<Q", packet_count_data[:8])[0]
            packet_count = total_packets - start_packet

        if packet_count > 0:
            data_bytes, _ = capi.audio_file_read_packets(
                self.object_id, start_packet, packet_count
            )
        else:
            # Nothing left to read: never hand a zero or negative count
            # to the C layer, just produce an empty result.
            data_bytes = b""

        audio_data = np.frombuffer(data_bytes, dtype=fmt.to_numpy_dtype())

        # Audio data is typically interleaved: L R L R L R ...
        channels = fmt.channels_per_frame
        if channels > 1:
            num_frames = len(audio_data) // channels
            # Drop any trailing partial frame before reshaping.
            audio_data = audio_data[: num_frames * channels].reshape(
                num_frames, channels
            )

        return audio_data

    except (ImportError, AudioFileError):
        raise
    except Exception as e:
        raise AudioFileError(f"Failed to read as NumPy array: {e}") from e

get_property(property_id)

Get a property from the audio file

Source code in src/coremusic/audio/core.py
def get_property(self, property_id: int) -> bytes:
    """Get a property from the audio file.

    The file is opened on demand if it is not open yet.

    Args:
        property_id: Identifier of the property to read.

    Returns:
        The raw property bytes returned by the C layer.

    Raises:
        AudioFileError: If the property cannot be read; the original
            exception is attached as ``__cause__``.
    """
    self._ensure_not_disposed()
    if not self._is_open:
        self.open()

    try:
        return capi.audio_file_get_property(self.object_id, property_id)
    except Exception as e:
        raise AudioFileError(f"Failed to get property: {e}") from e

set_property(property_id, data)

Set a property on the audio file.

The file must have been opened with writable=True.

Source code in src/coremusic/audio/core.py
def set_property(self, property_id: int, data: bytes) -> None:
    """Set a property on the audio file.

    The file must have been opened with writable=True.

    Args:
        property_id: Identifier of the property to write.
        data: Raw property bytes.

    Raises:
        AudioFileError: If the file is not writable or the write fails.
    """
    self._ensure_not_disposed()
    if not self._writable:
        # Same guard as set_metadata: fail before touching the file.
        raise AudioFileError(
            "File not opened for writing. Use AudioFile(path, writable=True)."
        )
    if not self._is_open:
        self.open()

    try:
        capi.audio_file_set_property(self.object_id, property_id, data)
    except Exception as e:
        raise AudioFileError(f"Failed to set property: {e}") from e

set_metadata(tags)

Write metadata tags to the audio file.

The file must have been opened with writable=True. Keys should be strings. Values can be str, int, or float.

Common keys: 'title', 'artist', 'album', 'genre', 'year', 'track number', 'comments', 'approximate duration in seconds'.

Source code in src/coremusic/audio/core.py
def set_metadata(self, tags: dict[str, Any]) -> None:
    """Write metadata tags to the audio file.

    The file must have been opened with writable=True.
    Keys should be strings. Values can be str, int, or float.

    Common keys: 'title', 'artist', 'album', 'genre', 'year',
    'track number', 'comments', 'approximate duration in seconds'.

    Raises:
        AudioFileError: If the file is not writable or the write fails.
    """
    self._ensure_not_disposed()
    if not self._writable:
        # Reject read-only handles before opening the file.
        raise AudioFileError(
            "File not opened for writing. Use AudioFile(path, writable=True)."
        )
    if not self._is_open:
        self.open()

    try:
        capi.audio_file_write_info_dictionary(self.object_id, tags)
    except Exception as e:
        raise AudioFileError(f"Failed to write metadata: {e}") from e

__repr__()

Source code in src/coremusic/audio/core.py
def __repr__(self) -> str:
    """Return ``AudioFile(<path>, open|closed)`` for debugging output."""
    if self._is_open:
        state = "open"
    else:
        state = "closed"
    return f"AudioFile({self._path}, {state})"

dispose()

Dispose of the audio file

Source code in src/coremusic/audio/core.py
def dispose(self) -> None:
    """Release the audio file, closing it first when it is still open.

    Calling this on an already disposed object does nothing. A failure
    while closing is ignored so that disposal always completes.
    """
    if self.is_disposed:
        return

    if self._is_open:
        try:
            capi.audio_file_close(self.object_id)
        except Exception:
            pass  # Best effort cleanup
        finally:
            self._is_open = False

    super().dispose()

AudioFormat Class

The AudioFormat class represents audio stream format information.

import coremusic as cm

# Access format from audio file
with cm.AudioFile("audio.wav") as audio:
    fmt = audio.format
    print(f"Sample rate: {fmt.sample_rate}Hz")
    print(f"Channels: {fmt.channels_per_frame}")
    print(f"Bit depth: {fmt.bits_per_channel}")

# Create custom format (named so it does not shadow the builtin ``format``)
custom_format = cm.AudioFormat(
    sample_rate=44100.0,
    format_id='lpcm',
    channels_per_frame=2,
    bits_per_channel=16,
)

Class Reference

coremusic.audio.AudioFormat

Pythonic representation of AudioStreamBasicDescription

Source code in src/coremusic/audio/core.py
class AudioFormat:
    """Pythonic representation of AudioStreamBasicDescription"""

    # Linear-PCM bits of AudioStreamBasicDescription.mFormatFlags
    # (values as declared in CoreAudio's CoreAudioBaseTypes.h).
    _FLAG_IS_FLOAT = 1 << 0
    _FLAG_IS_BIG_ENDIAN = 1 << 1
    _FLAG_IS_SIGNED_INTEGER = 1 << 2
    _FLAG_IS_PACKED = 1 << 3

    def __init__(
        self,
        sample_rate: float,
        format_id: str,
        format_flags: int = 0,
        bytes_per_packet: int = 0,
        frames_per_packet: int = 0,
        bytes_per_frame: int = 0,
        channels_per_frame: int = 2,
        bits_per_channel: int = 16,
    ):
        """Store the format fields exactly as given.

        No derived field is computed here; use :meth:`pcm` to build a PCM
        format with consistent flags and byte sizes.
        """
        self.sample_rate = sample_rate
        self.format_id = format_id
        self.format_flags = format_flags
        self.bytes_per_packet = bytes_per_packet
        self.frames_per_packet = frames_per_packet
        self.bytes_per_frame = bytes_per_frame
        self.channels_per_frame = channels_per_frame
        self.bits_per_channel = bits_per_channel

    @classmethod
    def from_asbd_bytes(cls, data: bytes) -> "AudioFormat":
        """Parse an AudioStreamBasicDescription (40 bytes) into an AudioFormat.

        Args:
            data: Raw ASBD bytes (at least 40 bytes)

        Returns:
            AudioFormat with parsed fields

        Raises:
            ValueError: If data is too short
        """
        if len(data) < 40:
            raise ValueError(f"ASBD data too short: {len(data)} bytes (need 40)")
        # One little-endian double followed by eight 32-bit unsigned fields;
        # the last one is the reserved word and is discarded.
        (
            sample_rate,
            format_id_int,
            format_flags,
            bytes_per_packet,
            frames_per_packet,
            bytes_per_frame,
            channels_per_frame,
            bits_per_channel,
            _reserved,
        ) = struct.unpack("<dLLLLLLLL", data[:40])
        format_id = capi.int_to_fourchar(format_id_int)
        return cls(
            sample_rate=sample_rate,
            format_id=format_id,
            format_flags=format_flags,
            bytes_per_packet=bytes_per_packet,
            frames_per_packet=frames_per_packet,
            bytes_per_frame=bytes_per_frame,
            channels_per_frame=channels_per_frame,
            bits_per_channel=bits_per_channel,
        )

    @classmethod
    def pcm(
        cls,
        sample_rate: float = 44100.0,
        channels: int = 2,
        bits: int = 16,
        is_float: bool = False,
    ) -> "AudioFormat":
        """Create a PCM AudioFormat with correctly computed derived fields.

        Args:
            sample_rate: Sample rate in Hz (default: 44100.0)
            channels: Number of channels (default: 2)
            bits: Bits per sample (default: 16)
            is_float: If True, create float format; otherwise signed integer

        Returns:
            AudioFormat with all ASBD fields correctly computed
        """
        bytes_per_sample = bits // 8
        bytes_per_frame = bytes_per_sample * channels
        if is_float:
            flags = cls._FLAG_IS_FLOAT
        else:
            # Packed signed integer. The previous literal ``4 | 2`` set the
            # big-endian bit (2) instead of the packed bit (8).
            flags = cls._FLAG_IS_SIGNED_INTEGER | cls._FLAG_IS_PACKED
        return cls(
            sample_rate=sample_rate,
            format_id="lpcm",
            format_flags=flags,
            bytes_per_packet=bytes_per_frame,  # one frame per packet
            frames_per_packet=1,
            bytes_per_frame=bytes_per_frame,
            channels_per_frame=channels,
            bits_per_channel=bits,
        )

    @property
    def is_pcm(self) -> bool:
        """Check if this is a PCM format"""
        return self.format_id == "lpcm"

    @property
    def is_stereo(self) -> bool:
        """Check if this is stereo (2 channels)"""
        return self.channels_per_frame == 2

    @property
    def is_mono(self) -> bool:
        """Check if this is mono (1 channel)"""
        return self.channels_per_frame == 1

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary format for functional API.

        A string ``format_id`` is converted to its integer four-character
        code; any other value is passed through unchanged.
        """
        format_id_int = (
            capi.fourchar_to_int(self.format_id)
            if isinstance(self.format_id, str)
            else self.format_id
        )

        return {
            "sample_rate": self.sample_rate,
            "format_id": format_id_int,
            "format_flags": self.format_flags,
            "bytes_per_packet": self.bytes_per_packet,
            "frames_per_packet": self.frames_per_packet,
            "bytes_per_frame": self.bytes_per_frame,
            "channels_per_frame": self.channels_per_frame,
            "bits_per_channel": self.bits_per_channel,
        }

    def to_numpy_dtype(self) -> "np.dtype[Any]":
        """
        Convert audio format to NumPy dtype for audio data arrays.

        The byte order of the returned dtype follows the format's big-endian
        flag. Signedness only affects 8-bit integer samples; 16-, 24- and
        32-bit integer samples always map to signed types.

        Returns:
            NumPy dtype object suitable for audio data representation

        Raises:
            ImportError: If NumPy is not available
            ValueError: If format cannot be converted to NumPy dtype
        """
        if not NUMPY_AVAILABLE:
            raise ImportError(
                "NumPy is not available. Install numpy to use this feature."
            )

        if not self.is_pcm:
            raise ValueError(
                f"Cannot convert non-PCM format '{self.format_id}' to NumPy dtype"
            )

        flags = self.format_flags
        bits = self.bits_per_channel

        if flags & self._FLAG_IS_FLOAT:
            if bits == 32:
                dtype = np.dtype(np.float32)
            elif bits == 64:
                dtype = np.dtype(np.float64)
            else:
                raise ValueError(f"Unsupported float bit depth: {bits}")
        else:
            # Signedness is carried by bit 4; bit 2 is the big-endian flag.
            is_signed = bool(flags & self._FLAG_IS_SIGNED_INTEGER)
            if bits == 8:
                dtype = np.dtype(np.int8 if is_signed else np.uint8)
            elif bits == 16:
                dtype = np.dtype(np.int16)
            elif bits == 24:
                # 24-bit audio is typically padded to 32-bit
                # NOTE(review): packed 3-byte samples do not fit this dtype;
                # confirm how callers handle packed 24-bit data.
                dtype = np.dtype(np.int32)
            elif bits == 32:
                dtype = np.dtype(np.int32)
            else:
                raise ValueError(f"Unsupported integer bit depth: {bits}")

        if flags & self._FLAG_IS_BIG_ENDIAN:
            # Raw sample bytes are big-endian; say so in the dtype so values
            # are decoded correctly on little-endian hosts.
            dtype = dtype.newbyteorder(">")
        return dtype

    def __repr__(self) -> str:
        return (
            f"AudioFormat({self.sample_rate}Hz, {self.format_id}, "
            f"channels={self.channels_per_frame}, bits={self.bits_per_channel})"
        )

sample_rate = sample_rate instance-attribute

format_id = format_id instance-attribute

format_flags = format_flags instance-attribute

bytes_per_packet = bytes_per_packet instance-attribute

frames_per_packet = frames_per_packet instance-attribute

bytes_per_frame = bytes_per_frame instance-attribute

channels_per_frame = channels_per_frame instance-attribute

bits_per_channel = bits_per_channel instance-attribute

is_pcm property

Check if this is a PCM format

is_stereo property

Check if this is stereo (2 channels)

is_mono property

Check if this is mono (1 channel)

__init__(sample_rate, format_id, format_flags=0, bytes_per_packet=0, frames_per_packet=0, bytes_per_frame=0, channels_per_frame=2, bits_per_channel=16)

Source code in src/coremusic/audio/core.py
def __init__(
    self,
    sample_rate: float,
    format_id: str,
    format_flags: int = 0,
    bytes_per_packet: int = 0,
    frames_per_packet: int = 0,
    bytes_per_frame: int = 0,
    channels_per_frame: int = 2,
    bits_per_channel: int = 16,
):
    """Store the format fields exactly as given.

    No validation is performed and no derived field is computed: the
    byte-size, frame and flag fields keep their defaults of 0 unless the
    caller supplies them.
    """
    self.sample_rate = sample_rate
    self.format_id = format_id
    self.format_flags = format_flags
    self.bytes_per_packet = bytes_per_packet
    self.frames_per_packet = frames_per_packet
    self.bytes_per_frame = bytes_per_frame
    self.channels_per_frame = channels_per_frame
    self.bits_per_channel = bits_per_channel

from_asbd_bytes(data) classmethod

Parse an AudioStreamBasicDescription (40 bytes) into an AudioFormat.

Parameters:

Name Type Description Default
data bytes

Raw ASBD bytes (at least 40 bytes)

required

Returns:

Type Description
'AudioFormat'

AudioFormat with parsed fields

Raises:

Type Description
ValueError

If data is too short

Source code in src/coremusic/audio/core.py
@classmethod
def from_asbd_bytes(cls, data: bytes) -> "AudioFormat":
    """Build an AudioFormat from raw AudioStreamBasicDescription bytes.

    Only the first 40 bytes are read: a little-endian double followed by
    eight 32-bit unsigned integers, the last of which is discarded.

    Args:
        data: Raw ASBD bytes (at least 40 bytes)

    Returns:
        AudioFormat with parsed fields

    Raises:
        ValueError: If data is too short
    """
    size = len(data)
    if size < 40:
        raise ValueError(f"ASBD data too short: {size} bytes (need 40)")

    fields = struct.unpack("<dLLLLLLLL", data[:40])
    rate, fourcc, flags, packet_bytes, packet_frames, frame_bytes, channels, bits = (
        fields[:8]
    )

    return cls(
        sample_rate=rate,
        format_id=capi.int_to_fourchar(fourcc),
        format_flags=flags,
        bytes_per_packet=packet_bytes,
        frames_per_packet=packet_frames,
        bytes_per_frame=frame_bytes,
        channels_per_frame=channels,
        bits_per_channel=bits,
    )

pcm(sample_rate=44100.0, channels=2, bits=16, is_float=False) classmethod

Create a PCM AudioFormat with correctly computed derived fields.

Parameters:

Name Type Description Default
sample_rate float

Sample rate in Hz (default: 44100.0)

44100.0
channels int

Number of channels (default: 2)

2
bits int

Bits per sample (default: 16)

16
is_float bool

If True, create float format; otherwise signed integer

False

Returns:

Type Description
'AudioFormat'

AudioFormat with all ASBD fields correctly computed

Source code in src/coremusic/audio/core.py
@classmethod
def pcm(
    cls,
    sample_rate: float = 44100.0,
    channels: int = 2,
    bits: int = 16,
    is_float: bool = False,
) -> "AudioFormat":
    """Create a PCM AudioFormat with correctly computed derived fields.

    The format is interleaved, packed linear PCM with one frame per packet.
    The big-endian flag is never set.

    Args:
        sample_rate: Sample rate in Hz (default: 44100.0)
        channels: Number of channels (default: 2)
        bits: Bits per sample (default: 16); the byte size is ``bits // 8``
        is_float: If True, create float format; otherwise signed integer

    Returns:
        AudioFormat with all ASBD fields correctly computed
    """
    # CoreAudio linear-PCM flag bits (AudioFormatFlags). Bit 1 (value 2) is
    # kAudioFormatFlagIsBigEndian and must NOT be set here.
    flag_is_float = 1 << 0  # kAudioFormatFlagIsFloat
    flag_is_signed_integer = 1 << 2  # kAudioFormatFlagIsSignedInteger
    flag_is_packed = 1 << 3  # kAudioFormatFlagIsPacked

    bytes_per_sample = bits // 8
    bytes_per_frame = bytes_per_sample * channels

    # bytes_per_frame leaves no padding around the samples, so the layout is
    # packed for both float and integer data.
    flags = flag_is_packed
    flags |= flag_is_float if is_float else flag_is_signed_integer

    return cls(
        sample_rate=sample_rate,
        format_id="lpcm",
        format_flags=flags,
        # One frame per packet, so a packet is exactly one frame in size.
        bytes_per_packet=bytes_per_frame,
        frames_per_packet=1,
        bytes_per_frame=bytes_per_frame,
        channels_per_frame=channels,
        bits_per_channel=bits,
    )

to_dict()

Convert to dictionary format for functional API

Source code in src/coremusic/audio/core.py
def to_dict(self) -> dict[str, Any]:
    """Return the format fields as a plain dict for the functional API.

    A string ``format_id`` is converted with ``capi.fourchar_to_int``; a
    non-string ``format_id`` is passed through unchanged. All other fields
    are copied as stored.
    """
    numeric_id = self.format_id
    if isinstance(numeric_id, str):
        numeric_id = capi.fourchar_to_int(numeric_id)

    result: dict[str, Any] = {
        "sample_rate": self.sample_rate,
        "format_id": numeric_id,
    }
    # Remaining fields are copied verbatim, in declaration order.
    for field_name in (
        "format_flags",
        "bytes_per_packet",
        "frames_per_packet",
        "bytes_per_frame",
        "channels_per_frame",
        "bits_per_channel",
    ):
        result[field_name] = getattr(self, field_name)
    return result

to_numpy_dtype()

Convert audio format to NumPy dtype for audio data arrays.

Returns:

Type Description
'np.dtype[Any]'

NumPy dtype object suitable for audio data representation

Raises:

Type Description
ImportError

If NumPy is not available

ValueError

If format cannot be converted to NumPy dtype

Source code in src/coremusic/audio/core.py
def to_numpy_dtype(self) -> "np.dtype[Any]":
    """
    Convert audio format to NumPy dtype for audio data arrays.

    Float formats map to float32/float64 by bit depth. Integer formats map
    to int8/uint8 (8-bit, depending on the signed-integer flag), int16
    (16-bit) or int32 (24- and 32-bit). The byte-order flag is not
    reflected in the returned dtype.

    Returns:
        NumPy dtype object suitable for audio data representation

    Raises:
        ImportError: If NumPy is not available
        ValueError: If format cannot be converted to NumPy dtype
    """
    if not NUMPY_AVAILABLE:
        raise ImportError(
            "NumPy is not available. Install numpy to use this feature."
        )

    if not self.is_pcm:
        raise ValueError(
            f"Cannot convert non-PCM format '{self.format_id}' to NumPy dtype"
        )

    # CoreAudio flag bits: 1 = IsFloat, 2 = IsBigEndian, 4 = IsSignedInteger.
    is_float = bool(self.format_flags & 1)  # kAudioFormatFlagIsFloat
    # Signedness lives in bit 2 (value 4); value 2 is the big-endian bit.
    is_signed = bool(self.format_flags & 4)  # kAudioFormatFlagIsSignedInteger

    if is_float:
        if self.bits_per_channel == 32:
            return np.dtype(np.float32)
        if self.bits_per_channel == 64:
            return np.dtype(np.float64)
        raise ValueError(f"Unsupported float bit depth: {self.bits_per_channel}")

    # Integer formats
    if self.bits_per_channel == 8:
        return np.dtype(np.int8 if is_signed else np.uint8)
    if self.bits_per_channel == 16:
        return np.dtype(np.int16)
    if self.bits_per_channel in (24, 32):
        # 24-bit audio is typically padded to 32-bit
        return np.dtype(np.int32)
    raise ValueError(f"Unsupported integer bit depth: {self.bits_per_channel}")

__repr__()

Source code in src/coremusic/audio/core.py
def __repr__(self) -> str:
    """Return a short summary: sample rate, format ID, channels and bit depth."""
    # Assembled from two halves so each f-string stays on one short line.
    head = f"AudioFormat({self.sample_rate}Hz, {self.format_id}, "
    tail = f"channels={self.channels_per_frame}, bits={self.bits_per_channel})"
    return head + tail

Functional API

The functional API provides direct access to CoreAudio file operations through the coremusic.capi module.

Note

The object-oriented AudioFile API is recommended for most use cases. Use the functional API only when you need fine-grained control.

Opening and Closing Files

Example:

import coremusic.capi as capi

# Open the audio file; the returned ID is what audio_file_close() receives below.
file_id = capi.audio_file_open_url("audio.wav")
try:
    # Use file...
    pass
finally:
    # Closing in `finally` releases the file even if the work above raises.
    capi.audio_file_close(file_id)

Reading Audio Data

Example:

import coremusic.capi as capi

# Open the file and keep its ID for the read and close calls below.
file_id = capi.audio_file_open_url("audio.wav")
try:
    # Read 1000 packets starting from packet 0.
    # The call returns a (data, packets_read) pair.
    # NOTE(review): packets_read is presumably lower than requested near the
    # end of the file - confirm against the capi documentation.
    data, packets_read = capi.audio_file_read_packets(file_id, 0, 1000)
    print(f"Read {packets_read} packets, {len(data)} bytes")
finally:
    # Always close the file, even if reading raised.
    capi.audio_file_close(file_id)

File Properties

Example:

import coremusic.capi as capi

# Open the file and keep its ID for the property query and close call below.
file_id = capi.audio_file_open_url("audio.wav")
try:
    # Get audio format: the property to query is selected by the value
    # returned from get_audio_file_property_data_format().
    format_data = capi.audio_file_get_property(
        file_id,
        capi.get_audio_file_property_data_format()
    )
    print(f"Format: {format_data}")
finally:
    # Always close the file, even if the property lookup raised.
    capi.audio_file_close(file_id)

Supported Formats

coremusic supports all audio formats supported by CoreAudio, including:

Common Formats

  • WAV (Waveform Audio File Format)
  • AIFF (Audio Interchange File Format)
  • MP3 (MPEG-1 Audio Layer 3)
  • AAC (Advanced Audio Coding)
  • ALAC (Apple Lossless Audio Codec)
  • FLAC (Free Lossless Audio Codec)

Format IDs

Common format IDs (FourCC codes):

  • 'lpcm' - Linear PCM (uncompressed)
  • 'aac ' - AAC
  • '.mp3' - MP3
  • 'alac' - Apple Lossless
  • 'flac' - FLAC

Format Flags

For Linear PCM, the format flags describe how each sample is stored:

  • Float vs Integer
  • Big Endian vs Little Endian
  • Packed vs Aligned
  • Signed vs Unsigned

Use the provided constant functions to get appropriate flags:

import coremusic.capi as capi

# Get standard format flags: OR together the individual flag values
# (here "is float" and "is packed") to build one flags value.
flags = capi.get_audio_format_flag_is_float() | \
        capi.get_audio_format_flag_is_packed()

Examples

Read Entire Audio File

import coremusic as cm

def read_audio_file(filepath):
    """Read an entire audio file into memory.

    Returns a dict with the raw data under 'data' plus the file's
    'sample_rate', 'channels' and 'format' taken from its format object.
    """
    with cm.AudioFile(filepath) as source:
        # The file's frame count is used as the number of packets to read,
        # starting at packet 0; the returned packet count is not needed.
        payload, _packets_read = source.read_packets(0, source.frame_count)

        summary = {
            'data': payload,
            'sample_rate': source.format.sample_rate,
            'channels': source.format.channels_per_frame,
            'format': source.format.format_id
        }
    return summary

# Use the function: the raw data is stored under the 'data' key of the result.
audio_data = read_audio_file("audio.wav")
print(f"Loaded {len(audio_data['data'])} bytes")

Process Audio in Chunks

import coremusic as cm

def process_audio_chunks(filepath, chunk_size=1024):
    """Process audio file in chunks.

    Reads up to ``chunk_size`` packets at a time and passes each chunk's
    data to ``process_audio_data``. Stops at the file's frame count, or
    earlier if a read returns no packets.

    Args:
        filepath: Path of the audio file to read
        chunk_size: Maximum number of packets per read (default: 1024)

    Raises:
        ValueError: If chunk_size is less than 1
    """
    # A non-positive chunk size could never advance the read position.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")

    with cm.AudioFile(filepath) as audio:
        total_frames = audio.frame_count
        current_frame = 0

        while current_frame < total_frames:
            # Never request past the end of the file.
            frames_to_read = min(chunk_size, total_frames - current_frame)

            # Read chunk
            data, count = audio.read_packets(current_frame, frames_to_read)

            # A read that yields nothing would leave current_frame unchanged
            # and loop forever, so stop here instead.
            if count <= 0:
                break

            # Process chunk
            process_audio_data(data)

            # Advance by what was actually read, not what was requested.
            current_frame += count

def process_audio_data(data):
    """Placeholder for per-chunk processing; does nothing and returns None."""
    # Replace this no-op with your processing logic.
    return None

Audio Format Conversion

import coremusic as cm

def convert_audio_format(input_path, output_path, target_format):
    """Convert audio file to different format (example sketch).

    Reads the whole input file and runs it through an AudioConverter built
    from the file's own format and ``target_format``. The converted data is
    computed but not written anywhere: ``output_path`` is not used yet and
    the function returns None.
    """
    with cm.AudioFile(input_path) as source:
        # The converter goes from the file's own format to the target.
        source_format = source.format
        transcoder = cm.AudioConverter(source_format, target_format)

        # Read everything from packet 0, then convert it in one call.
        packets, packet_count = source.read_packets(0, source.frame_count)
        converted_data = transcoder.convert(packets, packet_count)

        # Write to output file
        # (implementation depends on output requirements)

See Also