Audio File Operations¶

The audio file module provides functionality for reading and writing audio files in various formats.

Object-Oriented API¶

AudioFile Class¶

The AudioFile class provides high-level audio file operations with automatic resource management.

import coremusic as cm

# Context manager usage (recommended): the file is opened on entry and
# closed on exit, even if the body raises.
with cm.AudioFile("audio.wav") as audio:
    print(f"Duration: {audio.duration:.2f}s")
    data, count = audio.read_packets(0, 1000)

# Explicit management: pair open() with close() in a try/finally.
audio = cm.AudioFile("audio.wav")
audio.open()
try:
    # read_packets returns a (audio_data_bytes, packets_read) tuple
    data, count = audio.read_packets(0, 1000)
finally:
    audio.close()

Class Reference¶

`coremusic.audio.AudioFile` ¶

Bases: CoreAudioObject

High-level audio file operations with automatic resource management

Source code in src/coremusic/audio/core.py

class AudioFile(capi.CoreAudioObject):
    """High-level audio file operations with automatic resource management.

    The underlying file is opened lazily: every accessor calls ``open()``
    itself when the file is not open yet. ``close()`` also disposes the
    object (it calls ``dispose()``), so use a fresh ``AudioFile`` to read
    the same path again.
    """

    def __init__(self, path: str | Path, *, writable: bool = False):
        """Remember the path and access mode; the file is not opened here.

        Args:
            path: Location of the audio file (stored as ``str(path)``)
            writable: Open with read/write permissions instead of read-only
        """
        super().__init__()
        self._path = str(path)
        self._format: AudioFormat | None = None  # cached by the ``format`` property
        self._is_open = False
        self._writable = writable

    def open(self) -> "AudioFile":
        """Open the audio file if it is not already open.

        Returns:
            This instance, so calls can be chained.

        Raises:
            AudioFileError: If the underlying open call fails
        """
        self._ensure_not_disposed()
        if not self._is_open:
            try:
                permissions = 3 if self._writable else 1  # READ_WRITE or READ
                file_id = capi.audio_file_open_url(self._path, permissions)
                self._set_object_id(file_id)
                self._is_open = True
            except Exception as e:
                raise AudioFileError(f"Failed to open file {self._path}: {e}") from e
        return self

    def close(self) -> None:
        """Close the audio file and dispose of this object.

        Does nothing when the file is not open.

        Raises:
            AudioFileError: If the underlying close call fails (the object is
                still marked closed and disposed in that case)
        """
        if self._is_open:
            try:
                capi.audio_file_close(self.object_id)
            except Exception as e:
                raise AudioFileError(f"Failed to close file: {e}") from e
            finally:
                # Clear the flag first so dispose() does not close a second time.
                self._is_open = False
                self.dispose()

    def __enter__(self) -> "AudioFile":
        """Open the file and return this instance for the ``with`` body."""
        self.open()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Close the file; exceptions from the ``with`` body are not suppressed."""
        self.close()

    @property
    def format(self) -> AudioFormat:
        """Get the audio format of the file.

        The format is read once and cached on the instance.

        Raises:
            AudioFileError: If the format property cannot be read or parsed
        """
        self._ensure_not_disposed()
        if not self._is_open:
            self.open()

        if self._format is None:
            try:
                format_data = capi.audio_file_get_property(
                    self.object_id, capi.get_audio_file_property_data_format()
                )
                self._format = AudioFormat.from_asbd_bytes(format_data)
            except Exception as e:
                raise AudioFileError(f"Failed to get format: {e}") from e

        return self._format

    def read_packets(self, start_packet: int, packet_count: int) -> tuple[bytes, int]:
        """Read audio packets from the file.

        Args:
            start_packet: Starting packet index (must be non-negative)
            packet_count: Number of packets to read (must be positive)

        Returns:
            Tuple of (audio_data_bytes, packets_read)

        Raises:
            ValueError: If start_packet < 0 or packet_count <= 0
            AudioFileError: If reading fails

        Example::

            from coremusic.audio import AudioFile

            # Read audio data in chunks
            with AudioFile("audio.wav") as audio:
                chunk_size = 4096
                offset = 0

                while True:
                    data, packets_read = audio.read_packets(offset, chunk_size)
                    if packets_read == 0:
                        break
                    # Process data...
                    offset += packets_read
        """
        # Argument validation happens before the file is touched.
        if start_packet < 0:
            raise ValueError(f"start_packet must be non-negative, got {start_packet}")
        if packet_count <= 0:
            raise ValueError(f"packet_count must be positive, got {packet_count}")

        self._ensure_not_disposed()
        if not self._is_open:
            self.open()

        try:
            return capi.audio_file_read_packets(
                self.object_id, start_packet, packet_count
            )
        except Exception as e:
            raise AudioFileError(f"Failed to read packets: {e}") from e

    def read_as_numpy(
        self, start_packet: int = 0, packet_count: int | None = None
    ) -> "NDArray[Any]":
        """
        Read audio data from the file as a NumPy array.

        Args:
            start_packet: Starting packet index (default: 0)
            packet_count: Number of packets to read (default: all remaining packets)

        Returns:
            NumPy array with shape (frames, channels) for multi-channel audio,
            or (frames,) for mono audio. The dtype is determined by the audio format.
            When packet_count is omitted and start_packet is at or beyond the
            end of the file, an empty array is returned.

        Raises:
            ImportError: If NumPy is not available
            ValueError: If start_packet < 0 or packet_count <= 0
            AudioFileError: If reading fails

        Example:

            with AudioFile("audio.wav") as audio:
                data = audio.read_as_numpy()
                print(f"Shape: {data.shape}, dtype: {data.dtype}")

            # output: Shape: (44100, 2), dtype: int16
        """
        if not NUMPY_AVAILABLE:
            raise ImportError(
                "NumPy is not available. Install numpy to use this feature."
            )

        if start_packet < 0:
            raise ValueError(f"start_packet must be non-negative, got {start_packet}")
        if packet_count is not None and packet_count <= 0:
            raise ValueError(f"packet_count must be positive, got {packet_count}")

        self._ensure_not_disposed()
        if not self._is_open:
            self.open()

        try:
            fmt = self.format

            # If packet_count not specified, read all remaining packets
            if packet_count is None:
                packet_count_data = capi.audio_file_get_property(
                    self.object_id,
                    capi.get_audio_file_property_audio_data_packet_count(),
                )
                if len(packet_count_data) < 8:
                    raise AudioFileError("Cannot determine packet count")
                total_packets = struct.unpack("<Q", packet_count_data[:8])[0]
                packet_count = total_packets - start_packet

            if packet_count > 0:
                data_bytes, _ = capi.audio_file_read_packets(
                    self.object_id, start_packet, packet_count
                )
            else:
                # Nothing remains after start_packet: do not hand a zero or
                # negative count to the C layer, produce an empty result.
                data_bytes = b""

            audio_data = np.frombuffer(data_bytes, dtype=fmt.to_numpy_dtype())

            # Audio data is typically interleaved (L R L R ...): fold it into
            # (frames, channels), dropping any trailing partial frame.
            channels = fmt.channels_per_frame
            if channels > 1:
                num_frames = len(audio_data) // channels
                audio_data = audio_data[: num_frames * channels].reshape(
                    num_frames, channels
                )

            return audio_data

        except (ImportError, AudioFileError):
            raise
        except Exception as e:
            raise AudioFileError(f"Failed to read as NumPy array: {e}") from e

    def get_property(self, property_id: int) -> bytes:
        """Get a property from the audio file as raw bytes.

        Raises:
            AudioFileError: If the property cannot be read
        """
        self._ensure_not_disposed()
        if not self._is_open:
            self.open()

        try:
            return capi.audio_file_get_property(self.object_id, property_id)
        except Exception as e:
            raise AudioFileError(f"Failed to get property: {e}") from e

    def set_property(self, property_id: int, data: bytes) -> None:
        """Set a property on the audio file.

        The file must have been opened with writable=True.

        Raises:
            AudioFileError: If the file is read-only or the write fails
        """
        self._ensure_not_disposed()
        # Same guard as set_metadata: fail early with a clear message
        # instead of relying on the C layer to reject the write.
        if not self._writable:
            raise AudioFileError(
                "File not opened for writing. Use AudioFile(path, writable=True)."
            )
        if not self._is_open:
            self.open()

        try:
            capi.audio_file_set_property(self.object_id, property_id, data)
        except Exception as e:
            raise AudioFileError(f"Failed to set property: {e}") from e

    @property
    def metadata(self) -> dict[str, Any] | None:
        """Read the info dictionary metadata from the audio file.

        Returns a dict with string keys and string/number/bytes values,
        or None if the file format does not support metadata.
        """
        self._ensure_not_disposed()
        if not self._is_open:
            self.open()

        try:
            return capi.audio_file_read_info_dictionary(self.object_id)
        except Exception:
            # Deliberate best effort: any read failure is reported as None.
            return None

    def set_metadata(self, tags: dict[str, Any]) -> None:
        """Write metadata tags to the audio file.

        The file must have been opened with writable=True.
        Keys should be strings. Values can be str, int, or float.

        Common keys: 'title', 'artist', 'album', 'genre', 'year',
        'track number', 'comments', 'approximate duration in seconds'.

        Raises:
            AudioFileError: If the file is read-only or the write fails
        """
        self._ensure_not_disposed()
        # Reject read-only handles before opening anything.
        if not self._writable:
            raise AudioFileError(
                "File not opened for writing. Use AudioFile(path, writable=True)."
            )
        if not self._is_open:
            self.open()

        try:
            capi.audio_file_write_info_dictionary(self.object_id, tags)
        except Exception as e:
            raise AudioFileError(f"Failed to write metadata: {e}") from e

    @property
    def duration(self) -> float:
        """Duration in seconds.

        Uses the estimated-duration property when it yields at least 8 bytes,
        otherwise derives the value from packet count, frames per packet and
        sample rate. Returns 0.0 when neither approach works.
        """
        self._ensure_not_disposed()
        if not self._is_open:
            self.open()

        try:
            # Try to get estimated duration property
            duration_data = capi.audio_file_get_property(
                self.object_id, capi.get_audio_file_property_estimated_duration()
            )
            if len(duration_data) >= 8:
                # Duration is a Float64 (double)
                duration: float = struct.unpack("<d", duration_data[:8])[0]
                return duration

            # Fallback: calculate from packet count and sample rate
            packet_count_data = capi.audio_file_get_property(
                self.object_id,
                capi.get_audio_file_property_audio_data_packet_count(),
            )
            if len(packet_count_data) >= 8:
                packet_count = struct.unpack("<Q", packet_count_data[:8])[0]
                fmt = self.format
                if fmt.sample_rate > 0:
                    calculated: float = (
                        packet_count * fmt.frames_per_packet / fmt.sample_rate
                    )
                    return calculated
            return 0.0
        except Exception:
            # If all methods fail, return 0.0
            return 0.0

    def __repr__(self) -> str:
        status = "open" if self._is_open else "closed"
        return f"AudioFile({self._path}, {status})"

    def dispose(self) -> None:
        """Dispose of the audio file, closing it first if it is still open."""
        if not self.is_disposed:
            if self._is_open:
                try:
                    capi.audio_file_close(self.object_id)
                except Exception:
                    pass  # Best effort cleanup
                finally:
                    self._is_open = False
            super().dispose()

`_path = str(path)` `instance-attribute` ¶

`_format = None` `instance-attribute` ¶

`_is_open = False` `instance-attribute` ¶

`_writable = writable` `instance-attribute` ¶

`format` `property` ¶

Get the audio format of the file

`metadata` `property` ¶

Read the info dictionary metadata from the audio file.

Returns a dict with string keys and string/number/bytes values, or None if the file format does not support metadata.

`duration` `property` ¶

Duration in seconds

`__init__(path, *, writable=False)` ¶

Source code in src/coremusic/audio/core.py

def __init__(self, path: str | Path, *, writable: bool = False):
    """Record the path and access mode; the file itself is not opened here.

    Args:
        path: Location of the audio file (stored as ``str(path)``)
        writable: Request read/write permissions when the file is opened
    """
    super().__init__()
    self._path = str(path)
    self._format: AudioFormat | None = None  # filled in on first format lookup
    self._is_open = False
    self._writable = writable

`open()` ¶

Open the audio file

Source code in src/coremusic/audio/core.py

def open(self) -> "AudioFile":
    """Open the audio file if it is not already open.

    Returns:
        This instance, so calls can be chained.

    Raises:
        AudioFileError: If the underlying open call fails
    """
    self._ensure_not_disposed()
    if not self._is_open:
        try:
            permissions = 3 if self._writable else 1  # READ_WRITE or READ
            file_id = capi.audio_file_open_url(self._path, permissions)
            self._set_object_id(file_id)
            self._is_open = True
        except Exception as e:
            # Chain explicitly so the low-level cause stays in the traceback.
            raise AudioFileError(f"Failed to open file {self._path}: {e}") from e
    return self

`close()` ¶

Close the audio file

Source code in src/coremusic/audio/core.py

def close(self) -> None:
    """Close the audio file and dispose of this object.

    Does nothing when the file is not open.

    Raises:
        AudioFileError: If the underlying close call fails (the object is
            still marked closed and disposed in that case)
    """
    if self._is_open:
        try:
            capi.audio_file_close(self.object_id)
        except Exception as e:
            raise AudioFileError(f"Failed to close file: {e}") from e
        finally:
            # Clear the flag first so dispose() does not close a second time.
            self._is_open = False
            self.dispose()

`__enter__()` ¶

Source code in src/coremusic/audio/core.py

def __enter__(self) -> "AudioFile":
    """Context-manager entry: open the file and return this instance."""
    self.open()
    return self

`__exit__(exc_type, exc_val, exc_tb)` ¶

Source code in src/coremusic/audio/core.py

def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
    """Context-manager exit: close the file.

    The exception arguments are ignored and None is returned, so an
    exception raised inside the ``with`` body is not suppressed.
    """
    self.close()

`read_packets(start_packet, packet_count)` ¶

Read audio packets from the file.

Parameters:

Name	Type	Description	Default
`start_packet`	`int`	Starting packet index (must be non-negative)	required
`packet_count`	`int`	Number of packets to read (must be positive)	required

Returns:

Type	Description
`tuple[bytes, int]`	Tuple of (audio_data_bytes, packets_read)

Raises:

Type	Description
`ValueError`	If start_packet < 0 or packet_count <= 0
`AudioFileError`	If reading fails

Example:

from coremusic.audio import AudioFile

# Read audio data in chunks
with AudioFile("audio.wav") as audio:
    chunk_size = 4096  # packets requested per call
    offset = 0  # index of the next packet to read

    while True:
        data, packets_read = audio.read_packets(offset, chunk_size)
        # The example treats a read of zero packets as end of data.
        if packets_read == 0:
            break
        # Process data...
        offset += packets_read

Source code in src/coremusic/audio/core.py

def read_packets(self, start_packet: int, packet_count: int) -> tuple[bytes, int]:
    """Read audio packets from the file.

    Args:
        start_packet: Starting packet index (must be non-negative)
        packet_count: Number of packets to read (must be positive)

    Returns:
        Tuple of (audio_data_bytes, packets_read)

    Raises:
        ValueError: If start_packet < 0 or packet_count <= 0
        AudioFileError: If reading fails

    Example::

        from coremusic.audio import AudioFile

        # Read audio data in chunks
        with AudioFile("audio.wav") as audio:
            chunk_size = 4096
            offset = 0

            while True:
                data, packets_read = audio.read_packets(offset, chunk_size)
                if packets_read == 0:
                    break
                # Process data...
                offset += packets_read
    """
    # Argument validation happens before the file is touched.
    if start_packet < 0:
        raise ValueError(f"start_packet must be non-negative, got {start_packet}")
    if packet_count <= 0:
        raise ValueError(f"packet_count must be positive, got {packet_count}")

    self._ensure_not_disposed()
    if not self._is_open:
        self.open()

    try:
        return capi.audio_file_read_packets(
            self.object_id, start_packet, packet_count
        )
    except Exception as e:
        # Chain explicitly so the low-level cause stays in the traceback.
        raise AudioFileError(f"Failed to read packets: {e}") from e

`read_as_numpy(start_packet=0, packet_count=None)` ¶

Read audio data from the file as a NumPy array.

Parameters:

Name	Type	Description	Default
`start_packet`	`int`	Starting packet index (default: 0)	`0`
`packet_count`	`int \| None`	Number of packets to read (default: all remaining packets)	`None`

Returns:

Type	Description
`'NDArray[Any]'`	NumPy array with shape (frames, channels) for multi-channel audio, or (frames,) for mono audio. The dtype is determined by the audio format.

Raises:

Type	Description
`ImportError`	If NumPy is not available
`AudioFileError`	If reading fails

Example:

# Read the whole file (default start_packet=0, all remaining packets).
with AudioFile("audio.wav") as audio:
    data = audio.read_as_numpy()
    print(f"Shape: {data.shape}, dtype: {data.dtype}")

# output: Shape: (44100, 2), dtype: int16

Source code in src/coremusic/audio/core.py

def read_as_numpy(
    self, start_packet: int = 0, packet_count: int | None = None
) -> "NDArray[Any]":
    """
    Read audio data from the file as a NumPy array.

    Args:
        start_packet: Starting packet index (default: 0)
        packet_count: Number of packets to read (default: all remaining packets)

    Returns:
        NumPy array with shape (frames, channels) for multi-channel audio,
        or (frames,) for mono audio. The dtype is determined by the audio format.
        When packet_count is omitted and start_packet is at or beyond the
        end of the file, an empty array is returned.

    Raises:
        ImportError: If NumPy is not available
        ValueError: If start_packet < 0 or packet_count <= 0
        AudioFileError: If reading fails

    Example:

        with AudioFile("audio.wav") as audio:
            data = audio.read_as_numpy()
            print(f"Shape: {data.shape}, dtype: {data.dtype}")

        # output: Shape: (44100, 2), dtype: int16
    """
    if not NUMPY_AVAILABLE:
        raise ImportError(
            "NumPy is not available. Install numpy to use this feature."
        )

    if start_packet < 0:
        raise ValueError(f"start_packet must be non-negative, got {start_packet}")
    if packet_count is not None and packet_count <= 0:
        raise ValueError(f"packet_count must be positive, got {packet_count}")

    self._ensure_not_disposed()
    if not self._is_open:
        self.open()

    try:
        fmt = self.format

        # If packet_count not specified, read all remaining packets
        if packet_count is None:
            packet_count_data = capi.audio_file_get_property(
                self.object_id,
                capi.get_audio_file_property_audio_data_packet_count(),
            )
            if len(packet_count_data) < 8:
                raise AudioFileError("Cannot determine packet count")
            total_packets = struct.unpack("<Q", packet_count_data[:8])[0]
            packet_count = total_packets - start_packet

        if packet_count > 0:
            data_bytes, _ = capi.audio_file_read_packets(
                self.object_id, start_packet, packet_count
            )
        else:
            # Nothing remains after start_packet: do not hand a zero or
            # negative count to the C layer, produce an empty result.
            data_bytes = b""

        audio_data = np.frombuffer(data_bytes, dtype=fmt.to_numpy_dtype())

        # Audio data is typically interleaved (L R L R ...): fold it into
        # (frames, channels), dropping any trailing partial frame.
        channels = fmt.channels_per_frame
        if channels > 1:
            num_frames = len(audio_data) // channels
            audio_data = audio_data[: num_frames * channels].reshape(
                num_frames, channels
            )

        return audio_data

    except (ImportError, AudioFileError):
        raise
    except Exception as e:
        raise AudioFileError(f"Failed to read as NumPy array: {e}") from e

`get_property(property_id)` ¶

Get a property from the audio file

Source code in src/coremusic/audio/core.py

def get_property(self, property_id: int) -> bytes:
    """Get a property from the audio file as raw bytes.

    The file is opened first if necessary.

    Raises:
        AudioFileError: If the property cannot be read
    """
    self._ensure_not_disposed()
    if not self._is_open:
        self.open()

    try:
        return capi.audio_file_get_property(self.object_id, property_id)
    except Exception as e:
        # Chain explicitly so the low-level cause stays in the traceback.
        raise AudioFileError(f"Failed to get property: {e}") from e

`set_property(property_id, data)` ¶

Set a property on the audio file.

The file must have been opened with writable=True.

Source code in src/coremusic/audio/core.py

def set_property(self, property_id: int, data: bytes) -> None:
    """Set a property on the audio file.

    The file must have been opened with writable=True.

    Raises:
        AudioFileError: If the file is read-only or the write fails
    """
    self._ensure_not_disposed()
    # Enforce the documented requirement up front (same guard and message
    # as set_metadata) instead of relying on the C layer to reject the write.
    if not self._writable:
        raise AudioFileError(
            "File not opened for writing. Use AudioFile(path, writable=True)."
        )
    if not self._is_open:
        self.open()

    try:
        capi.audio_file_set_property(self.object_id, property_id, data)
    except Exception as e:
        raise AudioFileError(f"Failed to set property: {e}") from e

`set_metadata(tags)` ¶

Write metadata tags to the audio file.

The file must have been opened with writable=True. Keys should be strings. Values can be str, int, or float.

Common keys: 'title', 'artist', 'album', 'genre', 'year', 'track number', 'comments', 'approximate duration in seconds'.

Source code in src/coremusic/audio/core.py

def set_metadata(self, tags: dict[str, Any]) -> None:
    """Write metadata tags to the audio file.

    The file must have been opened with writable=True.
    Keys should be strings. Values can be str, int, or float.

    Common keys: 'title', 'artist', 'album', 'genre', 'year',
    'track number', 'comments', 'approximate duration in seconds'.

    Raises:
        AudioFileError: If the file is read-only or the write fails
    """
    self._ensure_not_disposed()
    # Reject read-only handles before opening anything: there is no point
    # acquiring the file just to refuse the write.
    if not self._writable:
        raise AudioFileError(
            "File not opened for writing. Use AudioFile(path, writable=True)."
        )
    if not self._is_open:
        self.open()

    try:
        capi.audio_file_write_info_dictionary(self.object_id, tags)
    except Exception as e:
        raise AudioFileError(f"Failed to write metadata: {e}") from e

`__repr__()` ¶

Source code in src/coremusic/audio/core.py

def __repr__(self) -> str:
    """Return ``AudioFile(<path>, open|closed)`` for debugging output."""
    if self._is_open:
        state = "open"
    else:
        state = "closed"
    return f"AudioFile({self._path}, {state})"

`dispose()` ¶

Dispose of the audio file

Source code in src/coremusic/audio/core.py

def dispose(self) -> None:
    """Release the audio file, closing it first when it is still open.

    Calling this on an already disposed object does nothing. Errors from
    the close call are ignored so cleanup always runs to completion.
    """
    if self.is_disposed:
        return

    if self._is_open:
        try:
            capi.audio_file_close(self.object_id)
        except Exception:
            # Cleanup is best effort; a failed close must not block disposal.
            pass
        finally:
            self._is_open = False

    super().dispose()

AudioFormat Class¶

The AudioFormat class represents audio stream format information.

import coremusic as cm

# Access format from audio file
with cm.AudioFile("audio.wav") as audio:
    fmt = audio.format
    print(f"Sample rate: {fmt.sample_rate}Hz")
    print(f"Channels: {fmt.channels_per_frame}")
    print(f"Bit depth: {fmt.bits_per_channel}")

# Create custom format (named so it does not shadow the builtin format()).
# Fields not passed here (format_flags, bytes_per_packet, frames_per_packet,
# bytes_per_frame) keep their default of 0; AudioFormat.pcm(...) computes
# them for PCM data.
custom_format = cm.AudioFormat(
    sample_rate=44100.0,
    format_id='lpcm',
    channels_per_frame=2,
    bits_per_channel=16
)

Class Reference¶

`coremusic.audio.AudioFormat` ¶

Pythonic representation of AudioStreamBasicDescription

Source code in src/coremusic/audio/core.py

class AudioFormat:
    """Pythonic representation of AudioStreamBasicDescription"""

    # Linear PCM format flag bits, as defined by CoreAudio (AudioFormatFlags).
    _FLAG_IS_FLOAT = 1  # kAudioFormatFlagIsFloat
    _FLAG_IS_BIG_ENDIAN = 2  # kAudioFormatFlagIsBigEndian
    _FLAG_IS_SIGNED_INTEGER = 4  # kAudioFormatFlagIsSignedInteger
    _FLAG_IS_PACKED = 8  # kAudioFormatFlagIsPacked

    def __init__(
        self,
        sample_rate: float,
        format_id: str,
        format_flags: int = 0,
        bytes_per_packet: int = 0,
        frames_per_packet: int = 0,
        bytes_per_frame: int = 0,
        channels_per_frame: int = 2,
        bits_per_channel: int = 16,
    ):
        """Store the format fields as given; nothing is derived or validated.

        Args:
            sample_rate: Sample rate in Hz
            format_id: Four-character format code such as 'lpcm'
            format_flags: Format-specific flag bits
            bytes_per_packet: Bytes in one packet
            frames_per_packet: Frames in one packet
            bytes_per_frame: Bytes in one frame
            channels_per_frame: Channels in one frame
            bits_per_channel: Bits in one sample of one channel
        """
        self.sample_rate = sample_rate
        self.format_id = format_id
        self.format_flags = format_flags
        self.bytes_per_packet = bytes_per_packet
        self.frames_per_packet = frames_per_packet
        self.bytes_per_frame = bytes_per_frame
        self.channels_per_frame = channels_per_frame
        self.bits_per_channel = bits_per_channel

    @classmethod
    def from_asbd_bytes(cls, data: bytes) -> "AudioFormat":
        """Parse an AudioStreamBasicDescription (40 bytes) into an AudioFormat.

        Args:
            data: Raw ASBD bytes (at least 40 bytes)

        Returns:
            AudioFormat with parsed fields

        Raises:
            ValueError: If data is too short
        """
        if len(data) < 40:
            raise ValueError(f"ASBD data too short: {len(data)} bytes (need 40)")
        # One little-endian double followed by eight 32-bit unsigned ints;
        # the last one is the reserved field and is discarded.
        (
            sample_rate,
            format_id_int,
            format_flags,
            bytes_per_packet,
            frames_per_packet,
            bytes_per_frame,
            channels_per_frame,
            bits_per_channel,
            _reserved,
        ) = struct.unpack("<dLLLLLLLL", data[:40])
        format_id = capi.int_to_fourchar(format_id_int)
        return cls(
            sample_rate=sample_rate,
            format_id=format_id,
            format_flags=format_flags,
            bytes_per_packet=bytes_per_packet,
            frames_per_packet=frames_per_packet,
            bytes_per_frame=bytes_per_frame,
            channels_per_frame=channels_per_frame,
            bits_per_channel=bits_per_channel,
        )

    @classmethod
    def pcm(
        cls,
        sample_rate: float = 44100.0,
        channels: int = 2,
        bits: int = 16,
        is_float: bool = False,
    ) -> "AudioFormat":
        """Create a PCM AudioFormat with correctly computed derived fields.

        The result describes packed, interleaved samples with one frame per
        packet. The big-endian flag is never set.

        Args:
            sample_rate: Sample rate in Hz (default: 44100.0)
            channels: Number of channels (default: 2)
            bits: Bits per sample (default: 16)
            is_float: If True, create float format; otherwise signed integer

        Returns:
            AudioFormat with all ASBD fields correctly computed
        """
        bytes_per_sample = bits // 8
        bytes_per_frame = bytes_per_sample * channels
        # Samples fill their whole container (bytes_per_frame is exactly
        # bytes_per_sample * channels), so the packed flag applies to both
        # the float and the integer layout.
        flags = cls._FLAG_IS_PACKED
        if is_float:
            flags |= cls._FLAG_IS_FLOAT
        else:
            flags |= cls._FLAG_IS_SIGNED_INTEGER
        return cls(
            sample_rate=sample_rate,
            format_id="lpcm",
            format_flags=flags,
            bytes_per_packet=bytes_per_frame,
            frames_per_packet=1,
            bytes_per_frame=bytes_per_frame,
            channels_per_frame=channels,
            bits_per_channel=bits,
        )

    @property
    def is_pcm(self) -> bool:
        """Check if this is a PCM format"""
        return self.format_id == "lpcm"

    @property
    def is_stereo(self) -> bool:
        """Check if this is stereo (2 channels)"""
        return self.channels_per_frame == 2

    @property
    def is_mono(self) -> bool:
        """Check if this is mono (1 channel)"""
        return self.channels_per_frame == 1

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary format for functional API"""
        # The functional API takes the format id as an integer code.
        format_id_int = (
            capi.fourchar_to_int(self.format_id)
            if isinstance(self.format_id, str)
            else self.format_id
        )

        return {
            "sample_rate": self.sample_rate,
            "format_id": format_id_int,
            "format_flags": self.format_flags,
            "bytes_per_packet": self.bytes_per_packet,
            "frames_per_packet": self.frames_per_packet,
            "bytes_per_frame": self.bytes_per_frame,
            "channels_per_frame": self.channels_per_frame,
            "bits_per_channel": self.bits_per_channel,
        }

    def to_numpy_dtype(self) -> "np.dtype[Any]":
        """
        Convert audio format to NumPy dtype for audio data arrays.

        Signedness of 8-bit samples follows the signed-integer flag, and the
        dtype is big-endian when the big-endian flag is set.

        Returns:
            NumPy dtype object suitable for audio data representation

        Raises:
            ImportError: If NumPy is not available
            ValueError: If format cannot be converted to NumPy dtype
        """
        if not NUMPY_AVAILABLE:
            raise ImportError(
                "NumPy is not available. Install numpy to use this feature."
            )

        if not self.is_pcm:
            raise ValueError(
                f"Cannot convert non-PCM format '{self.format_id}' to NumPy dtype"
            )

        flags = self.format_flags
        bits = self.bits_per_channel

        if flags & self._FLAG_IS_FLOAT:
            if bits == 32:
                dtype = np.dtype(np.float32)
            elif bits == 64:
                dtype = np.dtype(np.float64)
            else:
                raise ValueError(f"Unsupported float bit depth: {bits}")
        else:
            is_signed = bool(flags & self._FLAG_IS_SIGNED_INTEGER)
            if bits == 8:
                dtype = np.dtype(np.int8 if is_signed else np.uint8)
            elif bits == 16:
                dtype = np.dtype(np.int16)
            elif bits == 24:
                # 24-bit audio is typically padded to 32-bit
                # NOTE(review): packed 3-byte samples would not fit this
                # dtype - confirm how callers handle packed 24-bit data.
                dtype = np.dtype(np.int32)
            elif bits == 32:
                dtype = np.dtype(np.int32)
            else:
                raise ValueError(f"Unsupported integer bit depth: {bits}")

        if flags & self._FLAG_IS_BIG_ENDIAN:
            # Interpret the raw bytes in the byte order the format declares.
            dtype = dtype.newbyteorder(">")
        return dtype

    def __repr__(self) -> str:
        return (
            f"AudioFormat({self.sample_rate}Hz, {self.format_id}, "
            f"channels={self.channels_per_frame}, bits={self.bits_per_channel})"
        )

`sample_rate = sample_rate` `instance-attribute` ¶

`format_id = format_id` `instance-attribute` ¶

`format_flags = format_flags` `instance-attribute` ¶

`bytes_per_packet = bytes_per_packet` `instance-attribute` ¶

`frames_per_packet = frames_per_packet` `instance-attribute` ¶

`bytes_per_frame = bytes_per_frame` `instance-attribute` ¶

`channels_per_frame = channels_per_frame` `instance-attribute` ¶

`bits_per_channel = bits_per_channel` `instance-attribute` ¶

`is_pcm` `property` ¶

Check if this is a PCM format

`is_stereo` `property` ¶

Check if this is stereo (2 channels)

`is_mono` `property` ¶

Check if this is mono (1 channel)

`__init__(sample_rate, format_id, format_flags=0, bytes_per_packet=0, frames_per_packet=0, bytes_per_frame=0, channels_per_frame=2, bits_per_channel=16)` ¶

Source code in src/coremusic/audio/core.py

def __init__(
    self,
    sample_rate: float,
    format_id: str,
    format_flags: int = 0,
    bytes_per_packet: int = 0,
    frames_per_packet: int = 0,
    bytes_per_frame: int = 0,
    channels_per_frame: int = 2,
    bits_per_channel: int = 16,
):
    """Store the format fields as given; nothing is derived or validated.

    Args:
        sample_rate: Sample rate in Hz
        format_id: Four-character format code such as 'lpcm'
        format_flags: Format-specific flag bits
        bytes_per_packet: Bytes in one packet
        frames_per_packet: Frames in one packet
        bytes_per_frame: Bytes in one frame
        channels_per_frame: Channels in one frame
        bits_per_channel: Bits in one sample of one channel
    """
    self.sample_rate = sample_rate
    self.format_id = format_id
    self.format_flags = format_flags
    self.bytes_per_packet = bytes_per_packet
    self.frames_per_packet = frames_per_packet
    self.bytes_per_frame = bytes_per_frame
    self.channels_per_frame = channels_per_frame
    self.bits_per_channel = bits_per_channel

`from_asbd_bytes(data)` `classmethod` ¶

Parse an AudioStreamBasicDescription (40 bytes) into an AudioFormat.

Parameters:

Name	Type	Description	Default
`data`	`bytes`	Raw ASBD bytes (at least 40 bytes)	required

Returns:

Type	Description
`'AudioFormat'`	AudioFormat with parsed fields

Raises:

Type	Description
`ValueError`	If data is too short

Source code in src/coremusic/audio/core.py

@classmethod
def from_asbd_bytes(cls, data: bytes) -> "AudioFormat":
    """Build an AudioFormat from raw AudioStreamBasicDescription bytes.

    Only the first 40 bytes are read: one little-endian double followed by
    eight 32-bit unsigned integers, the last of which is discarded.

    Args:
        data: Raw ASBD bytes (at least 40 bytes)

    Returns:
        AudioFormat with parsed fields

    Raises:
        ValueError: If data is too short
    """
    if len(data) < 40:
        raise ValueError(f"ASBD data too short: {len(data)} bytes (need 40)")

    fields = struct.unpack("<dLLLLLLLL", data[:40])
    rate, id_code, flags, pkt_bytes, pkt_frames, frame_bytes, channels, bits = (
        fields[:8]
    )  # fields[8] is the reserved slot and is ignored

    return cls(
        sample_rate=rate,
        format_id=capi.int_to_fourchar(id_code),
        format_flags=flags,
        bytes_per_packet=pkt_bytes,
        frames_per_packet=pkt_frames,
        bytes_per_frame=frame_bytes,
        channels_per_frame=channels,
        bits_per_channel=bits,
    )

`pcm(sample_rate=44100.0, channels=2, bits=16, is_float=False)` `classmethod` ¶

Create a PCM AudioFormat with correctly computed derived fields.

Parameters:

Name	Type	Description	Default
`sample_rate`	`float`	Sample rate in Hz (default: 44100.0)	`44100.0`
`channels`	`int`	Number of channels (default: 2)	`2`
`bits`	`int`	Bits per sample (default: 16)	`16`
`is_float`	`bool`	If True, create float format; otherwise signed integer	`False`

Returns:

Type	Description
`'AudioFormat'`	AudioFormat with all ASBD fields correctly computed

Source code in src/coremusic/audio/core.py

@classmethod
def pcm(
    cls,
    sample_rate: float = 44100.0,
    channels: int = 2,
    bits: int = 16,
    is_float: bool = False,
) -> "AudioFormat":
    """Create a PCM AudioFormat with correctly computed derived fields.

    The frame size is ``(bits // 8) * channels`` and each packet holds
    exactly one frame, so ``bytes_per_packet`` equals ``bytes_per_frame``.

    Args:
        sample_rate: Sample rate in Hz (default: 44100.0)
        channels: Number of channels (default: 2)
        bits: Bits per sample (default: 16)
        is_float: If True, create float format; otherwise signed integer

    Returns:
        AudioFormat with all ASBD fields correctly computed
    """
    bytes_per_sample = bits // 8
    bytes_per_frame = bytes_per_sample * channels

    if is_float:
        # NOTE(review): the float branch has never set the packed bit;
        # left unchanged here - confirm whether 1 | 8 is wanted.
        flags = 1  # kAudioFormatFlagIsFloat (1 << 0)
    else:
        # kAudioFormatFlagIsSignedInteger (1 << 2) | kAudioFormatFlagIsPacked (1 << 3).
        # The earlier value 4 | 2 set bit value 2, which CoreAudio defines as
        # kAudioFormatFlagIsBigEndian, so integer formats were tagged
        # big-endian and never packed.
        flags = 4 | 8

    return cls(
        sample_rate=sample_rate,
        format_id="lpcm",
        format_flags=flags,
        bytes_per_packet=bytes_per_frame,  # one frame per packet
        frames_per_packet=1,
        bytes_per_frame=bytes_per_frame,
        channels_per_frame=channels,
        bits_per_channel=bits,
    )

`to_dict()` ¶

Convert to dictionary format for functional API

Source code in src/coremusic/audio/core.py

def to_dict(self) -> dict[str, Any]:
    """Return the format fields as a plain dict for the functional API.

    Every field is copied under its attribute name. The one exception is
    ``format_id``: a string value is converted to its integer code, while
    any non-string value is passed through untouched.
    """
    field_names = (
        "sample_rate",
        "format_id",
        "format_flags",
        "bytes_per_packet",
        "frames_per_packet",
        "bytes_per_frame",
        "channels_per_frame",
        "bits_per_channel",
    )
    as_dict = {name: getattr(self, name) for name in field_names}

    # The functional API works with the numeric form of the format code.
    if isinstance(as_dict["format_id"], str):
        as_dict["format_id"] = capi.fourchar_to_int(as_dict["format_id"])

    return as_dict

`to_numpy_dtype()` ¶

Convert audio format to NumPy dtype for audio data arrays.

Returns:

Type	Description
`'np.dtype[Any]'`	NumPy dtype object suitable for audio data representation

Raises:

Type	Description
`ImportError`	If NumPy is not available
`ValueError`	If format cannot be converted to NumPy dtype

Source code in src/coremusic/audio/core.py

def to_numpy_dtype(self) -> "np.dtype[Any]":
    """
    Convert audio format to NumPy dtype for audio data arrays.

    Only PCM formats are supported. Float formats map to float32/float64
    by bit depth. Integer formats map to int8/uint8 (chosen by the
    signed-integer flag), int16, or int32; 24-bit samples are reported as
    int32.

    Returns:
        NumPy dtype object suitable for audio data representation

    Raises:
        ImportError: If NumPy is not available
        ValueError: If format cannot be converted to NumPy dtype
    """
    if not NUMPY_AVAILABLE:
        raise ImportError(
            "NumPy is not available. Install numpy to use this feature."
        )

    if not self.is_pcm:
        raise ValueError(
            f"Cannot convert non-PCM format '{self.format_id}' to NumPy dtype"
        )

    bits = self.bits_per_channel

    if self.format_flags & 1:  # kAudioFormatFlagIsFloat (1 << 0)
        if bits == 32:
            return np.dtype(np.float32)
        if bits == 64:
            return np.dtype(np.float64)
        raise ValueError(f"Unsupported float bit depth: {bits}")

    # kAudioFormatFlagIsSignedInteger is bit value 4 (1 << 2). The earlier
    # test, `not (flags & 2)`, looked at the big-endian bit and negated it,
    # so unsigned little-endian 8-bit data was reported as int8.
    is_signed = bool(self.format_flags & 4)

    # NOTE(review): the big-endian bit (value 2) is not reflected in the
    # returned dtype's byte order - confirm whether callers need that.
    if bits == 8:
        return np.dtype(np.int8 if is_signed else np.uint8)
    if bits == 16:
        return np.dtype(np.int16)
    if bits in (24, 32):
        # 24-bit audio is typically padded to 32-bit
        return np.dtype(np.int32)
    raise ValueError(f"Unsupported integer bit depth: {bits}")

`__repr__()` ¶

Source code in src/coremusic/audio/core.py

def __repr__(self) -> str:
    """Return a short summary: sample rate, format id, channels and bit depth."""
    rate = self.sample_rate
    fmt_id = self.format_id
    channels = self.channels_per_frame
    bits = self.bits_per_channel
    return f"AudioFormat({rate}Hz, {fmt_id}, channels={channels}, bits={bits})"

Functional API¶

The functional API provides direct access to CoreAudio file operations through the coremusic.capi module.

Note

The object-oriented AudioFile API is recommended for most use cases. Use the functional API only when you need fine-grained control.

Opening and Closing Files¶

Example:

import coremusic.capi as capi

# Open audio file
file_id = capi.audio_file_open_url("audio.wav")
try:
    # Use file...
    pass
finally:
    capi.audio_file_close(file_id)

Reading Audio Data¶

Example:

import coremusic.capi as capi

file_id = capi.audio_file_open_url("audio.wav")
try:
    # Read 1000 packets starting from packet 0
    data, packets_read = capi.audio_file_read_packets(file_id, 0, 1000)
    print(f"Read {packets_read} packets, {len(data)} bytes")
finally:
    capi.audio_file_close(file_id)

File Properties¶

Example:

import coremusic.capi as capi

file_id = capi.audio_file_open_url("audio.wav")
try:
    # Get audio format
    format_data = capi.audio_file_get_property(
        file_id,
        capi.get_audio_file_property_data_format()
    )
    print(f"Format: {format_data}")
finally:
    capi.audio_file_close(file_id)

Supported Formats¶

coremusic supports all audio formats supported by CoreAudio, including:

Common Formats¶

WAV (Waveform Audio File Format)
AIFF (Audio Interchange File Format)
MP3 (MPEG-1 Audio Layer 3)
AAC (Advanced Audio Coding)
ALAC (Apple Lossless Audio Codec)
FLAC (Free Lossless Audio Codec)

Format IDs¶

Common format IDs (FourCC codes):

'lpcm' - Linear PCM (uncompressed)
'aac ' - AAC
'.mp3' - MP3
'alac' - Apple Lossless
'flac' - FLAC

Format Flags¶

For Linear PCM, common format flags include:

Float vs Integer
Big Endian vs Little Endian
Packed vs Aligned
Signed vs Unsigned

Use the provided constant functions to get appropriate flags:

import coremusic.capi as capi

# Get standard format flags
flags = capi.get_audio_format_flag_is_float() | \
        capi.get_audio_format_flag_is_packed()

Examples¶

Read Entire Audio File¶

import coremusic as cm

def read_audio_file(filepath):
    """Load a whole audio file and return its bytes with basic format info.

    The file is opened through the ``cm.AudioFile`` context manager and read
    in a single call starting at packet 0, using the file's frame count as
    the number of packets to request.

    Returns a dict with the keys 'data', 'sample_rate', 'channels' and
    'format'.
    """
    with cm.AudioFile(filepath) as audio:
        # One read covering the whole file; the packet count that comes
        # back is not needed here.
        payload, _packets_read = audio.read_packets(0, audio.frame_count)

        fmt = audio.format
        return {
            'data': payload,
            'sample_rate': fmt.sample_rate,
            'channels': fmt.channels_per_frame,
            'format': fmt.format_id
        }

# Use the function
audio_data = read_audio_file("audio.wav")
print(f"Loaded {len(audio_data['data'])} bytes")

Process Audio in Chunks¶

import coremusic as cm

def process_audio_chunks(filepath, chunk_size=1024):
    """Process audio file in chunks.

    Reads the file sequentially, at most ``chunk_size`` packets per read,
    and hands each chunk of data to ``process_audio_data``.

    Args:
        filepath: Path of the audio file to read
        chunk_size: Maximum number of packets per read (default: 1024)

    Raises:
        ValueError: If chunk_size is less than 1
    """
    if chunk_size < 1:
        # A non-positive chunk size can never advance the read position.
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")

    with cm.AudioFile(filepath) as audio:
        total_frames = audio.frame_count
        current_frame = 0

        while current_frame < total_frames:
            # The last chunk may be shorter than chunk_size
            frames_to_read = min(chunk_size, total_frames - current_frame)

            # Read chunk
            data, count = audio.read_packets(current_frame, frames_to_read)

            if count <= 0:
                # Nothing was read, so the position would not move and the
                # loop would spin forever; stop instead.
                break

            # Process chunk
            process_audio_data(data)

            current_frame += count

def process_audio_data(data):
    """Placeholder hook that receives one chunk of audio data.

    It does nothing with the chunk and returns None.
    """
    # Your processing logic here
    return None

Audio Format Conversion¶

import coremusic as cm

def convert_audio_format(input_path, output_path, target_format):
    """Convert audio file to different format.

    Reads the whole input file in one call and passes the data through a
    ``cm.AudioConverter`` built from the input file's format and
    ``target_format``. Writing the result to ``output_path`` is left to the
    reader; as written, the function returns None.
    """
    with cm.AudioFile(input_path) as source:
        # Converter from the source file's own format to the requested one
        converter = cm.AudioConverter(source.format, target_format)

        # Pull everything from packet 0 onward, then convert it
        packets_wanted = source.frame_count
        raw, packets_read = source.read_packets(0, packets_wanted)
        converted_data = converter.convert(raw, packets_read)

        # Write to output file
        # (implementation depends on output requirements)

Audio File Operations¶

Object-Oriented API¶

AudioFile Class¶

Class Reference¶

coremusic.audio.AudioFile ¶

_path = str(path) instance-attribute ¶

_format = None instance-attribute ¶

_is_open = False instance-attribute ¶

_writable = writable instance-attribute ¶

format property ¶

metadata property ¶

duration property ¶

__init__(path, *, writable=False) ¶

open() ¶

close() ¶

__enter__() ¶

__exit__(exc_type, exc_val, exc_tb) ¶

read_packets(start_packet, packet_count) ¶

read_as_numpy(start_packet=0, packet_count=None) ¶

get_property(property_id) ¶

set_property(property_id, data) ¶

set_metadata(tags) ¶

__repr__() ¶

dispose() ¶

AudioFormat Class¶

Class Reference¶

coremusic.audio.AudioFormat ¶

sample_rate = sample_rate instance-attribute ¶

format_id = format_id instance-attribute ¶

format_flags = format_flags instance-attribute ¶

bytes_per_packet = bytes_per_packet instance-attribute ¶

frames_per_packet = frames_per_packet instance-attribute ¶

bytes_per_frame = bytes_per_frame instance-attribute ¶

channels_per_frame = channels_per_frame instance-attribute ¶

bits_per_channel = bits_per_channel instance-attribute ¶

is_pcm property ¶

is_stereo property ¶

is_mono property ¶

__init__(sample_rate, format_id, format_flags=0, bytes_per_packet=0, frames_per_packet=0, bytes_per_frame=0, channels_per_frame=2, bits_per_channel=16) ¶

from_asbd_bytes(data) classmethod ¶

pcm(sample_rate=44100.0, channels=2, bits=16, is_float=False) classmethod ¶

to_dict() ¶

to_numpy_dtype() ¶

__repr__() ¶

Functional API¶

Opening and Closing Files¶

Reading Audio Data¶

File Properties¶

Supported Formats¶

Common Formats¶

Format IDs¶

Format Flags¶

Examples¶

Read Entire Audio File¶

Process Audio in Chunks¶

Audio Format Conversion¶

See Also¶

`coremusic.audio.AudioFile` ¶

`_path = str(path)` `instance-attribute` ¶

`_format = None` `instance-attribute` ¶

`_is_open = False` `instance-attribute` ¶

`_writable = writable` `instance-attribute` ¶

`format` `property` ¶

`metadata` `property` ¶

`duration` `property` ¶

`__init__(path, *, writable=False)` ¶

`open()` ¶

`close()` ¶

`__enter__()` ¶

`__exit__(exc_type, exc_val, exc_tb)` ¶

`read_packets(start_packet, packet_count)` ¶

`read_as_numpy(start_packet=0, packet_count=None)` ¶

`get_property(property_id)` ¶

`set_property(property_id, data)` ¶

`set_metadata(tags)` ¶

`__repr__()` ¶

`dispose()` ¶

`coremusic.audio.AudioFormat` ¶

`sample_rate = sample_rate` `instance-attribute` ¶

`format_id = format_id` `instance-attribute` ¶

`format_flags = format_flags` `instance-attribute` ¶

`bytes_per_packet = bytes_per_packet` `instance-attribute` ¶

`frames_per_packet = frames_per_packet` `instance-attribute` ¶

`bytes_per_frame = bytes_per_frame` `instance-attribute` ¶

`channels_per_frame = channels_per_frame` `instance-attribute` ¶

`bits_per_channel = bits_per_channel` `instance-attribute` ¶

`is_pcm` `property` ¶

`is_stereo` `property` ¶

`is_mono` `property` ¶

`__init__(sample_rate, format_id, format_flags=0, bytes_per_packet=0, frames_per_packet=0, bytes_per_frame=0, channels_per_frame=2, bits_per_channel=16)` ¶

`from_asbd_bytes(data)` `classmethod` ¶

`pcm(sample_rate=44100.0, channels=2, bits=16, is_float=False)` `classmethod` ¶

`to_dict()` ¶

`to_numpy_dtype()` ¶

`__repr__()` ¶