Skip to content

skais_mapper.illustris.groupcat

Illustris file i/o for FoF and Subfind group catalog.

Adapted from: https://github.com/illustristng/illustris_python

Functions:

Name Description
get_offset_path

Get absolute path to a separate offset file (modify as needed).

get_path

Get absolute path to a group catalog HDF5 file (modify as needed).

load

Load complete group catalog all at once.

load_catalog

Load either halo or subhalo information from the group catalog.

load_group

Load a specified HDF5 group from a group catalog.

load_halos

Load all halo information from the entire group catalog for one snapshot.

load_header

Load the header of a group catalog.

load_single

Fetch the complete group catalog information for a single halo or subhalo.

load_subhalos

Load all subhalo information from the entire group catalog for one snapshot.

get_offset_path

get_offset_path(
    base_path: str, snapshot: int, *args
) -> str

Get absolute path to a separate offset file (modify as needed).

Parameters:

Name Type Description Default
base_path str

Base path to the Illustris(TNG) snapshots.

required
snapshot int

Snapshot ID {0-99}.

required
*args

Dummy arguments for compatibility.

()

Returns:

Type Description
str

Absolute path to a group catalog's offsets HDF5 file.

Source code in skais_mapper/illustris/groupcat.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def get_offset_path(base_path: str, snapshot: int, *args) -> str:
    """Build the path of the separate offsets file of a snapshot (modify as needed).

    The file is located at `<base_path>/offsets/offsets_<NNN>.hdf5`, where
    `<NNN>` is the snapshot ID zero-padded to three digits.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots.
        snapshot: Snapshot ID {0-99}.
        *args: Ignored; accepted only for call-signature compatibility.

    Returns:
        (str): `base_path` joined with the relative name of the group
          catalog's offsets HDF5 file.
    """
    relative_name = "offsets/offsets_{:03d}.hdf5".format(snapshot)
    return os.path.join(base_path, relative_name)

get_path

get_path(
    base_path: str, snapshot: int, partition: int = 0
) -> str

Get absolute path to a group catalog HDF5 file (modify as needed).

Parameters:

Name Type Description Default
base_path str

Base path to the Illustris(TNG) snapshots

required
snapshot int

Snapshot ID {0-99}

required
partition int

Subfile partition ID {0-600+}

0

Returns:

Type Description
str

Absolute path to a group catalog HDF5 file

Source code in skais_mapper/illustris/groupcat.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def get_path(base_path: str, snapshot: int, partition: int = 0) -> str:
    """Get the path to a group catalog HDF5 file (modify as needed).

    Two file naming schemes are supported inside
    `<base_path>/groupcats/<NNN>/`: `groups_<NNN>.<partition>.hdf5` and
    `fof_subhalo_tab_<NNN>.<partition>.hdf5`. The `groups_` file is returned
    if it exists on disk; otherwise the `fof_subhalo_tab_` path is returned
    without checking for its existence.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots
        snapshot: Snapshot ID {0-99}
        partition: Subfile partition ID {0-600+}

    Returns:
        (str): Path to a group catalog HDF5 file
    """
    gc_dir = os.path.join(base_path, f"groupcats/{snapshot:03d}")
    suffix = f"{snapshot:03d}.{partition:d}.hdf5"
    filepath = os.path.join(gc_dir, f"groups_{suffix}")
    if os.path.isfile(filepath):
        return filepath
    # Build the alternative name from its parts rather than string-replacing
    # on the full path, so a `base_path` containing "groups_" stays intact.
    return os.path.join(gc_dir, f"fof_subhalo_tab_{suffix}")

load

load(
    base_path,
    snapshot,
    subhalos_kwargs: dict = None,
    halos_kwargs: dict = None,
    header_kwargs: dict = None,
    **kwargs,
) -> dict

Load complete group catalog all at once.

Parameters:

Name Type Description Default
base_path

Base path to the Illustris(TNG) snapshots.

required
snapshot

Snapshot ID {0-99}.

required
subhalos_kwargs dict

Keyword arguments for loading subhalos.

None
halos_kwargs dict

Keyword arguments for loading halos.

None
header_kwargs dict

Keyword arguments for loading the header.

None
**kwargs

Additional key/value pairs; they are kept as entries of the returned dictionary.

{}

Returns:

Type Description
dict

A dictionary of the loaded data

Source code in skais_mapper/illustris/groupcat.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
def load(
    base_path,
    snapshot,
    subhalos_kwargs: dict = None,
    halos_kwargs: dict = None,
    header_kwargs: dict = None,
    **kwargs,
) -> dict:
    """Load the subhalos, halos and header of a group catalog in one call.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots.
        snapshot: Snapshot ID {0-99}.
        subhalos_kwargs: Keyword arguments forwarded to `load_subhalos`.
        halos_kwargs: Keyword arguments forwarded to `load_halos`.
        header_kwargs: Keyword arguments forwarded to `load_header`.
        **kwargs: Additional key/value pairs; they are kept as entries of the
          returned dictionary (the keys 'subhalos', 'halos' and 'header' are
          overwritten).

    Returns:
        (dict): The extra keyword entries plus the loaded data under the keys
          'subhalos', 'halos' and 'header'.
    """
    # Normalise omitted option dictionaries so they can be unpacked below.
    subhalos_kwargs = {} if subhalos_kwargs is None else subhalos_kwargs
    halos_kwargs = {} if halos_kwargs is None else halos_kwargs
    header_kwargs = {} if header_kwargs is None else header_kwargs
    # The extra keyword arguments double as the result container.
    catalog = kwargs
    catalog["subhalos"] = load_subhalos(base_path, snapshot, **subhalos_kwargs)
    catalog["halos"] = load_halos(base_path, snapshot, **halos_kwargs)
    catalog["header"] = load_header(base_path, snapshot, **header_kwargs)
    return catalog

load_catalog

load_catalog(
    base_path: str,
    snapshot: int,
    key: str,
    key_ref: str,
    fields: list = None,
    as_float32: bool = False,
    as_array: bool = True,
    with_pbar: bool = True,
) -> dict | NDArray

Load either halo or subhalo information from the group catalog.

Parameters:

Name Type Description Default
base_path str

Base path to the Illustris(TNG) snapshots.

required
snapshot int

Snapshot ID {0-99}.

required
key str

Group name from the HDF5 group catalog, e.g. 'Group' or 'Subhalo'.

required
key_ref str

Group name reference string in the HDF5 group catalog's header keys, e.g. 'groups' or 'subgroups'/'subhalos'

required
fields list

Fields to be loaded for the corresponding group, e.g. ['GroupPos', 'GroupMass'] or 'SubhaloGasMetalFractions'.

None
as_float32 bool

Load float64 data types as float32 (to save memory).

False
as_array bool

Return a numpy array instead of a dictionary; takes effect only if a single field was requested.

True
with_pbar bool

If True, a progress bar will show the current status.

True

Returns:

Type Description
dict | numpy.ndarray

A dictionary of the loaded data

Source code in skais_mapper/illustris/groupcat.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def load_catalog(
    base_path: str,
    snapshot: int,
    key: str,
    key_ref: str,
    fields: list = None,
    as_float32: bool = False,
    as_array: bool = True,
    with_pbar: bool = True,
) -> dict | NDArray:
    """Load either halo or subhalo information from the group catalog.

    The header and the requested group of the first partition are used to
    allocate one output array per field, sized for the total object count.
    All partitions listed in the header ('NumFiles') are then read in order
    and copied into these arrays.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots.
        snapshot: Snapshot ID {0-99}.
        key: Group name from the HDF5 group catalog, e.g. 'Group' or 'Subhalo'.
        key_ref: Group name reference string in the HDF5 group catalog's
          header keys, e.g. 'groups' or 'subgroups'/'subhalos'
        fields: Fields to be loaded for the corresponding group,
          e.g. ['GroupPos', 'GroupMass'] or 'SubhaloGasMetalFractions'.
          If empty or None, all fields of the group are loaded.
        as_float32: Load float64 data types as float32 (to save memory).
        as_array: Return a numpy array instead of a dictionary; takes
          effect only if a single field was requested.
        with_pbar: If True, a progress bar will show the current status.

    Returns:
        (dict | numpy.ndarray): A dictionary holding 'count' and one array per
          loaded field, or the bare array if `as_array` is set and exactly one
          field was loaded. If the header reports no objects, a dictionary
          containing only 'count' is returned.

    Raises:
        KeyError: If the group `key` or one of the requested fields is missing
          from the group catalog.
    """
    data = {}
    if fields is None:
        fields = []
    elif isinstance(fields, str | bytes):
        fields = [fields]
    # Partition files below are opened without an explicit `path_func`, so the
    # class-level default is pointed at the group catalog path builder.
    IllustrisH5File.path_func = get_path
    # load header from first partition
    header = load_header(base_path, snapshot)
    if f"N{key_ref}_Total" not in header and key_ref == "subgroups":
        key_ref = "subhalos"
    data["count"] = header.get(f"N{key_ref}_Total", None)
    if not data["count"]:
        print(f"Warning: zero groups, empty return (snap='{snapshot}').")
        return data
    group = load_group(base_path, snapshot, key)
    if group is None:
        raise KeyError(f"Group catalog does not have requested group [{key}]!")
    try:
        if not fields:
            fields = list(group.keys())
        for field in fields:
            if field not in group:
                raise KeyError(f"Group catalog does not have requested field [{field}]!")
            # replace local length with global
            shape = list(group[field].shape)
            shape[0] = data["count"]
            dtype = group[field].dtype
            if dtype == np.float64 and as_float32:
                dtype = np.float32
            # allocate data arrays
            data[field] = np.zeros(shape, dtype=dtype)
    finally:
        # Release the group handle even if a requested field is missing.
        group._id.close()
    # loop over partitions
    arr_offset = 0
    num_files = header["NumFiles"]
    partition_iterator = trange(num_files) if with_pbar else range(num_files)
    for i in partition_iterator:
        f = IllustrisH5File(base_path, snapshot, i)
        try:
            # if partition is empty
            if not f["Header"].attrs[f"N{key_ref}_ThisFile"]:
                continue
            # loop over each field
            for field in fields:
                if field not in f[key].keys():
                    raise KeyError(f"Group catalog does not have requested field [{field}]!")
                dataset = f[key][field]
                rows = dataset.shape[0]
                # Slicing the first axis copies whole rows regardless of rank.
                data[field][arr_offset : arr_offset + rows] = dataset[0:rows]
            # All fields of a group are assumed to share the same number of
            # rows per partition, so the last field's length advances the offset.
            arr_offset += rows
        finally:
            # Close the partition on every path (empty partition, error, success).
            f.close()
    if as_array and len(fields) == 1:
        return data[fields[0]]
    return data

load_group

load_group(
    base_path: str,
    snapshot: int,
    key: str,
    as_dict: bool = False,
) -> dict | h5py.Group

Load a specified HDF5 group from a group catalog.

Parameters:

Name Type Description Default
base_path str

Base path to the Illustris(TNG) snapshots.

required
snapshot int

Snapshot ID {0-99}.

required
key str

Group descriptor, i.e. a group from the HDF5 catalog e.g. 'Group' or 'Subhalo'.

required
as_dict bool

If True, a dictionary is returned, otherwise as h5py.Group object.

False

Returns:

Type Description
dict

The HDF5 group from the group catalog HDF5 file as a dictionary

Note: Remember to close the HDF5 file afterwards (use `<group>._id.close()`, where `<group>` is the returned object).

Source code in skais_mapper/illustris/groupcat.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def load_group(base_path: str, snapshot: int, key: str, as_dict: bool = False) -> dict | h5py.Group:
    """Fetch one HDF5 group from the first file of a group catalog.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots.
        snapshot: Snapshot ID {0-99}.
        key: Group descriptor, i.e. a group from the HDF5 catalog
          e.g. 'Group' or 'Subhalo'.
        as_dict: If True, a dictionary is returned, otherwise as
          h5py.Group object.

    Returns:
        (dict | h5py.Group): The requested HDF5 group, converted to a
          dictionary if `as_dict` is set. If the catalog file has no group
          named `key`, the file is closed and None is returned.

    Note: When a group is returned the HDF5 file is left open; remember to
      close it afterwards (use <group>._id.close()).
    """
    catalog = IllustrisH5File(base_path, snapshot, path_func=get_path)
    # Nothing to hand out: release the file right away.
    if key not in catalog:
        catalog.close()
        return None
    return dict(catalog[key]) if as_dict else catalog[key]

load_halos

load_halos(
    base_path: str, snapshot: int, **kwargs
) -> dict | NDArray

Load all halo information from the entire group catalog for one snapshot.

Parameters:

Name Type Description Default
base_path str

Base path to the Illustris(TNG) snapshots.

required
snapshot int

Snapshot ID {0-99}.

required
fields

Fields to be loaded for the corresponding group, e.g. ['GroupPos', 'GroupMass'] or 'SubhaloGasMetalFractions'.

required
as_float32

Load float64 data types as float32 (to save memory).

required
as_array

Return a numpy array instead of a dictionary; takes effect only if a single field was requested.

required
with_pbar

If True, a progress bar will show the current status.

required
**kwargs

Additional keywords for load_catalog

{}

Returns:

Type Description
dict | numpy.ndarray

A dictionary of the loaded data

Source code in skais_mapper/illustris/groupcat.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def load_halos(base_path: str, snapshot: int, **kwargs) -> dict | NDArray:
    """Load halo information from the entire group catalog of one snapshot.

    Thin wrapper around `load_catalog` for the 'Group' HDF5 group, using the
    header key reference 'groups'.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots.
        snapshot: Snapshot ID {0-99}.
        **kwargs: Additional keywords forwarded to `load_catalog`, such as
          fields: Fields to be loaded for the corresponding group,
            e.g. ['GroupPos', 'GroupMass'].
          as_float32: Load float64 data types as float32 (to save memory).
          as_array: Return a numpy array instead of a dictionary; takes
            effect only if a single field was requested.
          with_pbar: If True, a progress bar will show the current status.

    Returns:
        (dict | numpy.ndarray): Whatever `load_catalog` returns for the
          'Group' group (a dictionary of the loaded data or a single array).
    """
    return load_catalog(base_path, snapshot, key="Group", key_ref="groups", **kwargs)

load_header

load_header(
    base_path: str, snapshot: int, as_dict: bool = True
) -> dict | h5py.Group

Load the header of a group catalog.

Parameters:

Name Type Description Default
base_path str

Base path to the Illustris(TNG) snapshots.

required
snapshot int

Snapshot ID {0-99}.

required
as_dict bool

If True, a dictionary is returned, otherwise as h5py.Group object.

True

Returns:

Type Description
dict

The header of the group catalog HDF5 file as a dictionary

Source code in skais_mapper/illustris/groupcat.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def load_header(
    base_path: str,
    snapshot: int,
    as_dict: bool = True,
) -> dict | h5py.Group:
    """Load the header of a group catalog.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots.
        snapshot: Snapshot ID {0-99}.
        as_dict: If True, a dictionary is returned, otherwise as
          h5py.Group object.

    Returns:
        (dict | h5py.Group): The attributes of the 'Header' group as a
          dictionary, or the 'Header' group object itself if `as_dict` is
          False.

    Note: With `as_dict=False` the HDF5 file is intentionally left open so
      that the returned group can still be read; remember to close it
      afterwards, in the same way as a group returned by `load_group`.
    """
    if as_dict:
        # The attributes are copied into a plain dict, so the file can be
        # released as soon as the copy is made.
        with IllustrisH5File(base_path, snapshot, path_func=get_path) as f:
            return dict(f["Header"].attrs.items())
    # NOTE(review): leaving the `with` block presumably closes the file, which
    # would leave a returned group unusable; hence no context manager here.
    f = IllustrisH5File(base_path, snapshot, path_func=get_path)
    return f["Header"]

load_single

load_single(
    base_path, snapshot, halo_id=-1, subhalo_id=-1
) -> dict

Fetch the complete group catalog information for a single halo or subhalo.

Parameters:

Name Type Description Default
base_path

Base path to the Illustris(TNG) snapshots.

required
snapshot

Snapshot ID {0-99}.

required
halo_id

Group ID, i.e. halo ID value from the FOF catalog.

-1
subhalo_id

Group ID, i.e. subhalo ID value from the FOF catalog.

-1

Returns:

Type Description
dict

A dictionary of the loaded data

Source code in skais_mapper/illustris/groupcat.py
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
def load_single(base_path, snapshot, halo_id=-1, subhalo_id=-1) -> dict:
    """Read every group catalog field of one single halo or subhalo.

    The per-file offsets are used to locate the partition file containing
    the object and its index inside that file. They are taken from the
    separate offsets file if the catalog path contains 'fof_subhalo', and
    from the group catalog header otherwise.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots.
        snapshot: Snapshot ID {0-99}.
        halo_id: Group ID, i.e. halo ID value from the FOF catalog.
        subhalo_id: Group ID, i.e. subhalo ID value from the FOF catalog.

    Returns:
        (dict): A dictionary mapping each field name of the selected group
          to the entry of the requested object.

    Raises:
        ValueError: If neither or both of `halo_id` and `subhalo_id` are
          non-negative.
    """
    neither_given = halo_id < 0 and subhalo_id < 0
    both_given = halo_id >= 0 and subhalo_id >= 0
    if neither_given or both_given:
        raise ValueError("Must specify either halo_id or subhalo_id (not both).")
    if subhalo_id >= 0:
        key, group_id = "Subhalo", subhalo_id
    else:
        key, group_id = "Group", halo_id
    # old or new format
    uses_offset_files = "fof_subhalo" in get_path(base_path, snapshot)
    if uses_offset_files:
        # use separate 'offsets_nnn.hdf5' files
        with IllustrisH5File(base_path, snapshot, path_func=get_offset_path) as offset_file:
            file_offsets = offset_file["FileOffsets/" + key][()]
    else:
        # use header of group catalog
        with IllustrisH5File(base_path, snapshot, path_func=get_path) as catalog_file:
            file_offsets = catalog_file["Header"].attrs["FileOffsets_" + key]
    # Index of the object relative to the start of each partition file; the
    # last file with a non-negative relative index is the one containing it.
    relative = group_id - file_offsets
    file_id = np.max(np.where(relative >= 0))
    group_offset = relative[file_id]
    # load halo/subhalo fields into a dict
    with IllustrisH5File(base_path, snapshot, file_id, path_func=get_path) as partition:
        data = {field: partition[key][field][group_offset] for field in partition[key].keys()}
    return data

load_subhalos

load_subhalos(
    base_path: str, snapshot: int, **kwargs
) -> dict | NDArray

Load all subhalo information from the entire group catalog for one snapshot.

Parameters:

Name Type Description Default
base_path str

Base path to the Illustris(TNG) snapshots.

required
snapshot int

Snapshot ID {0-99}.

required
**kwargs

Additional keywords passed on to load_catalog, such as: fields – fields to be loaded for the corresponding group, e.g. ['GroupPos', 'GroupMass'] or 'SubhaloGasMetalFractions'; as_float32 – load float64 data types as float32 (to save memory); as_array – return a numpy array instead of a dictionary (takes effect only if a single field was requested); with_pbar – if True, a progress bar will show the current status.

{}

Returns:

Type Description
dict | numpy.ndarray

A dictionary of the loaded data

Source code in skais_mapper/illustris/groupcat.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def load_subhalos(base_path: str, snapshot: int, **kwargs) -> dict | NDArray:
    """Load subhalo information from the entire group catalog of one snapshot.

    Thin wrapper around `load_catalog` for the 'Subhalo' HDF5 group, using
    the header key reference 'subgroups'.

    Args:
        base_path: Base path to the Illustris(TNG) snapshots.
        snapshot: Snapshot ID {0-99}.
        **kwargs: Additional keywords forwarded to `load_catalog`, such as
          fields: Fields to be loaded for the corresponding group,
            e.g. 'SubhaloGasMetalFractions'.
          as_float32: Load float64 data types as float32 (to save memory).
          as_array: Return a numpy array instead of a dictionary; takes
            effect only if a single field was requested.
          with_pbar: If True, a progress bar will show the current status.

    Returns:
        (dict | numpy.ndarray): Whatever `load_catalog` returns for the
          'Subhalo' group (a dictionary of the loaded data or a single array).
    """
    return load_catalog(base_path, snapshot, key="Subhalo", key_ref="subgroups", **kwargs)