Module gamslib.objectcsv.objectcollection

Aggregate and manage CSV/XLSX metadata for multiple GAMS objects.

This module provides the ObjectCollection class to collect, save, load, and distribute object and datastream metadata across multiple GAMS object directories. It supports aggregating metadata into CSV and XLSX files and updating individual object folders from those centralized metadata files.

Classes

class ObjectCollection
Expand source code
class ObjectCollection:
    """
    Represents a collection of metadata for multiple GAMS objects and their datastreams.

    Used to aggregate, save, load, and distribute object and datastream metadata
    between individual object directories and combined CSV/XLSX files.
    """

    def __init__(self):
        """
        Initialize an empty ObjectCollection.
        """
        self.objects: dict[str, ObjectData] = {}  # keys are recids (pid)
        self.datastreams: dict[str, list[DSData]] = {}  # keys are object ids (recids)

    def collect_from_objects(self, root_dir: Path) -> None:
        """
        Collect metadata from all object directories below root_dir.

        Args:
            root_dir (Path): Directory containing object folders.

        Raises:
            ValueError: If object metadata (CSV) is missing for any object directory.
        """
        for obj_dir in find_object_folders(root_dir):
            object_meta = ObjectCSVManager(obj_dir)
            if object_meta.is_empty():
                raise ValueError(
                    f"Object metadata (csv) is not set for {obj_dir}. "
                    "Please check the object directory."
                )
            self.objects[obj_dir.name] = object_meta.get_object()
            for dsdata in object_meta.get_datastreamdata():
                if obj_dir.name not in self.datastreams:
                    self.datastreams[obj_dir.name] = []
                self.datastreams[obj_dir.name].append(dsdata)

    def distribute_to_objects(self, root_dir: Path) -> tuple[int, int]:
        """
        Distribute aggregated metadata to individual object directories.

        Updates object.csv and datastreams.csv files in each object directory.

        Args:
            root_dir (Path): Directory containing object folders.

        Returns:
            tuple[int, int]: Number of updated objects and datastreams.

        Raises:
            UserWarning: If an object directory does not exist.
        """
        updated_objects_counter = 0
        updated_datastreams_counter = 0
        for obj_id, obj_data in self.objects.items():
            obj_dir = root_dir / obj_id
            if obj_dir.is_dir():
                obj_mgr = ObjectCSVManager(obj_dir, ignore_existing_csv_files=True)
                obj_mgr.set_object(obj_data, replace=True)
                updated_objects_counter += 1
                for dsdata in self.datastreams.get(obj_id, []):
                    obj_mgr.add_datastream(dsdata, replace=True)
                    updated_datastreams_counter += 1
                obj_mgr.save()
            else:
                raise UserWarning(
                    f"Object directory {obj_dir} does not exist. Skipping."
                )
        return updated_objects_counter, updated_datastreams_counter

    def count_objects(self) -> int:
        """
        Return the number of objects in the collection.

        Returns:
            int: Number of objects.
        """
        return len(self.objects)

    def count_datastreams(self) -> int:
        """
        Return the total number of datastreams in the collection.

        Returns:
            int: Number of datastreams.
        """
        return sum(len(ds) for ds in self.datastreams.values())

    def save_to_csv(
        self, obj_file: Path | None = None, ds_file: Path | None = None
    ) -> None:
        """
        Save object and datastream metadata to two CSV files.

        Args:
            obj_file (Path | None): Path for object metadata CSV. Defaults to 'all_objects.csv'.
            ds_file (Path | None): Path for datastream metadata CSV. Defaults to 'all_datastreams.csv'.
        """
        obj_file = obj_file or Path(ALL_OBJECTS_CSV)
        ds_file = ds_file or Path(ALL_DATASTREAMS_CSV)
        with obj_file.open("w", encoding="utf-8", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=ObjectData.fieldnames())
            writer.writeheader()
            for obj in self.objects.values():
                writer.writerow(asdict(obj))
        with ds_file.open("w", encoding="utf-8", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=DSData.fieldnames())
            writer.writeheader()
            for datastreams in self.datastreams.values():
                for dsdata in datastreams:
                    writer.writerow(asdict(dsdata))

    def save_to_xlsx(self, xlsx_file: Path | None = None) -> None:
        """
        Save object and datastream metadata to a single XLSX file with two sheets.

        Args:
            xlsx_file (Path | None): Path for XLSX file. Defaults to 'all_objects.xlsx'.
        """
        xlsx_file = xlsx_file or Path(ALL_OBJECTS_XLSX)
        with tempfile.TemporaryDirectory() as tmpdir:
            obj_file = Path(tmpdir) / ALL_OBJECTS_CSV
            ds_file = Path(tmpdir) / ALL_DATASTREAMS_CSV
            self.save_to_csv(obj_file, ds_file)
            xlsx.csv_to_xlsx(obj_file, ds_file, xlsx_file)

    def load_from_csv(
        self, obj_file: Path | None = None, ds_file: Path | None = None
    ) -> None:
        """
        Load object and datastream metadata from two CSV files.

        Args:
            obj_file (Path | None): Path for object metadata CSV. Defaults to 'all_objects.csv'.
            ds_file (Path | None): Path for datastream metadata CSV. Defaults to 'all_datastreams.csv'.

        Raises:
            FileNotFoundError: If either CSV file does not exist.
        """
        obj_file = obj_file or Path(ALL_OBJECTS_CSV)
        ds_file = ds_file or Path(ALL_DATASTREAMS_CSV)
        if not obj_file.is_file():
            raise FileNotFoundError(f"Required csv file {obj_file} does not exist.")
        if not ds_file.is_file():
            raise FileNotFoundError(f"Required csv file {ds_file} does not exist.")
        self.objects.clear()
        self.datastreams.clear()

        with obj_file.open("r", encoding="utf-8", newline="") as f:
            reader = csv.DictReader(f)
            for row in reader:
                obj_data = ObjectData(**row)
                self.objects[obj_data.recid] = obj_data

        with ds_file.open("r", encoding="utf-8", newline="") as f:
            reader = csv.DictReader(f)
            for row in reader:
                ds_data = DSData(**row)
                obj_id = ds_data.dspath.split("/")[0]  # Extract object id from dspath
                if obj_id not in self.datastreams:
                    self.datastreams[obj_id] = []
                self.datastreams[obj_id].append(ds_data)

    def load_from_xlsx(self, xlsx_file: Path | None = None) -> None:
        """
        Load object and datastream metadata from a single XLSX file with two sheets.

        Args:
            xlsx_file (Path | None): Path for XLSX file. Defaults to 'all_objects.xlsx'.

        Raises:
            FileNotFoundError: If the XLSX file does not exist.
        """
        xlsx_file = xlsx_file or Path(ALL_OBJECTS_XLSX)

        if not xlsx_file.is_file():
            raise FileNotFoundError(f"File {xlsx_file} does not exist.")
        with tempfile.TemporaryDirectory() as tmpdir:
            obj_file = Path(tmpdir) / ALL_OBJECTS_CSV
            ds_file = Path(tempfile.tempdir) / ALL_DATASTREAMS_CSV
            xlsx.xlsx_to_csv(xlsx_file, obj_file, ds_file)
            self.load_from_csv(obj_file, ds_file)

Represents a collection of metadata for multiple GAMS objects and their datastreams.

Used to aggregate, save, load, and distribute object and datastream metadata between individual object directories and combined CSV/XLSX files.

Initialize an empty ObjectCollection.

Methods

def collect_from_objects(self, root_dir: pathlib.Path) ‑> None
Expand source code
def collect_from_objects(self, root_dir: Path) -> None:
    """
    Gather object and datastream metadata from every object folder under root_dir.

    The folder name serves as the key under which the object and its
    datastreams are stored. Existing entries in the collection are kept.

    Args:
        root_dir (Path): Directory containing object folders.

    Raises:
        ValueError: If object metadata (CSV) is missing for any object directory.
    """
    for folder in find_object_folders(root_dir):
        manager = ObjectCSVManager(folder)
        if manager.is_empty():
            raise ValueError(
                f"Object metadata (csv) is not set for {folder}. "
                "Please check the object directory."
            )
        key = folder.name
        self.objects[key] = manager.get_object()
        for stream in manager.get_datastreamdata():
            # Create the per-object list lazily, on the first datastream only.
            self.datastreams.setdefault(key, []).append(stream)

Collect metadata from all object directories below root_dir.

Args

root_dir : Path
Directory containing object folders.

Raises

ValueError
If object metadata (CSV) is missing for any object directory.
def count_datastreams(self) ‑> int
Expand source code
def count_datastreams(self) -> int:
    """
    Count all datastreams held by the collection.

    Returns:
        int: Sum of the lengths of all per-object datastream lists.
    """
    per_object_counts = map(len, self.datastreams.values())
    return sum(per_object_counts)

Return the total number of datastreams in the collection.

Returns

int
Number of datastreams.
def count_objects(self) ‑> int
Expand source code
def count_objects(self) -> int:
    """
    Count the objects held by the collection.

    Returns:
        int: Number of entries in ``self.objects``.
    """
    object_total = len(self.objects)
    return object_total

Return the number of objects in the collection.

Returns

int
Number of objects.
def distribute_to_objects(self, root_dir: pathlib.Path) ‑> tuple[int, int]
Expand source code
def distribute_to_objects(self, root_dir: Path) -> tuple[int, int]:
    """
    Write the collected metadata back into the individual object directories.

    For every object in the collection, the object.csv and datastreams.csv
    files in the matching directory below root_dir are updated.

    Args:
        root_dir (Path): Directory containing object folders.

    Returns:
        tuple[int, int]: Number of updated objects and datastreams.

    Raises:
        UserWarning: If an object directory does not exist.
    """
    n_objects = 0
    n_datastreams = 0
    for recid, metadata in self.objects.items():
        target_dir = root_dir / recid
        # Guard clause: a missing directory ends the run immediately.
        if not target_dir.is_dir():
            raise UserWarning(
                f"Object directory {target_dir} does not exist. Skipping."
            )
        manager = ObjectCSVManager(target_dir, ignore_existing_csv_files=True)
        manager.set_object(metadata, replace=True)
        n_objects += 1
        for stream in self.datastreams.get(recid, []):
            manager.add_datastream(stream, replace=True)
            n_datastreams += 1
        manager.save()
    return n_objects, n_datastreams

Distribute aggregated metadata to individual object directories.

Updates object.csv and datastreams.csv files in each object directory.

Args

root_dir : Path
Directory containing object folders.

Returns

tuple[int, int]
Number of updated objects and datastreams.

Raises

UserWarning
If an object directory does not exist.
def load_from_csv(self,
obj_file: pathlib.Path | None = None,
ds_file: pathlib.Path | None = None) ‑> None
Expand source code
def load_from_csv(
    self, obj_file: Path | None = None, ds_file: Path | None = None
) -> None:
    """
    Load object and datastream metadata from two CSV files.

    Args:
        obj_file (Path | None): Path for object metadata CSV. Defaults to 'all_objects.csv'.
        ds_file (Path | None): Path for datastream metadata CSV. Defaults to 'all_datastreams.csv'.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
    """
    obj_file = obj_file or Path(ALL_OBJECTS_CSV)
    ds_file = ds_file or Path(ALL_DATASTREAMS_CSV)
    if not obj_file.is_file():
        raise FileNotFoundError(f"Required csv file {obj_file} does not exist.")
    if not ds_file.is_file():
        raise FileNotFoundError(f"Required csv file {ds_file} does not exist.")
    self.objects.clear()
    self.datastreams.clear()

    with obj_file.open("r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            obj_data = ObjectData(**row)
            self.objects[obj_data.recid] = obj_data

    with ds_file.open("r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            ds_data = DSData(**row)
            obj_id = ds_data.dspath.split("/")[0]  # Extract object id from dspath
            if obj_id not in self.datastreams:
                self.datastreams[obj_id] = []
            self.datastreams[obj_id].append(ds_data)

Load object and datastream metadata from two CSV files.

Args

obj_file : Path | None
Path for object metadata CSV. Defaults to 'all_objects.csv'.
ds_file : Path | None
Path for datastream metadata CSV. Defaults to 'all_datastreams.csv'.

Raises

FileNotFoundError
If either CSV file does not exist.
def load_from_xlsx(self, xlsx_file: pathlib.Path | None = None) ‑> None
Expand source code
def load_from_xlsx(self, xlsx_file: Path | None = None) -> None:
    """
    Load object and datastream metadata from a single XLSX file with two sheets.

    Args:
        xlsx_file (Path | None): Path for XLSX file. Defaults to 'all_objects.xlsx'.

    Raises:
        FileNotFoundError: If the XLSX file does not exist.
    """
    xlsx_file = xlsx_file or Path(ALL_OBJECTS_XLSX)

    if not xlsx_file.is_file():
        raise FileNotFoundError(f"File {xlsx_file} does not exist.")
    with tempfile.TemporaryDirectory() as tmpdir:
        obj_file = Path(tmpdir) / ALL_OBJECTS_CSV
        ds_file = Path(tempfile.tempdir) / ALL_DATASTREAMS_CSV
        xlsx.xlsx_to_csv(xlsx_file, obj_file, ds_file)
        self.load_from_csv(obj_file, ds_file)

Load object and datastream metadata from a single XLSX file with two sheets.

Args

xlsx_file : Path | None
Path for XLSX file. Defaults to 'all_objects.xlsx'.

Raises

FileNotFoundError
If the XLSX file does not exist.
def save_to_csv(self,
obj_file: pathlib.Path | None = None,
ds_file: pathlib.Path | None = None) ‑> None
Expand source code
def save_to_csv(
    self, obj_file: Path | None = None, ds_file: Path | None = None
) -> None:
    """
    Write the collection to two CSV files: one for objects, one for datastreams.

    Each file starts with a header row; both files are opened for writing.

    Args:
        obj_file (Path | None): Path for object metadata CSV. Defaults to 'all_objects.csv'.
        ds_file (Path | None): Path for datastream metadata CSV. Defaults to 'all_datastreams.csv'.
    """
    if not obj_file:
        obj_file = Path(ALL_OBJECTS_CSV)
    if not ds_file:
        ds_file = Path(ALL_DATASTREAMS_CSV)

    with obj_file.open("w", encoding="utf-8", newline="") as obj_handle:
        obj_writer = csv.DictWriter(obj_handle, fieldnames=ObjectData.fieldnames())
        obj_writer.writeheader()
        obj_writer.writerows(asdict(item) for item in self.objects.values())

    with ds_file.open("w", encoding="utf-8", newline="") as ds_handle:
        ds_writer = csv.DictWriter(ds_handle, fieldnames=DSData.fieldnames())
        ds_writer.writeheader()
        # Flatten the per-object lists into one stream of rows.
        ds_writer.writerows(
            asdict(stream)
            for group in self.datastreams.values()
            for stream in group
        )

Save object and datastream metadata to two CSV files.

Args

obj_file : Path | None
Path for object metadata CSV. Defaults to 'all_objects.csv'.
ds_file : Path | None
Path for datastream metadata CSV. Defaults to 'all_datastreams.csv'.
def save_to_xlsx(self, xlsx_file: pathlib.Path | None = None) ‑> None
Expand source code
def save_to_xlsx(self, xlsx_file: Path | None = None) -> None:
    """
    Write the collection to one XLSX file holding two sheets.

    The data is first saved as two CSV files in a temporary directory, and
    those files are then converted into the XLSX file.

    Args:
        xlsx_file (Path | None): Path for XLSX file. Defaults to 'all_objects.xlsx'.
    """
    if not xlsx_file:
        xlsx_file = Path(ALL_OBJECTS_XLSX)
    with tempfile.TemporaryDirectory() as scratch:
        scratch_dir = Path(scratch)
        objects_csv = scratch_dir / ALL_OBJECTS_CSV
        datastreams_csv = scratch_dir / ALL_DATASTREAMS_CSV
        self.save_to_csv(objects_csv, datastreams_csv)
        xlsx.csv_to_xlsx(objects_csv, datastreams_csv, xlsx_file)

Save object and datastream metadata to a single XLSX file with two sheets.

Args

xlsx_file : Path | None
Path for XLSX file. Defaults to 'all_objects.xlsx'.