from typing import Iterable, List, Dict, Union, Tuple
import os
import numpy as np
from opatio.base.header import Header, make_default_header
from opatio.index.floatvectorindex import FloatVectorIndex
from opatio.catalog.entry import CardCatalogEntry
from opatio.card.datacard import DataCard
from opatio.card.datacard import OPATTable
class OPAT:
    """
    A class representing an OPAT (Open Parameterized Array Table) instance. OPAT
    is a structured binary file format developed by the 4D-STAR collaboration for
    storing all tabular data needed for 4DSSE. OPAT is licensed under the
    GNU General Public License v3.0. You should have received a copy of the
    GNU General Public License along with this program. If not, see
    <http://www.gnu.org/licenses/>.

    This class provides methods for managing headers, catalogs, and data cards,
    as well as saving OPAT files.

    Attributes
    ----------
    header : Header
        The header object containing metadata for the OPAT instance.
    catalog : Dict[FloatVectorIndex, CardCatalogEntry]
        A dictionary mapping index vectors to catalog entries. It is rebuilt
        from ``cards`` every time a card is added or removed.
    cards : Dict[FloatVectorIndex, DataCard]
        A dictionary mapping index vectors to data cards.

    Methods
    -------
    set_version(version: int) -> int
        Sets the version of the OPAT header.
    set_source(source: str) -> str
        Sets the source information in the OPAT header.
    set_numIndex(numIndex: int) -> int
        Sets the number of indices in the OPAT header.
    pop_card(indexVector) -> DataCard
        Removes and returns a card. The catalog and header are updated to
        reflect the removal of the card.
    add_card(indexVector, card)
        Adds a data card. If a card already exists at the given indexVector it
        is replaced. The catalog and header are updated to reflect the addition.
    add_table(indexVector, tag, columnValues, rowValues, data, card=..., columnName="columnValues", rowName="rowValues") -> DataCard
        Adds a table to a (copy of a) data card and stores that card at
        indexVector, replacing any card already stored there.
    save_as_ascii(filename: str) -> str
        Saves the OPAT instance as a human-readable ASCII file. This file is not
        a valid OPAT file in and of itself, but is meant to be human-readable for
        debugging purposes.
    save(filename: str) -> str
        Saves the OPAT instance as a binary file.

    Notes
    -----
    NOTE(review): earlier documentation listed a ``set_comment`` method, but no
    such method is defined on this class -- confirm whether it should be restored.

    Examples
    --------
    >>> opat = OPAT()
    >>> opat.set_version(1)
    >>> opat.set_source("example_source")
    """

    def __init__(self):
        """
        Initializes an OPAT instance with default header, catalog, and cards.
        """
        self.header: Header = make_default_header()
        self.catalog: Dict[FloatVectorIndex, CardCatalogEntry] = {}
        self.cards: Dict[FloatVectorIndex, DataCard] = {}

    def set_version(self, version: int) -> int:
        """
        Sets the version of the OPAT header.

        Parameters
        ----------
        version : int
            The version number to set.

        Returns
        -------
        int
            The updated version number.
        """
        self.header.version = version
        return self.header.version

    def set_source(self, source: str) -> str:
        """
        Sets the source information in the OPAT header.

        Parameters
        ----------
        source : str
            The source information to set.

        Returns
        -------
        str
            The updated source information.
        """
        self.header.set_source(source)
        return self.header.sourceInfo

    def set_numIndex(self, numIndex: int) -> int:
        """
        Sets the number of indices in the OPAT header.

        Parameters
        ----------
        numIndex : int
            The number of indices to set. Must be greater than 0.

        Returns
        -------
        int
            The updated number of indices.

        Raises
        ------
        ValueError
            If numIndex is less than 1.
        """
        if numIndex < 1:
            raise ValueError(f"numIndex must be greater than 0! It is currently {numIndex}")
        self.header.numIndex = numIndex
        return self.header.numIndex

    def _validate_indexVector(self, indexVector: Union[FloatVectorIndex, Iterable[float]]) -> FloatVectorIndex:
        """
        Validates and converts an index vector to a FloatVectorIndex.

        An existing FloatVectorIndex is passed through unchanged (only its
        length is checked); anything else is converted element-wise to floats
        and wrapped using the header's hashPrecision.

        Parameters
        ----------
        indexVector : Union[FloatVectorIndex, Iterable[float]]
            The index vector to validate.

        Returns
        -------
        FloatVectorIndex
            The validated and converted index vector.

        Raises
        ------
        ValueError
            If the index vector cannot be cast to a FloatVectorIndex or has an invalid length.
        """
        if not isinstance(indexVector, FloatVectorIndex):
            castError = (
                "indexVector must be castable as a tuple of floats or a FloatVectorIndex! "
                f"Currently it is {type(indexVector)}."
            )
            try:
                indexTuple = tuple(float(i) for i in indexVector)
            except (TypeError, ValueError) as e:
                # TypeError covers non-iterables and non-numeric elements such as
                # None; both are reported as the documented ValueError.
                raise ValueError(f"{castError} {e}") from e
            try:
                indexVector = FloatVectorIndex(
                    vector=indexTuple,
                    hashPrecision=self.header.hashPrecision
                )
            except ValueError as e:
                raise ValueError(f"{castError} {e}") from e
        if len(indexVector) != self.header.numIndex:
            raise ValueError(f"indexVector must have length {self.header.numIndex}! Currently it has length {len(indexVector)}")
        return indexVector

    def _recaclulate_index(self):
        """
        Rebuilds the catalog from the cards currently stored.

        Cards are laid out back to back starting at ``header.headerSize``, in
        the iteration order of ``self.cards``. The catalog is rebuilt from
        scratch so that entries belonging to removed cards do not linger, then
        ``header.catalogOffset`` and ``header.numCards`` are updated.

        Examples
        --------
        >>> opat = OPAT()
        >>> opat.set_numIndex(2)
        >>> indexVector = [1.0, 2.0]
        >>> card = DataCard()
        >>> opat.add_card(indexVector, card)
        >>> opat._recaclulate_index()
        """
        rebuilt: Dict[FloatVectorIndex, CardCatalogEntry] = {}
        currentByteStart = self.header.headerSize
        for indexVector, card in self.cards.items():
            currentByteEnd = currentByteStart + len(card)
            rebuilt[indexVector] = CardCatalogEntry(
                index=indexVector,
                byteStart=currentByteStart,
                byteEnd=currentByteEnd,
                sha256=card.sha256()
            )
            currentByteStart = currentByteEnd
        # Swap the contents in place only after every entry was built, so a
        # failure above leaves the previous catalog untouched and any external
        # reference to self.catalog keeps pointing at the live dictionary.
        self.catalog.clear()
        self.catalog.update(rebuilt)
        self.header.catalogOffset = currentByteStart
        self.header.numCards = len(self.catalog)

    def pop_card(self, indexVector: Union[FloatVectorIndex, Iterable[float]]) -> DataCard:
        """
        Removes and returns a card from the catalog and cards dictionary.

        Parameters
        ----------
        indexVector : Union[FloatVectorIndex, Iterable[float]]
            The index vector of the card to remove.

        Returns
        -------
        DataCard
            The removed data card.

        Raises
        ------
        KeyError
            If no card is stored at the index vector.
        ValueError
            If the index vector is malformed (see ``_validate_indexVector``).

        Examples
        --------
        >>> opat = OPAT()
        >>> opat.set_numIndex(2)
        >>> indexVector = [1.0, 2.0]
        >>> card = DataCard()
        >>> opat.add_card(indexVector, card)
        >>> removed_card = opat.pop_card(indexVector)
        >>> print(removed_card)
        """
        indexVector = self._validate_indexVector(indexVector)
        if indexVector not in self.cards:
            raise KeyError(f"indexVector {indexVector} not found in catalog!")
        card = self.cards.pop(indexVector)
        # Rebuilding the catalog also drops the entry of the card just removed.
        self._recaclulate_index()
        return card

    def add_card(self, indexVector: Union[FloatVectorIndex, Iterable[float]], card: DataCard):
        """
        Adds a data card to the catalog and cards dictionary.

        If a card is already stored at indexVector it is removed first, so the
        new card is placed after all other cards.

        Parameters
        ----------
        indexVector : Union[FloatVectorIndex, Iterable[float]]
            The index vector for the card.
        card : DataCard
            The data card to add.

        Raises
        ------
        TypeError
            If the card is not an instance of DataCard.
        ValueError
            If the index vector is malformed (see ``_validate_indexVector``).

        Examples
        --------
        >>> opat = OPAT()
        >>> opat.set_numIndex(2)
        >>> indexVector = [1.0, 2.0]
        >>> card = DataCard()
        >>> opat.add_card(indexVector, card)
        """
        indexVector = self._validate_indexVector(indexVector)
        if not isinstance(card, DataCard):
            raise TypeError(f"card must be a DataCard! Currently it is {type(card)}")
        # Drop any existing card directly instead of going through pop_card, which
        # would re-hash every card once for the removal and again for the insert.
        self.cards.pop(indexVector, None)
        self.cards[indexVector] = card
        self._recaclulate_index()

    def add_table(
        self,
        indexVector: Union[FloatVectorIndex, Iterable[float]],
        tag: str,
        columnValues: Iterable[float],
        rowValues: Iterable[float],
        data: Iterable[Iterable[float]],
        card: DataCard = ...,
        columnName: str = "columnValues",
        rowName: str = "rowValues",
    ) -> DataCard:
        """
        Adds a table to a data card and stores it in the catalog.

        The table is added to a copy of ``card`` (or to a new DataCard when no
        card is given) and that copy is stored at indexVector, replacing any
        card already stored there.

        Parameters
        ----------
        indexVector : Union[FloatVectorIndex, Iterable[float]]
            The index vector for the card.
        tag : str
            The tag for the table.
        columnValues : Iterable[float]
            The column values for the table. Its length must equal ``data.shape[1]``.
        rowValues : Iterable[float]
            The row values for the table. Its length must equal ``data.shape[0]``.
        data : Iterable[Iterable[float]]
            The 2D (or 3D) data array for the table.
        card : DataCard, optional
            The data card to add the table to. If not provided (or None), a new card is created.
        columnName : str, optional
            The name of the column values. Default is "columnValues".
        rowName : str, optional
            The name of the row values. Default is "rowValues".

        Returns
        -------
        DataCard
            A copy of the updated data card.

        Raises
        ------
        ValueError
            If the data cannot be converted to float64.
        TypeError
            If the tag is not a string.
        AssertionError
            If the dimensions of the data, columnValues, or rowValues are invalid.

        Examples
        --------
        >>> opat = OPAT()
        >>> opat.set_numIndex(2)
        >>> indexVector = [1.0, 2.0]
        >>> tag = "data"
        >>> columnValues = [1.0, 2.0, 3.0]
        >>> rowValues = [4.0, 5.0]
        >>> data = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
        >>> opat.add_table(indexVector, tag, columnValues, rowValues, data)
        """
        try:
            cV = np.array(columnValues, dtype=np.float64)
            rV = np.array(rowValues, dtype=np.float64)
            d = np.array(data, dtype=np.float64)
        except ValueError as e:
            raise ValueError(f"Unable to convert data to float64. {e}") from e
        if not isinstance(tag, str):
            raise TypeError(f"tag must be a string! Currently it is {type(tag)}")

        # Raised explicitly (rather than via `assert`) so the checks still run
        # under `python -O`, while keeping the documented AssertionError type.
        if cV.ndim != 1:
            raise AssertionError(f"columnValues must be a 1D array! Currently it has {cV.ndim} dimensions")
        if rV.ndim != 1:
            raise AssertionError(f"rowValues must be a 1D array! Currently it has {rV.ndim} dimensions")
        if d.ndim not in (2, 3):
            raise AssertionError(f"data must be a 2D or 3D array! Currently it has {d.ndim} dimensions")
        if d.shape[1] != len(cV):
            raise AssertionError(
                f"data must have one column per entry of columnValues! "
                f"Currently data has {d.shape[1]} columns but columnValues has {len(cV)} entries"
            )
        if d.shape[0] != len(rV):
            raise AssertionError(
                f"data must have one row per entry of rowValues! "
                f"Currently data has {d.shape[0]} rows but rowValues has {len(rV)} entries"
            )

        table = OPATTable(columnValues=cV, rowValues=rV, data=d)
        # `...` is the "no card supplied" sentinel; compare by identity so a
        # DataCard's own __eq__ is never consulted.
        if card is ... or card is None:
            newCard = DataCard()
        else:
            newCard = card.copy()
        newCard.add_table(tag, table, columnName=columnName, rowName=rowName)
        self.add_card(indexVector, newCard)
        return newCard.copy()

    def __repr__(self) -> str:
        """
        Returns a string representation of the OPAT instance.

        The representation lists the header fields (version, numCards,
        headerSize, catalog offset, creationDate, sourceInfo, comment,
        numIndex, hashPrecision and reserved), one per line.

        Returns
        -------
        str
            The string representation of the OPAT instance.

        Examples
        --------
        >>> opat = OPAT()
        >>> print(opat)
        """
        header = self.header
        lines = [
            "OPAT(",
            f"version: {header.version}",
            f"numCards: {header.numCards}",
            f"headerSize: {header.headerSize}",
            f"indexOffset: {header.catalogOffset}",
            f"creationDate: {header.creationDate}",
            f"sourceInfo: {header.sourceInfo}",
            f"comment: {header.comment}",
            f"numIndex: {header.numIndex}",
            f"hashPrecision: {header.hashPrecision}",
            f"reserved: {header.reserved}",
            ")",
        ]
        return "\n".join(lines)

    def save_as_ascii(self, filename: str) -> str:
        """
        Saves the OPAT instance as a human-readable ASCII file.

        The file contains an explanatory preamble, the header fields, the
        ASCII rendering of every card, and the ASCII rendering of every
        catalog entry. It is intended for debugging only and is not a valid
        OPAT file.

        Parameters
        ----------
        filename : str
            The name of the file to save.

        Returns
        -------
        str
            The name of the saved file.

        Examples
        --------
        >>> opat = OPAT()
        >>> filename = "opat_ascii.txt"
        >>> opat.save_as_ascii(filename)
        >>> print(f"File saved as {filename}")
        """
        # Explicit encoding so header text (source/comment) is written the same
        # way on every platform.
        with open(filename, 'w', encoding="utf-8") as f:
            f.write("This is an ASCII representation of an OPAT file, it is not a valid OPAT file in and of itself.\n")
            f.write("This file is meant to be human readable and is not meant to be read by a computer.\n")
            f.write("The purpose of this file is to provide a human readable representation of the OPAT file which can be used for debugging purposes.\n")
            f.write("The full binary specification of the OPAT file can be found in the OPAT file format documentation at:\n")
            f.write(" https://github.com/4D-STAR/4DSSE/blob/main/specs/OPAT/OPAT.pdf\n")
            f.write("="*35 + " HEADER " + "="*36 + "\n")
            f.write(f">> {self.header.magic}\n")
            f.write(f">> Version: {self.header.version}\n")
            f.write(f">> numCards: {self.header.numCards}\n")
            f.write(f">> headerSize (bytes): {self.header.headerSize}\n")
            f.write(f">> Card Catalog Offset (bytes): {self.header.catalogOffset}\n")
            f.write(f">> Creation Date: {self.header.creationDate}\n")
            f.write(f">> Source Info: {self.header.sourceInfo}\n")
            f.write(f">> Comment: {self.header.comment}\n")
            f.write(f">> numIndex: {self.header.numIndex}\n")
            f.write(f">> hashPrecision: {self.header.hashPrecision}\n")
            f.write("="*37 + " DATA " + "="*37 + "\n")
            f.write("="*80 + "\n")
            for card in self.cards.values():
                f.write(card.ascii())
            f.write("="*36 + " INDEX " + "="*37 + "\n")
            # One fixed-width column title per index dimension, then the
            # byte-range and checksum column titles.
            indexHeader = "".join(
                "{:<8} | ".format(f"Index {indexID}")
                for indexID in range(self.header.numIndex)
            )
            indexHeader += f"{'Byte Start':<15} {'Byte End':<15} {'Checksum (SHA-256)'}\n"
            f.write(indexHeader)
            f.write("="*80 + "\n")
            for entry in self.catalog.values():
                f.write(entry.ascii())
        return filename

    def save(self, filename: str) -> str:
        """
        Saves the OPAT instance as a binary file.

        The bytes written are exactly ``bytes(self)``.

        Parameters
        ----------
        filename : str
            The name of the file to save.

        Returns
        -------
        str
            The name of the saved file.

        Raises
        ------
        RuntimeError
            If the file does not exist after writing.

        Examples
        --------
        >>> opat = OPAT()
        >>> filename = "opat_binary.opat"
        >>> opat.save(filename)
        >>> print(f"File saved as {filename}")
        """
        with open(filename, 'wb') as f:
            f.write(bytes(self))
        if not os.path.exists(filename):
            raise RuntimeError(f"Unable to save file {filename}!")
        return filename

    def __bytes__(self) -> bytes:
        """
        Converts the OPAT instance to bytes.

        The result is the header bytes, followed by the bytes of every card,
        followed by the bytes of every catalog entry.

        Returns
        -------
        bytes
            The byte representation of the OPAT instance.

        Examples
        --------
        >>> opat = OPAT()
        >>> byte_data = bytes(opat)
        >>> print(byte_data)
        """
        chunks = [bytes(self.header)]
        chunks.extend(bytes(card) for card in self.cards.values())
        chunks.extend(bytes(entry) for entry in self.catalog.values())
        # A single join avoids re-copying the growing buffer for every card.
        return b"".join(chunks)

    def __getitem__(self, key: Union[FloatVectorIndex, Tuple[float, ...]]) -> DataCard:
        """
        Retrieves a data card using an index vector.

        Parameters
        ----------
        key : Union[FloatVectorIndex, Tuple[float, ...]]
            The index vector to retrieve the data card. A FloatVectorIndex
            (for example one taken from ``indexVectors``) is used as is; any
            other key is wrapped using the header's hashPrecision.

        Returns
        -------
        DataCard
            The data card associated with the given index vector.

        Raises
        ------
        KeyError
            If the index vector is not found in the catalog.

        Examples
        --------
        >>> opat = OPAT()
        >>> opat.set_numIndex(2)
        >>> indexVector = (1.0, 2.0)
        >>> card = DataCard()
        >>> opat.add_card(indexVector, card)
        >>> retrieved_card = opat[(1.0, 2.0)]
        >>> print(retrieved_card)
        """
        if isinstance(key, FloatVectorIndex):
            fiv = key
        else:
            fiv = FloatVectorIndex(key, hashPrecision=self.header.hashPrecision)
        if fiv not in self.catalog:
            raise KeyError(f"indexVector {fiv} not found in catalog!")
        return self.cards[fiv]

    def size(self) -> Tuple[int, int]:
        """
        Returns the size of the OPAT instance.

        The size is defined as the number of indexes per card and the number of cards.
        For example, an OPAT file might have a size of (2, 126).

        Returns
        -------
        Tuple[int, int]
            A tuple ``(numIndex, numCards)`` read from the header.

        Examples
        --------
        >>> opat = OPAT()
        >>> opat.set_numIndex(2)
        >>> indexVector = (1.0, 2.0)
        >>> card = DataCard()
        >>> opat.add_card(indexVector, card)
        >>> print(opat.size())
        (2, 1)
        """
        return self.header.numIndex, self.header.numCards

    @property
    def indexVectors(self) -> List[FloatVectorIndex]:
        """
        Returns a list of index vectors in the catalog.

        This property provides a convenient way to access all index vectors stored in the catalog.

        Returns
        -------
        List[FloatVectorIndex]
            A list of index vectors.

        Examples
        --------
        >>> opat = OPAT()
        >>> opat.set_numIndex(2)
        >>> indexVector1 = (1.0, 2.0)
        >>> indexVector2 = (3.0, 4.0)
        >>> card1 = DataCard()
        >>> card2 = DataCard()
        >>> opat.add_card(indexVector1, card1)
        >>> opat.add_card(indexVector2, card2)
        >>> print(opat.indexVectors)
        """
        return list(self.catalog)