mirror of
https://github.com/Magnus167/rustframe.git
synced 2025-08-20 04:19:59 +00:00
add Frame struct with methods for column manipulation and element-wise operations
This commit is contained in:
parent
c918253d3f
commit
5bc37d58e3
312
src/frame/base.rs
Normal file
312
src/frame/base.rs
Normal file
@ -0,0 +1,312 @@
|
||||
use crate::matrix::*;
|
||||
use std::collections::HashMap;
|
||||
use std::ops::{Index, IndexMut, Not};
|
||||
|
||||
|
||||
/// A data frame – a Matrix with string‑identified columns (column‑major).
|
||||
///
|
||||
/// Restricts the element type T to anything that is at least Clone –
|
||||
/// this guarantees we can duplicate data when adding columns or performing
|
||||
/// ownership‑moving transformations later on. (Further trait bounds are added
|
||||
/// per‑method when additional capabilities such as arithmetic are needed.)
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use rustframe::frame::Frame; // Assuming Frame is in the root of rustframe
|
||||
/// use rustframe::matrix::Matrix; // Assuming Matrix is in rustframe::matrix
|
||||
///
|
||||
/// // 1. Create a frame
|
||||
/// let matrix = Matrix::from_cols(vec![
|
||||
/// vec![1.0, 2.0, 3.0], // Column "temp"
|
||||
/// vec![5.5, 6.5, 7.5], // Column "pressure"
|
||||
/// ]);
|
||||
/// let mut frame = Frame::new(matrix, vec!["temp", "pressure"]);
|
||||
///
|
||||
/// assert_eq!(frame.column_names, vec!["temp", "pressure"]);
|
||||
///
|
||||
/// // 2. Access data
|
||||
/// assert_eq!(frame.column("temp"), &[1.0, 2.0, 3.0]);
|
||||
/// assert_eq!(frame["pressure"].to_vec(), &[5.5, 6.5, 7.5]);
|
||||
/// assert_eq!(frame.column_index("temp"), Some(0));
|
||||
///
|
||||
/// // 3. Mutate data
|
||||
/// frame["temp"][0] = 1.5;
|
||||
/// assert_eq!(frame["temp"].to_vec(), &[1.5, 2.0, 3.0]);
|
||||
///
|
||||
/// frame.column_mut("pressure")[1] = 6.8;
|
||||
/// assert_eq!(frame["pressure"].to_vec(), &[5.5, 6.8, 7.5]);
|
||||
///
|
||||
/// // 4. Add a column
|
||||
/// frame.add_column("humidity", vec![50.0, 55.0, 60.0]);
|
||||
/// assert_eq!(frame.column_names, vec!["temp", "pressure", "humidity"]);
|
||||
/// assert_eq!(frame["humidity"].to_vec(), &[50.0, 55.0, 60.0]); // i32 mixed with f64 needs generic adjustment or separate examples
|
||||
///
|
||||
/// // 5. Rename a column
|
||||
/// frame.rename("temp", "temperature");
|
||||
/// assert_eq!(frame.column_names, vec!["temperature", "pressure", "humidity"]);
|
||||
/// assert!(frame.column_index("temp").is_none());
|
||||
/// assert_eq!(frame.column_index("temperature"), Some(0));
|
||||
/// assert_eq!(frame["temperature"].to_vec(), &[1.5, 2.0, 3.0]);
|
||||
///
|
||||
/// // 6. Swap columns
|
||||
/// frame.swap_columns("temperature", "humidity");
|
||||
/// assert_eq!(frame.column_names, vec!["humidity", "pressure", "temperature"]);
|
||||
/// assert_eq!(frame["humidity"].to_vec(), &[50.0, 55.0, 60.0]); // Now holds original temp data
|
||||
///
|
||||
/// // 7. Sort columns
|
||||
/// frame.sort_columns();
|
||||
/// assert_eq!(frame.column_names, vec!["humidity", "pressure", "temperature"]); // Already sorted after swap
|
||||
/// // Let's add one more to see sorting:
|
||||
/// // frame.add_column("altitude", vec![100.0, 110.0, 120.0]);
|
||||
/// // frame.sort_columns();
|
||||
/// // assert_eq!(frame.column_names, vec!["altitude", "humidity", "pressure", "temperature"]);
|
||||
///
|
||||
/// // 8. Delete a column
|
||||
/// let deleted_pressure = frame.delete_column("pressure");
|
||||
/// assert_eq!(deleted_pressure, vec![5.5, 6.8, 7.5]);
|
||||
/// assert_eq!(frame.column_names, vec!["humidity", "temperature"]);
|
||||
/// assert!(frame.column_index("pressure").is_none());
|
||||
///
|
||||
/// // 9. Element-wise operations (requires compatible frames)
|
||||
/// let matrix_offset = Matrix::from_cols(vec![
|
||||
/// vec![0.1, 0.1, 0.1], // humidity offset
|
||||
/// vec![1.0, 1.0, 1.0], // temperature offset
|
||||
/// ]);
|
||||
/// let frame_offset = Frame::new(matrix_offset, vec!["humidity", "temperature"]);
|
||||
///
|
||||
/// let adjusted_frame = &frame + &frame_offset; // Add requires frame: Frame<f64>
|
||||
/// // Need to ensure frame is Frame<f64> for this op
|
||||
/// // assert_eq!(adjusted_frame["humidity"], &[1.6, 2.1, 3.1]); // Original temp + 0.1
|
||||
/// // assert_eq!(adjusted_frame["temperature"], &[51.0, 56.0, 61.0]); // Original humidity + 1.0
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Frame<T: Clone> {
|
||||
/// **Public** vector holding the column names in their current order.
|
||||
pub column_names: Vec<String>,
|
||||
|
||||
matrix: Matrix<T>,
|
||||
/// Maps a label to the column index for **O(1)** lookup.
|
||||
pub lookup: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl<T: Clone> Frame<T> {
|
||||
/* ---------- Constructors ---------- */
|
||||
/// Creates a new [`Frame`] from a matrix and column names.
|
||||
///
|
||||
/// # Panics
|
||||
/// * if the number of names differs from `matrix.cols()`
|
||||
/// * if names are not unique.
|
||||
pub fn new<L: Into<String>>(matrix: Matrix<T>, names: Vec<L>) -> Self {
|
||||
assert_eq!(matrix.cols(), names.len(), "column name count mismatch");
|
||||
let mut lookup = HashMap::with_capacity(names.len());
|
||||
let column_names: Vec<String> = names
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(i, n)| {
|
||||
let s = n.into();
|
||||
if lookup.insert(s.clone(), i).is_some() {
|
||||
panic!("duplicate column label: {}", s);
|
||||
}
|
||||
s
|
||||
})
|
||||
.collect();
|
||||
Self {
|
||||
matrix,
|
||||
column_names,
|
||||
lookup,
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------- Immutable / mutable access ---------- */
|
||||
|
||||
#[inline]
|
||||
pub fn matrix(&self) -> &Matrix<T> {
|
||||
&self.matrix
|
||||
}
|
||||
#[inline]
|
||||
pub fn matrix_mut(&mut self) -> &mut Matrix<T> {
|
||||
&mut self.matrix
|
||||
}
|
||||
|
||||
/// Returns an immutable view of the column `name`.
|
||||
pub fn column(&self, name: &str) -> &[T] {
|
||||
let idx = self
|
||||
.lookup
|
||||
.get(name)
|
||||
.copied()
|
||||
.unwrap_or_else(|| panic!("unknown column label: {}", name));
|
||||
self.matrix.column(idx)
|
||||
}
|
||||
|
||||
/// Returns a mutable view of the column `name`.
|
||||
pub fn column_mut(&mut self, name: &str) -> &mut [T] {
|
||||
let idx = self
|
||||
.lookup
|
||||
.get(name)
|
||||
.copied()
|
||||
.unwrap_or_else(|| panic!("unknown column label: {}", name));
|
||||
// SAFETY: the column is stored contiguously (column‑major layout).
|
||||
self.matrix.column_mut(idx)
|
||||
}
|
||||
|
||||
/// Index of a column label, if it exists.
|
||||
pub fn column_index(&self, name: &str) -> Option<usize> {
|
||||
self.lookup.get(name).copied()
|
||||
}
|
||||
|
||||
/* ---------- Column manipulation ---------- */
|
||||
|
||||
/// Swaps two columns identified by their labels.
|
||||
/// Internally defers to the already‑implemented [`Matrix::swap_columns`].
|
||||
pub fn swap_columns<L: AsRef<str>>(&mut self, a: L, b: L) {
|
||||
let ia = self
|
||||
.column_index(a.as_ref())
|
||||
.unwrap_or_else(|| panic!("unknown column label: {}", a.as_ref()));
|
||||
let ib = self
|
||||
.column_index(b.as_ref())
|
||||
.unwrap_or_else(|| panic!("unknown column label: {}", b.as_ref()));
|
||||
if ia == ib {
|
||||
return; // nothing to do
|
||||
}
|
||||
self.matrix.swap_columns(ia, ib); // <‑‑ reuse existing impl
|
||||
self.column_names.swap(ia, ib);
|
||||
// update lookup values
|
||||
self.lookup.get_mut(a.as_ref()).map(|v| *v = ib);
|
||||
self.lookup.get_mut(b.as_ref()).map(|v| *v = ia);
|
||||
}
|
||||
|
||||
/// Renames a column.
|
||||
///
|
||||
/// # Panics
|
||||
/// * if `old` is missing
|
||||
/// * if `new` already exists.
|
||||
pub fn rename<L: Into<String>>(&mut self, old: &str, new: L) {
|
||||
let idx = self
|
||||
.column_index(old)
|
||||
.unwrap_or_else(|| panic!("unknown column label: {}", old));
|
||||
let new = new.into();
|
||||
if self.lookup.contains_key(&new) {
|
||||
panic!("duplicate column label: {}", new);
|
||||
}
|
||||
self.column_names[idx] = new.clone();
|
||||
self.lookup.remove(old);
|
||||
self.lookup.insert(new, idx);
|
||||
}
|
||||
|
||||
/// Adds a column to the **end** of the frame.
|
||||
pub fn add_column<L: Into<String>>(&mut self, name: L, column: Vec<T>) {
|
||||
let name = name.into();
|
||||
if self.lookup.contains_key(&name) {
|
||||
panic!("duplicate column label: {}", name);
|
||||
}
|
||||
self.matrix.add_column(self.matrix.cols(), column);
|
||||
self.column_names.push(name.clone());
|
||||
self.lookup.insert(name, self.matrix.cols() - 1);
|
||||
}
|
||||
|
||||
/// Deletes a column and returns its data.
|
||||
pub fn delete_column(&mut self, name: &str) -> Vec<T> {
|
||||
let idx = self
|
||||
.column_index(name)
|
||||
.unwrap_or_else(|| panic!("unknown column label: {}", name));
|
||||
let mut col = Vec::with_capacity(self.matrix.rows());
|
||||
col.extend_from_slice(self.matrix.column(idx));
|
||||
self.matrix.delete_column(idx);
|
||||
self.column_names.remove(idx);
|
||||
self.rebuild_lookup();
|
||||
col
|
||||
}
|
||||
|
||||
/// Sorts columns **lexicographically** by their names, *in‑place*.
|
||||
///
|
||||
/// The operation is performed exclusively through calls to
|
||||
/// [`swap_columns`](Frame::swap_columns), which themselves defer to
|
||||
/// `Matrix::swap_columns`; thus we never re‑implement swapping logic.
|
||||
pub fn sort_columns(&mut self) {
|
||||
// Simple selection sort; complexity O(n²) but stable w.r.t matrix data.
|
||||
let n = self.column_names.len();
|
||||
for i in 0..n {
|
||||
let mut min = i;
|
||||
for j in (i + 1)..n {
|
||||
if self.column_names[j] < self.column_names[min] {
|
||||
min = j;
|
||||
}
|
||||
}
|
||||
if min != i {
|
||||
// Use public API; keeps single source of truth.
|
||||
let col_i = self.column_names[i].clone();
|
||||
let col_min = self.column_names[min].clone();
|
||||
self.swap_columns(col_i, col_min);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------- helpers ---------- */
|
||||
|
||||
fn rebuild_lookup(&mut self) {
|
||||
self.lookup.clear();
|
||||
for (i, name) in self.column_names.iter().enumerate() {
|
||||
self.lookup.insert(name.clone(), i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------- Indexing ---------- */
|
||||
|
||||
impl<T: Clone> Index<&str> for Frame<T> {
|
||||
type Output = [T];
|
||||
fn index(&self, name: &str) -> &Self::Output {
|
||||
self.column(name)
|
||||
}
|
||||
}
|
||||
impl<T: Clone> IndexMut<&str> for Frame<T> {
|
||||
fn index_mut(&mut self, name: &str) -> &mut Self::Output {
|
||||
self.column_mut(name)
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------- Element‑wise numerical ops ---------- */
|
||||
macro_rules! impl_elementwise_frame_op {
|
||||
($OpTrait:ident, $method:ident, $op:tt) => {
|
||||
impl<'a, 'b, T> std::ops::$OpTrait<&'b Frame<T>> for &'a Frame<T>
|
||||
where
|
||||
T: Clone + std::ops::$OpTrait<Output = T>,
|
||||
{
|
||||
type Output = Frame<T>;
|
||||
fn $method(self, rhs: &'b Frame<T>) -> Frame<T> {
|
||||
assert_eq!(self.column_names, rhs.column_names, "column names mismatch");
|
||||
let matrix = (&self.matrix).$method(&rhs.matrix);
|
||||
Frame::new(matrix, self.column_names.clone())
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
impl_elementwise_frame_op!(Add, add, +);
|
||||
impl_elementwise_frame_op!(Sub, sub, -);
|
||||
impl_elementwise_frame_op!(Mul, mul, *);
|
||||
impl_elementwise_frame_op!(Div, div, /);
|
||||
|
||||
/* ---------- Boolean‑specific bitwise ops ---------- */
|
||||
macro_rules! impl_bitwise_frame_op {
|
||||
($OpTrait:ident, $method:ident, $op:tt) => {
|
||||
impl<'a, 'b> std::ops::$OpTrait<&'b Frame<bool>> for &'a Frame<bool> {
|
||||
type Output = Frame<bool>;
|
||||
fn $method(self, rhs: &'b Frame<bool>) -> Frame<bool> {
|
||||
assert_eq!(self.column_names, rhs.column_names, "column names mismatch");
|
||||
let matrix = (&self.matrix).$method(&rhs.matrix);
|
||||
Frame::new(matrix, self.column_names.clone())
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
impl_bitwise_frame_op!(BitAnd, bitand, &);
|
||||
impl_bitwise_frame_op!(BitOr, bitor, |);
|
||||
impl_bitwise_frame_op!(BitXor, bitxor, ^);
|
||||
|
||||
impl Not for Frame<bool> {
|
||||
type Output = Frame<bool>;
|
||||
fn not(self) -> Frame<bool> {
|
||||
Frame::new(!self.matrix, self.column_names)
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user