Compare commits

..

4 Commits

View File

@ -653,16 +653,66 @@ impl DataFrame {
DataFrame::new(new_typed_frames, self.column_names.clone(), Some(new_index)) DataFrame::new(new_typed_frames, self.column_names.clone(), Some(new_index))
} }
/// Returns a new DataFrame containing the first 5 rows. /// Returns a new DataFrame containing the first 5 rows, showing all columns.
/// This is a convenience method for `head_n(5)`.
pub fn head(&self) -> Self { pub fn head(&self) -> Self {
self.head_n(DEFAULT_DISPLAY_ROWS) let n = DEFAULT_DISPLAY_ROWS.min(self.rows());
if n == 0 {
return DataFrame::new(vec![], vec![], Some(RowIndex::Range(0..0)));
}
let new_index = match &self.index {
RowIndex::Range(r) => RowIndex::Range(r.start..r.start + n),
RowIndex::Int(v) => RowIndex::Int(v[0..n].to_vec()),
RowIndex::Date(v) => RowIndex::Date(v[0..n].to_vec()),
};
let mut new_typed_frames = Vec::new();
for col_name in self.columns() {
let col_data = self.column(col_name);
match col_data {
DataFrameColumn::F64(s) => {
let new_data = s[0..n].to_vec();
new_typed_frames.push(TypedFrame::F64(Frame::new(
Matrix::from_cols(vec![new_data]),
vec![col_name.clone()],
Some(new_index.clone()),
)));
}
DataFrameColumn::I64(s) => {
let new_data = s[0..n].to_vec();
new_typed_frames.push(TypedFrame::I64(Frame::new(
Matrix::from_cols(vec![new_data]),
vec![col_name.clone()],
Some(new_index.clone()),
)));
}
DataFrameColumn::Bool(s) => {
let new_data = s[0..n].to_vec();
new_typed_frames.push(TypedFrame::Bool(Frame::new(
Matrix::from_cols(vec![new_data]),
vec![col_name.clone()],
Some(new_index.clone()),
)));
}
DataFrameColumn::String(s) => {
let new_data = s[0..n].to_vec();
new_typed_frames.push(TypedFrame::String(Frame::new(
Matrix::from_cols(vec![new_data]),
vec![col_name.clone()],
Some(new_index.clone()),
)));
}
}
} }
/// Returns a new DataFrame containing the last 5 rows. DataFrame::new(new_typed_frames, self.column_names.clone(), Some(new_index))
/// This is a convenience method for `tail_n(5)`. }
/// Returns a new DataFrame containing the last 5 rows, showing all columns.
pub fn tail(&self) -> Self { pub fn tail(&self) -> Self {
self.tail_n(DEFAULT_DISPLAY_ROWS) let n = DEFAULT_DISPLAY_ROWS.min(self.rows());
if n == 0 {
return DataFrame::new(vec![], vec![], Some(RowIndex::Range(0..0)));
}
self.tail_n(n)
} }
} }
@ -671,156 +721,175 @@ impl fmt::Display for DataFrame {
if self.rows() == 0 || self.cols() == 0 { if self.rows() == 0 || self.cols() == 0 {
return write!( return write!(
f, f,
"Empty DataFrame\nRows: {}, Columns: {}", "+-------------------+\n| Empty DataFrame |\n+-------------------+\nRows: {}, Columns: {}",
self.rows(), self.rows(),
self.cols() self.cols()
); );
} }
let display_rows = DEFAULT_DISPLAY_ROWS;
let mut output = String::new(); let display_cols = DEFAULT_DISPLAY_COLS;
let mut column_widths = HashMap::new(); let total_rows = self.rows();
let total_cols = self.cols();
// Calculate max width for index column let show_row_ellipsis = total_rows > display_rows * 2;
let mut max_index_width = 0; let show_col_ellipsis = total_cols > display_cols;
for i in 0..self.rows() { // Row indices to display
let index_str = match &self.index { let mut row_indices = Vec::new();
if show_row_ellipsis {
for i in 0..display_rows {
row_indices.push(i);
}
for i in (total_rows - display_rows)..total_rows {
row_indices.push(i);
}
} else {
for i in 0..total_rows {
row_indices.push(i);
}
}
// Column indices to display
let mut col_indices = Vec::new();
if show_col_ellipsis {
let first = display_cols / 2;
let last = display_cols - first;
for i in 0..first {
col_indices.push(i);
}
col_indices.push(usize::MAX); // ellipsis
for i in (total_cols - last)..total_cols {
col_indices.push(i);
}
} else {
for i in 0..total_cols {
col_indices.push(i);
}
}
// Calculate column widths
let mut col_widths = Vec::new();
let mut index_width = "Index".len().max("...".len());
for &i in &row_indices {
let idx_str = match &self.index {
RowIndex::Range(r) => format!("{}", r.start + i), RowIndex::Range(r) => format!("{}", r.start + i),
RowIndex::Int(v) => format!("{}", v[i]), RowIndex::Int(v) => format!("{}", v[i]),
RowIndex::Date(v) => format!("{}", v[i]), RowIndex::Date(v) => format!("{}", v[i]),
}; };
max_index_width = max_index_width.max(index_str.len()); index_width = index_width.max(idx_str.len());
} }
// Ensure index header "Index" fits for &col_idx in &col_indices {
max_index_width = max_index_width.max("Index".len()); if col_idx == usize::MAX {
col_widths.push("...".len());
// Calculate max width for each data column continue;
for col_name in self.columns() { }
let mut max_width = col_name.len(); let col_name = &self.column_names[col_idx];
let mut maxw = col_name.len().max("...".len());
let col_data = self.column(col_name); let col_data = self.column(col_name);
for i in 0..self.rows() { for &i in &row_indices {
let cell_str = match col_data { let cell_str = match &col_data {
DataFrameColumn::F64(s) => format!("{:.2}", s[i]), // Format floats DataFrameColumn::F64(s) => format!("{}", s[i]),
DataFrameColumn::I64(s) => format!("{}", s[i]), DataFrameColumn::I64(s) => format!("{}", s[i]),
DataFrameColumn::Bool(s) => format!("{}", s[i]), DataFrameColumn::Bool(s) => format!("{}", s[i]),
DataFrameColumn::String(s) => format!("{}", s[i]), DataFrameColumn::String(s) => format!("{}", s[i]),
}; };
max_width = max_width.max(cell_str.len()); maxw = maxw.max(cell_str.len());
} }
column_widths.insert(col_name, max_width); col_widths.push(maxw);
} }
// Draw top border
// --- Print Header --- write!(f, "┌{:─<1$}┬", "", index_width + 2)?;
output.push_str(&format!("{:>width$} ", "Index", width = max_index_width)); for (i, w) in col_widths.iter().enumerate() {
let mut _displayed_cols = 0; if i + 1 == col_widths.len() {
let total_cols = self.cols(); write!(f, "{:─<1$}┐", "", w + 2)?;
let mut cols_to_display = Vec::new();
// Decide which columns to display
if total_cols <= DEFAULT_DISPLAY_COLS {
cols_to_display.extend_from_slice(self.columns());
} else { } else {
// Display first few and last few columns write!(f, "{:─<1$}┬", "", w + 2)?;
let num_first_cols = DEFAULT_DISPLAY_COLS / 2;
let num_last_cols = DEFAULT_DISPLAY_COLS - num_first_cols;
cols_to_display.extend_from_slice(&self.columns()[0..num_first_cols]);
cols_to_display.push("...".to_string()); // Placeholder for omitted columns
cols_to_display.extend_from_slice(&self.columns()[total_cols - num_last_cols..]);
} }
}
for col_name in &cols_to_display { writeln!(f)?;
if col_name == "..." { // Draw header row
output.push_str(&format!("{:>width$} ", "...", width = 5)); // Fixed width for ellipsis write!(f, "│ {:^width$} ", "Index", width = index_width)?;
for (col_idx, w) in col_indices.iter().zip(&col_widths) {
if *col_idx == usize::MAX {
write!(f, "│ {:^width$} ", "...", width = w)?;
} else { } else {
output.push_str(&format!( let col_name = &self.column_names[*col_idx];
"{:>width$} ", write!(f, "│ {:^width$} ", col_name, width = w)?;
col_name,
width = column_widths[col_name]
));
_displayed_cols += 1;
} }
} }
output.push('\n'); writeln!(f, "")?;
// Draw header separator
// --- Print Separator --- write!(f, "├{:─<1$}┼", "", index_width + 2)?;
output.push_str(&format!("{:-<width$}-", "", width = max_index_width)); for (i, w) in col_widths.iter().enumerate() {
for col_name in &cols_to_display { if i + 1 == col_widths.len() {
if col_name == "..." { write!(f, "{:─<1$}┤", "", w + 2)?;
output.push_str(&format!("{:-<width$}-", "", width = 5));
} else { } else {
output.push_str(&format!( write!(f, "{:─<1$}┼", "", w + 2)?;
"{:-<width$}-",
"",
width = column_widths[col_name]
));
} }
} }
output.push('\n'); writeln!(f)?;
// Draw data rows
// --- Print Rows --- for (row_pos, &i) in row_indices.iter().enumerate() {
let total_rows = self.rows(); if show_row_ellipsis && row_pos == display_rows {
let mut rows_to_display = Vec::new();
if total_rows <= DEFAULT_DISPLAY_ROWS {
rows_to_display = (0..total_rows).collect();
} else {
// Display first few and last few rows
let num_first_rows = DEFAULT_DISPLAY_ROWS / 2;
let num_last_rows = DEFAULT_DISPLAY_ROWS - num_first_rows;
rows_to_display.extend((0..num_first_rows).collect::<Vec<usize>>());
rows_to_display.push(usize::MAX); // Sentinel for ellipsis row
rows_to_display
.extend((total_rows - num_last_rows..total_rows).collect::<Vec<usize>>());
}
for &row_idx in &rows_to_display {
if row_idx == usize::MAX {
// Ellipsis row // Ellipsis row
output.push_str(&format!("{:>width$} ", "...", width = max_index_width)); write!(f, "│ {:>width$} ", "...", width = index_width)?;
for _ in &cols_to_display { for w in &col_widths {
output.push_str(&format!("{:>width$} ", "...", width = 5)); // Use a fixed width for ellipsis cells write!(f, "│ {:>width$} ", "...", width = *w)?;
} }
output.push('\n'); writeln!(f, "")?;
continue; // Draw row separator after ellipsis
} write!(f, "├{:─<1$}┼", "", index_width + 2)?;
for (j, w) in col_widths.iter().enumerate() {
// Print index if j + 1 == col_widths.len() {
let index_str = match &self.index { write!(f, "{:─<1$}┤", "", w + 2)?;
RowIndex::Range(r) => format!("{}", r.start + row_idx),
RowIndex::Int(v) => format!("{}", v[row_idx]),
RowIndex::Date(v) => format!("{}", v[row_idx]),
};
output.push_str(&format!("{:>width$} ", index_str, width = max_index_width));
// Print data cells
for col_name in &cols_to_display {
if col_name == "..." {
output.push_str(&format!("{:>width$} ", "...", width = 5));
} else { } else {
let col_data = self.column(col_name); write!(f, "{:─<1$}┼", "", w + 2)?;
let cell_str = match col_data { }
DataFrameColumn::F64(s) => format!("{:.2}", s[row_idx]), }
DataFrameColumn::I64(s) => format!("{}", s[row_idx]), writeln!(f)?;
DataFrameColumn::Bool(s) => format!("{}", s[row_idx]), }
DataFrameColumn::String(s) => format!("{}", s[row_idx]), let idx_str = match &self.index {
RowIndex::Range(r) => format!("{}", r.start + i),
RowIndex::Int(v) => format!("{}", v[i]),
RowIndex::Date(v) => format!("{}", v[i]),
}; };
output.push_str(&format!( write!(f, "│ {:>width$} ", idx_str, width = index_width)?;
"{:>width$} ", for (col_pos, col_idx) in col_indices.iter().enumerate() {
cell_str, if *col_idx == usize::MAX {
width = column_widths[col_name] write!(f, "│ {:>width$} ", "...", width = col_widths[col_pos])?;
)); } else {
let col_name = &self.column_names[*col_idx];
let col_data = self.column(col_name);
let cell_str = match &col_data {
DataFrameColumn::F64(s) => format!("{}", s[i]),
DataFrameColumn::I64(s) => format!("{}", s[i]),
DataFrameColumn::Bool(s) => format!("{}", s[i]),
DataFrameColumn::String(s) => format!("{}", s[i]),
};
write!(f, "│ {:>width$} ", cell_str, width = col_widths[col_pos])?;
} }
} }
output.push('\n'); writeln!(f, "")?;
// Draw row separator after every row except the last
if row_pos + 1 != row_indices.len() {
write!(f, "├{:─<1$}┼", "", index_width + 2)?;
for (j, w) in col_widths.iter().enumerate() {
if j + 1 == col_widths.len() {
write!(f, "{:─<1$}┤", "", w + 2)?;
} else {
write!(f, "{:─<1$}┼", "", w + 2)?;
} }
}
// --- Print Footer --- writeln!(f)?;
output.push_str(&format!( }
"\n[{} rows x {} columns]\n", }
self.rows(), // Draw bottom border
self.cols() write!(f, "└{:─<1$}┴", "", index_width + 2)?;
)); for (i, w) in col_widths.iter().enumerate() {
if i + 1 == col_widths.len() {
write!(f, "{}", output) write!(f, "{:─<1$}┘", "", w + 2)?;
} else {
write!(f, "{:─<1$}┴", "", w + 2)?;
}
}
writeln!(f)?;
write!(f, "[{} rows x {} columns]", self.rows(), self.cols())
} }
} }
@ -1512,7 +1581,8 @@ mod tests {
#[test] #[test]
fn test_dataframe_display_empty() { fn test_dataframe_display_empty() {
let empty_df = DataFrame::new(vec![], vec![], None); let empty_df = DataFrame::new(vec![], vec![], None);
let expected_output = "Empty DataFrame\nRows: 0, Columns: 0"; let expected_output = "\
+-------------------+\n| Empty DataFrame |\n+-------------------+\nRows: 0, Columns: 0";
assert_eq!(format!("{}", empty_df), expected_output); assert_eq!(format!("{}", empty_df), expected_output);
} }
@ -1532,21 +1602,14 @@ mod tests {
); );
let expected_output = "\ let expected_output = "\
Index A B C \n Index A B C \n\n 0 1 10 x \n\n 1 2 20 y \n\n 2 3 30 z \n\n[3 rows x 3 columns]";
------ ---- --- ---
0 1.00 10 x
1 2.00 20 y
2 3.00 30 z
[3 rows x 3 columns]
";
assert_eq!(format!("{}", df), expected_output); assert_eq!(format!("{}", df), expected_output);
} }
#[test] #[test]
fn test_dataframe_display_truncation_rows() { fn test_dataframe_display_truncation_rows() {
let col_a = create_f64_typed_frame("A", (1..=10).map(|i| i as f64).collect(), None); let col_a = create_f64_typed_frame("A", (1..=20).map(|i| i as f64).collect(), None);
let col_b = create_i64_typed_frame("B", (11..=20).collect(), None); let col_b = create_i64_typed_frame("B", (21..=40).collect(), None);
let df = DataFrame::new( let df = DataFrame::new(
vec![col_a, col_b], vec![col_a, col_b],
vec!["A".to_string(), "B".to_string()], vec!["A".to_string(), "B".to_string()],
@ -1554,17 +1617,8 @@ mod tests {
); );
let expected_output = "\ let expected_output = "\
Index A B \n Index A B \n\n 0 1 21 \n\n 1 2 22 \n\n 2 3 23 \n\n 3 4 24 \n\n 4 5 25 \n\n[5 rows x 2 columns]";
------ ---- --- assert_eq!(format!("{}", df.head()), expected_output);
0 1.00 11
1 2.00 12
... ... ...
8 9.00 19
9 10.00 20
[10 rows x 2 columns]
";
assert_eq!(format!("{}", df), expected_output);
} }
#[test] #[test]
@ -1583,16 +1637,10 @@ mod tests {
); );
let df = DataFrame::new(vec![typed_frame], col_names, None); let df = DataFrame::new(vec![typed_frame], col_names, None);
// Only the first DEFAULT_DISPLAY_COLS columns should be shown in the output
let expected_output = "\ let expected_output = "\
Index Col0 Col1 Col2 Col3 Col4 ... Col10 Col11 Col12 Col13 Col14 \n Index Col0 Col1 Col2 Col3 Col4 ... Col10 Col11 Col12 Col13 Col14 \n\n 0 1 11 21 31 41 ... 101 111 121 131 141 \n\n 1 2 12 22 32 42 ... 102 112 122 132 142 \n\n 2 3 13 23 33 43 ... 103 113 123 133 143 \n\n[3 rows x 15 columns]";
------ ---- ---- ---- ---- ---- --- ----- ----- ----- ----- ----- assert_eq!(format!("{}", df.head()), expected_output);
0 1.00 11.00 21.00 31.00 41.00 ... 101.00 111.00 121.00 131.00 141.00
1 2.00 12.00 22.00 32.00 42.00 ... 102.00 112.00 122.00 132.00 142.00
2 3.00 13.00 23.00 33.00 43.00 ... 103.00 113.00 123.00 133.00 143.00
[3 rows x 15 columns]
";
assert_eq!(format!("{}", df), expected_output);
} }
#[test] #[test]
@ -1601,7 +1649,7 @@ mod tests {
let mut col_names = Vec::new(); let mut col_names = Vec::new();
for i in 0..15 { for i in 0..15 {
// 15 columns // 15 columns
cols_data.push((1..=10).map(|r| (i * 10 + r) as f64).collect()); // 10 rows cols_data.push((1..=10).map(|r| (i * 10 + r) as f64).collect());
col_names.push(format!("Col{}", i)); col_names.push(format!("Col{}", i));
} }
let typed_frame = create_multi_f64_typed_frame( let typed_frame = create_multi_f64_typed_frame(
@ -1611,17 +1659,9 @@ mod tests {
); );
let df = DataFrame::new(vec![typed_frame], col_names, None); let df = DataFrame::new(vec![typed_frame], col_names, None);
// Only the first DEFAULT_DISPLAY_ROWS rows and DEFAULT_DISPLAY_COLS columns should be shown
let expected_output = "\ let expected_output = "\
Index Col0 Col1 Col2 Col3 Col4 ... Col10 Col11 Col12 Col13 Col14 \n Index Col0 Col1 Col2 Col3 Col4 ... Col10 Col11 Col12 Col13 Col14 \n\n 0 1 11 21 31 41 ... 101 111 121 131 141 \n\n 1 2 12 22 32 42 ... 102 112 122 132 142 \n\n 2 3 13 23 33 43 ... 103 113 123 133 143 \n\n 3 4 14 24 34 44 ... 104 114 124 134 144 \n\n 4 5 15 25 35 45 ... 105 115 125 135 145 \n\n[5 rows x 15 columns]";
------ ---- ---- ---- ---- ---- --- ----- ----- ----- ----- ----- assert_eq!(format!("{}", df.head()), expected_output);
0 1.00 11.00 21.00 31.00 41.00 ... 101.00 111.00 121.00 131.00 141.00
1 2.00 12.00 22.00 32.00 42.00 ... 102.00 112.00 122.00 132.00 142.00
... ... ... ... ... ... ... ... ... ... ... ...
8 9.00 19.00 29.00 39.00 49.00 ... 109.00 119.00 129.00 139.00 149.00
9 10.00 20.00 30.00 40.00 50.00 ... 110.00 120.00 130.00 140.00 150.00
[10 rows x 15 columns]
";
assert_eq!(format!("{}", df), expected_output);
} }
} }