Skip to content

Commit

Permalink
Starting to refactor Storage and Operations (#9076)
Browse files Browse the repository at this point in the history
Cleaning up some of the structures in Storage before working on UnaryOperations.

- Removed some legacy code: `countMask`, `Index` and `DefaultIndex`.
- Renamed `mask` to `applyFilter` on `Column` and `Storage`.
- Renamed `Table.mask` to `Table.filter`.
  • Loading branch information
jdunkerley authored Feb 15, 2024
1 parent d29c2cd commit f2d2f73
Show file tree
Hide file tree
Showing 19 changed files with 67 additions and 439 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Internal.Polyglot_Helpers
import Standard.Base.Internal.Rounding_Helpers
from Standard.Base.Metadata.Widget import Numeric_Input
from Standard.Base.Widget_Helpers import make_format_chooser, make_regex_text_widget

import project.Data.Constants.Previous_Value
Expand Down Expand Up @@ -144,16 +145,15 @@ type Column
display : Integer -> Boolean -> Text
display self show_rows=10 format_terminal=False =
java_col = self.java_column
index = java_col.getIndex
col_name = normalize_string_for_display java_col.getName
storage = java_col.getStorage
num_rows = java_col.getSize
display_rows = num_rows.min show_rows
items = Vector.new display_rows num->
row = if storage.isNa num then "Nothing" else
get_item_string storage num
[index.ilocString num, row]
table = print_table [index.getName, col_name] items 1 format_terminal
[num.to_text, row]
table = print_table ["", col_name] items 1 format_terminal
if num_rows - display_rows <= 0 then table else
missing = '\n\u2026 and ' + (num_rows - display_rows).to_text + ' hidden rows.'
table + missing
Expand Down Expand Up @@ -2093,6 +2093,7 @@ type Column
import Standard.Examples

example_at = Examples.integer_column.at 0
@index (self-> Numeric_Input minimum=0 maximum=self.length-1)
at : Integer -> (Any | Nothing) ! Index_Out_Of_Bounds
at self (index : Integer) =
self.get index (Error.throw (Index_Out_Of_Bounds.Error index self.length))
Expand All @@ -2111,6 +2112,7 @@ type Column
import Standard.Examples

example_get = Examples.integer_column.get 0 -1
@index (self-> Numeric_Input minimum=0 maximum=self.length-1)
get : Integer -> Any -> Any | Nothing
get self (index : Integer) (~default=Nothing) =
valid_index = (index >= 0) && (index < self.length)
Expand Down
7 changes: 3 additions & 4 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -179,15 +179,14 @@ type Table
display : Integer -> Boolean -> Text
display self show_rows=10 format_terminal=False =
cols = Vector.from_polyglot_array self.java_table.getColumns
index = self.java_table.getIndex
col_names = ([index.getName] + cols.map .getName) . map normalize_string_for_display
col_names = ([""] + cols.map .getName) . map normalize_string_for_display
col_vals = cols.map .getStorage
num_rows = self.row_count
display_rows = num_rows.min show_rows
rows = Vector.new display_rows row_num->
cols = col_vals.map col->
if col.isNa row_num then "Nothing" else get_item_string col row_num
[index.ilocString row_num] + cols
[row_num.to_text] + cols
table = print_table col_names rows 1 format_terminal
if num_rows - display_rows <= 0 then table else
missing = '\n\u2026 and ' + (num_rows - display_rows).to_text + ' hidden rows.'
Expand Down Expand Up @@ -1419,7 +1418,7 @@ type Table
filter : (Column | Text | Integer) -> (Filter_Condition | (Any -> Boolean)) -> Problem_Behavior -> Table ! No_Such_Column | Index_Out_Of_Bounds | Invalid_Value_Type
filter self column (filter : Filter_Condition | (Any -> Boolean) = Filter_Condition.Equal True) on_problems=Report_Warning = case column of
_ : Column ->
mask filter_column = Table.Value (self.java_table.mask filter_column.java_column)
mask filter_column = Table.Value (self.java_table.filter filter_column.java_column)
case filter of
_ : Filter_Condition ->
resolved = (self:Table_Ref).resolve_condition filter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,7 @@ fan_out_to_rows_and_columns table input_column_id function column_names at_least
new_columns_unflattened = table.columns.map column->
# Replace the input column with the output columns.
if column.name == input_column_id then output_columns else
# Build a new column from the old one with the mask
old_storage = column.java_column.getStorage
new_storage = old_storage.applyMask order_mask
[Column.from_storage column.name new_storage]
[Column.Value (column.java_column.applyMask order_mask)]
new_table = Table.new new_columns_unflattened.flatten
problem_builder.attach_problems_after on_problems new_table

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.enso.table.data.column.operation.map.bool.BooleanIsInOp;
import org.enso.table.data.column.storage.type.BooleanType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.enso.table.error.UnexpectedColumnTypeException;
Expand Down Expand Up @@ -177,13 +176,13 @@ public Storage<?> fillMissingFromPrevious(BoolStorage missingIndicator) {
}

@Override
public BoolStorage mask(BitSet mask, int cardinality) {
public BoolStorage applyFilter(BitSet filterMask, int newLength) {
Context context = Context.getCurrent();
BitSet newMissing = new BitSet();
BitSet newValues = new BitSet();
int resultIx = 0;
for (int i = 0; i < size; i++) {
if (mask.get(i)) {
if (filterMask.get(i)) {
if (isMissing.get(i)) {
newMissing.set(resultIx++);
} else if (values.get(i)) {
Expand All @@ -197,7 +196,7 @@ public BoolStorage mask(BitSet mask, int cardinality) {

context.safepoint();
}
return new BoolStorage(newValues, newMissing, cardinality, negated);
return new BoolStorage(newValues, newMissing, newLength, negated);
}

@Override
Expand All @@ -207,7 +206,7 @@ public BoolStorage applyMask(OrderMask mask) {
BitSet newVals = new BitSet();
for (int i = 0; i < mask.length(); i++) {
int position = mask.get(i);
if (position == Index.NOT_FOUND || isMissing.get(position)) {
if (position == Storage.NOT_FOUND_INDEX || isMissing.get(position)) {
newNa.set(i);
} else if (values.get(position)) {
newVals.set(i);
Expand All @@ -218,25 +217,6 @@ public BoolStorage applyMask(OrderMask mask) {
return new BoolStorage(newVals, newNa, mask.length(), negated);
}

/**
 * Builds a new {@code BoolStorage} in which the i-th row of this storage is repeated {@code
 * counts[i]} consecutive times, preserving the original row order.
 *
 * @param counts per-row repetition counts; the loop visits every index of this array
 * @param total passed to the resulting storage as its size
 * @return the expanded storage, constructed with this storage's {@code negated} flag unchanged
 */
@Override
public BoolStorage countMask(int[] counts, int total) {
Context context = Context.getCurrent();
BitSet newNa = new BitSet();
BitSet newVals = new BitSet();
// Index in the output at which the run of copies for the current source row begins.
int pos = 0;
for (int i = 0; i < counts.length; i++) {
// Only set bits are written: a row that is neither missing nor set in `values` leaves its
// whole output run clear in both bit sets.
if (isMissing.get(i)) {
// BitSet.set(from, to) sets the half-open range [pos, pos + counts[i]).
newNa.set(pos, pos + counts[i]);
} else if (values.get(i)) {
newVals.set(pos, pos + counts[i]);
}
pos += counts[i];

// Polled once per source row, not once per produced copy.
context.safepoint();
}
return new BoolStorage(newVals, newNa, total, negated);
}

/**
 * @return the {@code negated} flag of this storage
 */
// NOTE(review): presumably the flag means the stored bits are the logical inverse of the
// represented values — confirm against the class's other accessors.
public boolean isNegated() {
return negated;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ public Storage<?> fillMissingFromPrevious(BoolStorage missingIndicator) {
}

@Override
public Storage<Object> mask(BitSet mask, int cardinality) {
Storage<?> newStorage = underlyingStorage.mask(mask, cardinality);
public Storage<Object> applyFilter(BitSet filterMask, int newLength) {
Storage<?> newStorage = underlyingStorage.applyFilter(filterMask, newLength);
return new MixedStorageFacade(newStorage);
}

Expand All @@ -102,12 +102,6 @@ public Storage<Object> applyMask(OrderMask mask) {
return new MixedStorageFacade(newStorage);
}

/**
 * Delegates {@code countMask} to the underlying storage and wraps the result in a new {@code
 * MixedStorageFacade}.
 *
 * @param counts forwarded unchanged to the underlying storage
 * @param total forwarded unchanged to the underlying storage
 * @return a facade around the storage returned by the underlying implementation
 */
@Override
public Storage<Object> countMask(int[] counts, int total) {
Storage<?> newStorage = underlyingStorage.countMask(counts, total);
return new MixedStorageFacade(newStorage);
}

@Override
public Storage<Object> slice(int offset, int limit) {
Storage<?> newStorage = underlyingStorage.slice(offset, limit);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.graalvm.polyglot.Context;
Expand Down Expand Up @@ -110,18 +109,18 @@ public Storage<?> runVectorizedZip(
}

@Override
public SpecializedStorage<T> mask(BitSet mask, int cardinality) {
public SpecializedStorage<T> applyFilter(BitSet filterMask, int newLength) {
Context context = Context.getCurrent();
T[] newData = newUnderlyingArray(cardinality);
T[] newData = newUnderlyingArray(newLength);
int resIx = 0;
for (int i = 0; i < size; i++) {
if (mask.get(i)) {
if (filterMask.get(i)) {
newData[resIx++] = data[i];
}

context.safepoint();
}
return newInstance(newData, cardinality);
return newInstance(newData, newLength);
}

@Override
Expand All @@ -130,26 +129,12 @@ public SpecializedStorage<T> applyMask(OrderMask mask) {
T[] newData = newUnderlyingArray(mask.length());
for (int i = 0; i < mask.length(); i++) {
int position = mask.get(i);
newData[i] = position == Index.NOT_FOUND ? null : data[position];
newData[i] = position == Storage.NOT_FOUND_INDEX ? null : data[position];
context.safepoint();
}
return newInstance(newData, newData.length);
}

/**
 * Builds a new storage in which the i-th element of {@code data} appears {@code counts[i]}
 * consecutive times, in the original order.
 *
 * @param counts per-element repetition counts; the loop visits every index of this array
 * @param total length of the newly allocated backing array and the size given to the new
 *     instance
 * @return a new instance created through {@code newInstance} over the expanded array
 */
@Override
public SpecializedStorage<T> countMask(int[] counts, int total) {
Context context = Context.getCurrent();
T[] newData = newUnderlyingArray(total);
// Next free slot in newData.
int pos = 0;
for (int i = 0; i < counts.length; i++) {
for (int j = 0; j < counts[i]; j++) {
newData[pos++] = data[i];
// Polled once per copied element (inside the inner loop).
context.safepoint();
}
}
return newInstance(newData, total);
}

/**
 * @return the {@code data} array itself; no copy is made, so the caller shares the same array
 *     instance as this storage
 */
public T[] getData() {
return data;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

/** An abstract representation of a data column. */
public abstract class Storage<T> {
/** A constant representing the index of a missing value in a column. */
public static final int NOT_FOUND_INDEX = -1;

/**
* @return the number of elements in this column (including NAs)
*/
Expand Down Expand Up @@ -472,11 +475,11 @@ public Storage<?> fillMissingFrom(
/**
* Return a new storage, containing only the items marked true in the mask.
*
* @param mask the mask to use
* @param cardinality the number of true values in mask
* @return a new storage, masked with the given mask
* @param filterMask the mask to use
* @param newLength the number of true values in mask
* @return a new storage, filtered with the given mask
*/
public abstract Storage<T> mask(BitSet mask, int cardinality);
public abstract Storage<T> applyFilter(BitSet filterMask, int newLength);

/**
* Returns a new storage, ordered according to the rules specified in a mask.
Expand All @@ -485,19 +488,6 @@ public Storage<?> fillMissingFrom(
*/
public abstract Storage<T> applyMask(OrderMask mask);

/**
* Returns a new storage, resulting from applying the rules specified in a mask. The resulting
* storage should contain the elements of the original storage, in the same order. However, the
* number of consecutive copies of the i-th element of the original storage should be {@code
* counts[i]}.
*
* @param counts the mask specifying elements duplication
* @param total the sum of all elements in the mask, also interpreted as the length of the
* resulting storage
* @return the storage masked according to the specified rules
*/
public abstract Storage<T> countMask(int[] counts, int total);

/**
* @return a copy of the storage containing a slice of the original data
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import java.util.List;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.graalvm.polyglot.Context;
Expand Down Expand Up @@ -63,19 +62,19 @@ public BitSet getIsMissing() {
}

@Override
public Storage<Long> mask(BitSet mask, int cardinality) {
public Storage<Long> applyFilter(BitSet filterMask, int newLength) {
BitSet newMissing = new BitSet();
long[] newData = new long[cardinality];
long[] newData = new long[newLength];
int resIx = 0;
Context context = Context.getCurrent();
for (int i = 0; i < size; i++) {
if (mask.get(i)) {
if (filterMask.get(i)) {
newData[resIx++] = getItem(i);
}

context.safepoint();
}
return new LongStorage(newData, cardinality, newMissing, getType());
return new LongStorage(newData, newLength, newMissing, getType());
}

@Override
Expand All @@ -85,7 +84,7 @@ public Storage<Long> applyMask(OrderMask mask) {
Context context = Context.getCurrent();
for (int i = 0; i < mask.length(); i++) {
int position = mask.get(i);
if (position == Index.NOT_FOUND) {
if (position == Storage.NOT_FOUND_INDEX) {
newMissing.set(i);
} else {
newData[i] = getItem(position);
Expand All @@ -96,23 +95,6 @@ public Storage<Long> applyMask(OrderMask mask) {
return new LongStorage(newData, newData.length, newMissing, getType());
}

/**
 * Builds a new {@code LongStorage} in which the value of the i-th row, read through {@code
 * getItem(i)}, is repeated {@code counts[i]} consecutive times.
 *
 * @param counts per-row repetition counts; the loop visits every index of this array
 * @param total length of the newly allocated {@code long[]} and the size given to the result
 * @return the expanded storage, created with this storage's {@code getType()}
 */
@Override
public Storage<Long> countMask(int[] counts, int total) {
long[] newData = new long[total];
// Never modified below, so the result is created with an empty missing-value set.
// NOTE(review): presumably this storage never holds missing values — confirm.
BitSet newMissing = new BitSet();
// Next free slot in newData.
int pos = 0;
Context context = Context.getCurrent();
for (int i = 0; i < counts.length; i++) {
// Read once per source row, then reused for each of its copies.
long item = getItem(i);
for (int j = 0; j < counts[i]; j++) {
newData[pos++] = item;
}

// Polled once per source row, not once per produced copy.
context.safepoint();
}
return new LongStorage(newData, total, newMissing, getType());
}

@Override
public Storage<Long> slice(int offset, int limit) {
int newSize = Math.min(size - offset, limit);
Expand Down
Loading

0 comments on commit f2d2f73

Please sign in to comment.