Skip to content

Commit

Permalink
refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack Dermody committed Aug 16, 2024
1 parent 4fc4d36 commit 2a09d03
Show file tree
Hide file tree
Showing 52 changed files with 1,316 additions and 1,070 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,5 @@
/Benchmarks/obj
/BrightData.Parquet/bin
/BrightData.Parquet/obj
/BrightData.DataFrame/bin/Debug/net8.0
/BrightData.DataFrame/obj
/BrightData.DataFrame/bin
1 change: 1 addition & 0 deletions Benchmarks/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ static void Main(string[] args)
BenchmarkSetProperty();
BenchmarkSum();
BenchmarkDot();
BenchmarkRunner.Run<SortedArrayBenchmarks>();
}

static void BenchmarkSetProperty()
Expand Down
45 changes: 45 additions & 0 deletions Benchmarks/SortedArrayBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using BenchmarkDotNet.Attributes;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using BrightData;
using BrightData.Types;

namespace Benchmarks
{
/// <summary>
/// Compares filling and then searching a <c>SortedList&lt;uint, float&gt;</c> against
/// a BrightData <c>IndexedSortedArray</c> for a range of collection sizes.
/// </summary>
public class SortedArrayBenchmarks
{
    /// <summary>
    /// Input shared by both benchmarks; built outside the measured region.
    /// </summary>
    float[] _data = Array.Empty<float>();

    /// <summary>
    /// Number of items added to each collection per benchmark invocation.
    /// </summary>
    [Params(8, 32, 128, 1024, 32768)]
    public int Size { get; set; }

    /// <summary>
    /// Builds the input once per <see cref="Size"/> value so that the LINQ pipeline and
    /// array allocation are not included in the measured time of either benchmark.
    /// </summary>
    [GlobalSetup]
    public void Setup() => _data = CreateData(Size);

    /// <summary>
    /// Baseline: fill a <c>SortedList&lt;uint, float&gt;</c> and search it.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void SortedList() => SortedList(GetData());

    /// <summary>
    /// Fill an <c>IndexedSortedArray</c> and search it.
    /// </summary>
    [Benchmark]
    public void IndexedSortedArray() => IndexedSortedArray(GetData());

    /// <summary>
    /// Adds every item of <paramref name="data"/> to a sorted list, keyed by its position,
    /// and then searches the list for key 0.
    /// </summary>
    /// <param name="data">Values to add; the array position is used as the key.</param>
    public static void SortedList(float[] data)
    {
        var list = new SortedList<uint, float>(data.Length);
        for (var i = 0U; i < data.Length; i++)
            list.Add(i, data[i]);

        // the result is not needed, only the cost of the search
        _ = list.BinarySearch<uint, float>(0);
    }

    /// <summary>
    /// Pairs a value with the index that it is stored under.
    /// </summary>
    readonly struct IndexedValue<T>(uint index, T value) : IHaveSingleIndex
    {
        public uint Index => index;
        public T Value => value;
    }

    /// <summary>
    /// Adds every item of <paramref name="data"/> to an indexed sorted array, indexed by its
    /// position, and then searches the array for index 0.
    /// </summary>
    /// <param name="data">Values to add; the array position is used as the index.</param>
    public static void IndexedSortedArray(float[] data)
    {
        var list = new IndexedSortedArray<IndexedValue<float>>(data.Length);
        for (var i = 0U; i < data.Length; i++)
            list.Add(new IndexedValue<float>(i, data[i]));

        // the result is not needed, only the cost of the search
        _ = list.Find(0);
    }

    /// <summary>
    /// Returns the prepared input, rebuilding it if the benchmark method was invoked
    /// without <see cref="Setup"/> having run for the current <see cref="Size"/>.
    /// </summary>
    float[] GetData()
    {
        if (_data.Length != Size)
            _data = CreateData(Size);
        return _data;
    }

    /// <summary>
    /// Creates the descending sequence size, size - 1, ..., 1.
    /// </summary>
    static float[] CreateData(int size) => Enumerable.Range(0, size).Select(x => (float)size - x).ToArray();
}
}
29 changes: 13 additions & 16 deletions BrightData.Parquet/ExtensionMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,32 +24,29 @@ public static async Task<IDataTable> CreateTableFromParquet(this BrightDataConte
builder.TableMetaData.Set(key, value);

// write the data
var tasks = new Task[columnCount];
for (var i = 0; i < reader.RowGroupCount; i++) {
using var rowGroupReader = reader.OpenRowGroupReader(i);
for (var j = 0; j < columnCount; j++) {
var field = fields[j];
var buffer = columns[j] ??= CreateColumn(rowGroupReader, field, columnMetaData[i], builder);
var columnType = columnTypes[j];
tasks[j] = rowGroupReader.ReadColumnAsync(fields[j]).ContinueWith(x => {
var parquetData = x.Result.Data;
if (field.IsNullable) {
if (columnType.IsValueType) {
var defaultValue = Activator.CreateInstance(columnType);
foreach (var item in parquetData)
buffer.AppendObject(item ?? defaultValue!);
}
else
throw new Exception($"Nullable non value types are not supported: {columnType}");
}
else {
var column = await rowGroupReader.ReadColumnAsync(fields[j]);
var parquetData = column.Data;
if (field.IsNullable) {
if (columnType.IsValueType) {
var defaultValue = Activator.CreateInstance(columnType);
foreach (var item in parquetData)
buffer.AppendObject(item!);
buffer.AppendObject(item ?? defaultValue!);
}
});
else
throw new Exception($"Nullable non value types are not supported: {columnType}");
}
else {
foreach (var item in parquetData)
buffer.AppendObject(item!);
}

}
await Task.WhenAll(tasks);
}

// write to stream
Expand Down
3 changes: 1 addition & 2 deletions BrightData.Parquet/ParquetDataTableAdaptor.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
using BrightData.Buffer.ReadOnly.Helper;
using BrightData.Converter;
using BrightData.DataTable.Helper;
using BrightData.DataTable.Meta;
using BrightData.DataTable.Rows;
using BrightData.Helper;
using BrightData.Parquet.BufferAdaptors;
using BrightData.Types;
using Parquet;
Expand Down
2 changes: 1 addition & 1 deletion BrightData.UnitTests/BufferTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ await intBuffer.ForEachBlock(block => {
foreach (var num in block)
num.Should().Be(++index);
});
var test = await intBuffer.ToNumeric(_streamProvider, 256);
var test = await intBuffer.ToNumeric();
test.DataType.Should().Be(typeof(sbyte));

index = 0;
Expand Down
13 changes: 0 additions & 13 deletions BrightData.UnitTests/SortedArrayTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,5 @@ public void TestIndexedSortedArray()
array.TryFind(1, out var index).Should().BeTrue();
index!.Value.Index.Should().Be(1);
}

[Fact]
public void TestSortedArray()
{
    // keys 0..3 are added with weights 4, 3, 2, 1
    var sorted = new SortedArray<uint, float>(4);
    for (uint key = 0; key < 4; key++) {
        sorted.Add(key, 4-key);
    }

    // every item is retained; the first and last entries hold the smallest and largest weights
    sorted.Size.Should().Be(4);
    sorted[0].Weight.Should().Be(1);
    sorted[3].Weight.Should().Be(4);

    // key 1 was added with weight 4 - 1 = 3
    var found = sorted.TryFind(1, out var weight);
    found.Should().BeTrue();
    weight.Should().Be(3);
}
}
}
1 change: 0 additions & 1 deletion BrightData/Analysis/CastToDoubleNumericAnalysis.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ internal class CastToDoubleNumericAnalysis<T>(uint writeCount = Consts.MaxWriteC
where T : unmanaged, INumber<T>
{
readonly ConvertToDouble<T> _converter = new();
ulong _count;

public NumericAnalyser<double> Analysis { get; } = new(writeCount);

Expand Down
7 changes: 0 additions & 7 deletions BrightData/Analysis/Readers/DictionaryValues.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,6 @@ internal DictionaryValues(MetaData metaData)
.ToList();
}

/// <summary>
/// Maps each category index in a sequence to its string
/// </summary>
/// <param name="categoryIndices">Category indices to look up</param>
/// <returns>The string found for each category index, in the order the indices were supplied</returns>
public IEnumerable<string> GetValues(IEnumerable<int> categoryIndices)
{
    return categoryIndices.Select(categoryIndex => _table[categoryIndex]);
}

/// <summary>
/// Gets the string associated with a category index
/// </summary>
Expand Down
7 changes: 5 additions & 2 deletions BrightData/Analysis/Readers/NumericAnalysis.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace BrightData.Analysis.Readers
/// <summary>
/// Numeric analysis results
/// </summary>
public class NumericAnalysis
public class NumericAnalysis : INumericAnalysis<double>
{
internal NumericAnalysis(MetaData metaData)
{
Expand Down Expand Up @@ -79,6 +79,9 @@ internal NumericAnalysis(MetaData metaData)
/// </summary>
public double? PopulationStdDev { get; }

/// <inheritdoc />
public ulong Count => Total;

/// <summary>
/// Median value
/// </summary>
Expand All @@ -92,7 +95,7 @@ internal NumericAnalysis(MetaData metaData)
/// <summary>
/// Number of distinct values
/// </summary>
public uint? NumDistinct { get; }
public uint NumDistinct { get; }

/// <summary>
/// Total count of items
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using BrightData.Buffer.Operations;
using BrightData.Helper;

namespace BrightData.Buffer.Operations.Helper
namespace BrightData.Analysis
{
/// <summary>
/// Casts to double to perform numerical analysis
Expand Down
4 changes: 4 additions & 0 deletions BrightData/BrightData.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,8 @@
<DependentUpon>FixedSizeSortedArrayTemplate.tt</DependentUpon>
</Compile>
</ItemGroup>

<ItemGroup>
<Folder Include="DataTable\Helper\" />
</ItemGroup>
</Project>
Loading

0 comments on commit 2a09d03

Please sign in to comment.