Created
October 5, 2021 11:00
-
-
Save tecno14/799fe5894927cfdc45c105b8cdbf28b5 to your computer and use it in GitHub Desktop.
LabelEncoder in C# (double to int only)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Linq; | |
using System.Collections.Generic; | |
namespace PricePrediction.MachineLearning | |
{ | |
/// <summary>
/// Encodes <see cref="double"/> labels as consecutive integer codes and decodes them back.
/// Codes are assigned in order of first appearance while fitting; for example,
/// fitting [0.1, 2, 9] encodes those values as [0, 1, 2].
/// Similar to: https://github.com/foreverzet/Sharpkit.Learn/blob/master/src/Sharpkit.Learn/Preprocessing/LabelEncoder.cs
/// </summary>
public class LabelEncoder
{
    /// <summary>
    /// The known labels. The position of a label in this list is its integer code,
    /// so the valid codes are always 0 .. Classes.Count - 1.
    /// </summary>
    public readonly List<double> Classes;

    /// <summary>
    /// Creates an encoder with no known labels.
    /// </summary>
    public LabelEncoder()
    {
        Classes = new();
    }

    /// <summary>
    /// Fits the encoder on <paramref name="data"/> and returns the encoded values.
    /// </summary>
    /// <param name="data">Target values.</param>
    /// <returns>One integer code per element of <paramref name="data"/>, in the same order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public List<int> FitTransform(IList<double> data)
    {
        return Fit(data).Transform(data);
    }

    /// <summary>
    /// Fit label encoder. Labels not seen before are appended to <see cref="Classes"/>
    /// in order of first appearance; codes of already known labels never change.
    /// </summary>
    /// <param name="data">Target values.</param>
    /// <returns>Returns an instance of self.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public LabelEncoder Fit(IList<double> data)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        // Seed the set with the labels we already know so a repeated Fit only appends
        // new ones. HashSet.Add doubles as the "already seen?" test, which also
        // de-duplicates the input without a separate Distinct() pass.
        HashSet<double> known = new(Classes);
        foreach (double item in data)
        {
            if (known.Add(item))
            {
                Classes.Add(item);
            }
        }

        return this;
    }

    /// <summary>
    /// Transform labels to their integer codes using the fitted <see cref="Classes"/>.
    /// </summary>
    /// <param name="data">Target values.</param>
    /// <returns>One integer code per element of <paramref name="data"/>, in the same order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">A value is not present in <see cref="Classes"/>.</exception>
    public List<int> Transform(IList<double> data)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        // Build the value -> code lookup once per call instead of scanning Classes for
        // every item. It is rebuilt on each call because Classes is publicly mutable.
        // TryAdd keeps the first index of a value, matching List.IndexOf semantics.
        Dictionary<double, int> codes = new(Classes.Count);
        for (int i = 0; i < Classes.Count; i++)
        {
            codes.TryAdd(Classes[i], i);
        }

        List<int> result = new(data.Count);
        foreach (double item in data)
        {
            if (!codes.TryGetValue(item, out int index))
            {
                throw new KeyNotFoundException($"{item} not found");
            }

            result.Add(index);
        }

        return result;
    }

    /// <summary>
    /// Transform a single integer code back to its original label.
    /// </summary>
    /// <param name="data">Encoded value.</param>
    /// <returns>The label stored at position <paramref name="data"/> in <see cref="Classes"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="data"/> is negative or not less than the number of known classes.
    /// </exception>
    public double InverseTransform(int data)
    {
        if (data < 0 || data >= Classes.Count)
        {
            throw new ArgumentOutOfRangeException(nameof(data), data, $"{data} not found");
        }

        return Classes[data];
    }

    /// <summary>
    /// Transform integer codes back to their original labels.
    /// </summary>
    /// <param name="data">Encoded values.</param>
    /// <returns>One label per element of <paramref name="data"/>, in the same order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// A code is negative or not less than the number of known classes.
    /// </exception>
    public List<double> InverseTransform(IList<int> data)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        List<double> result = new(data.Count);
        foreach (int item in data)
        {
            result.Add(InverseTransform(item));
        }

        return result;
    }
}
} |
Author
tecno14
commented
Oct 5, 2021
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment