Add some new features for ANN #146

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
@@ -33,8 +33,8 @@
*
* There're mainly three stages:
* <ol>
* <li>1. All workers send the there partial data to other workers for reduce.</li>
* <li>2. All workers do reduce on all data it received and then send partial results to others.</li>
* <li>1. All workers send their partial data to the other workers for reduce.</li>
* <li>2. All workers do reduce on all data they received and then send partial results to the others.</li>
* <li>3. All workers merge partial results into final result and put it into session context with pre-defined
* object name.</li>
* </ol>
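The three stages above amount to a reduce-scatter followed by an all-gather. Below is a minimal single-process sketch of that data flow; the chunk partitioning and worker arrays are illustrative assumptions, not the actual AllReduce implementation.

```java
import java.util.Arrays;

public class AllReduceSketch {
    public static void main(String[] args) {
        // Each worker holds a partial double[] of the same length; the goal is the
        // element-wise sum available on every worker.
        double[][] partial = {
            {1, 2, 3, 4, 5, 6},
            {1, 1, 1, 1, 1, 1},
            {0, 1, 0, 1, 0, 1}
        };
        int numWorkers = partial.length;
        int len = partial[0].length;
        int chunk = (len + numWorkers - 1) / numWorkers;

        // Stages 1 + 2: worker w receives everyone's copy of its chunk [w*chunk, w*chunk+chunk)
        // and reduces (sums) it, then sends the partial result to the others.
        double[][] reduced = new double[numWorkers][];
        for (int w = 0; w < numWorkers; w++) {
            int start = w * chunk;
            int end = Math.min(start + chunk, len);
            reduced[w] = new double[end - start];
            for (double[] p : partial) {
                for (int i = start; i < end; i++) {
                    reduced[w][i - start] += p[i];
                }
            }
        }

        // Stage 3: every worker concatenates the partial results into the final result.
        double[] result = new double[len];
        for (int w = 0; w < numWorkers; w++) {
            System.arraycopy(reduced[w], 0, result, w * chunk, reduced[w].length);
        }
        System.out.println(Arrays.toString(result)); // [2.0, 4.0, 4.0, 6.0, 6.0, 8.0]
    }
}
```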
@@ -8,7 +8,7 @@
import java.util.function.Consumer;

/**
* An interface indicating a BatchOperator can information for its training process.
* An interface indicating the information a BatchOperator can provide for its training process.
*
* @param <S> the class which conveys the train information.
* @param <T> the BatchOperator class which provides the train information.
@@ -29,7 +29,7 @@

/**
* MultilayerPerceptronClassifier is a neural network based multi-class classifier.
* Valina neural network with all dense layers are used, the output layer is a softmax layer.
* A vanilla neural network with all dense layers is used; the output layer is a softmax layer.
* Number of inputs has to be equal to the size of feature vectors.
* Number of outputs has to be equal to the total number of labels.
*/
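For example, mirroring the Iris test added in this PR (4 features, 3 labels), a valid layer configuration is:

```java
// 4 inputs = feature dimension, one hidden layer of 5 units, 3 outputs = number of labels
int[] layers = new int[]{4, 5, 3};
```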
@@ -143,7 +143,8 @@ public MultilayerPerceptronTrainBatchOp linkFrom(BatchOperator<?>... inputs) {
final int[] layerSize = getLayers();
final int blockSize = getBlockSize();
final DenseVector initialWeights = getInitialWeights();
Topology topology = FeedForwardTopology.multiLayerPerceptron(layerSize, true);
final double dropoutRate = getDropoutRate();
Topology topology = FeedForwardTopology.multiLayerPerceptron(layerSize, true, dropoutRate);
FeedForwardTrainer trainer = new FeedForwardTrainer(topology,
layerSize[0], layerSize[layerSize.length - 1], true, blockSize, initialWeights);
DataSet<DenseVector> weights = trainer.train(trainData, getParams());
@@ -0,0 +1,32 @@
package com.alibaba.alink.operator.common.classification.ann;

/**
* Layer properties of the dropout layer.
*/
public class DropoutLayer extends Layer {
public double dropoutRate;

public DropoutLayer(double dropoutRate) {
this.dropoutRate = dropoutRate;
}

@Override
public LayerModel createModel() {
return new DropoutLayerModel(this);
}

@Override
public int getWeightSize() {
return 0;
}

@Override
public int getOutputSize(int inputSize) {
return inputSize;
}

@Override
public boolean isInPlace() {
return true;
}
}
@@ -0,0 +1,48 @@
package com.alibaba.alink.operator.common.classification.ann;

import com.alibaba.alink.common.linalg.DenseMatrix;
import com.alibaba.alink.common.linalg.DenseVector;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;

public class DropoutLayerModel extends LayerModel {
private DropoutLayer layer;

public DropoutLayerModel(DropoutLayer layer) {
this.layer = layer;
}

@Override
public void resetModel(DenseVector weights, int offset) {
}

@Override
public void eval(DenseMatrix data, DenseMatrix output) {
double dropoutRate = layer.dropoutRate;

// Inverted dropout: keep each entry with probability (1 - dropoutRate) and scale the
// kept entries by 1 / (1 - dropoutRate) so the expected output equals the input.
RandomGenerator randomGenerator = new Well19937c(1L);
BinomialDistribution binomialDistribution = new BinomialDistribution(randomGenerator, 1, 1 - dropoutRate);

for (int i = 0; i < data.numRows(); i++) {
for (int j = 0; j < data.numCols(); j++) {
output.set(i, j, data.get(i, j) * binomialDistribution.sample() * (1.0 / (1 - dropoutRate)));
}
}
}

@Override
public void computePrevDelta(DenseMatrix delta, DenseMatrix output, DenseMatrix prevDelta) {
for (int i = 0; i < delta.numRows(); i++) {
for (int j = 0; j < delta.numCols(); j++) {
double y = output.get(i, j);
prevDelta.set(i, j, y * delta.get(i, j));
}
}
}

@Override
public void grad(DenseMatrix delta, DenseMatrix input, DenseVector cumGrad, int offset) {
// The dropout layer has no weights, so there is no gradient to accumulate.
}
}
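A stand-alone sketch of the inverted-dropout scaling used in eval() above; the plain java.util.Random mask here is only for illustration (the layer itself samples from a BinomialDistribution):

```java
import java.util.Arrays;
import java.util.Random;

public class InvertedDropoutSketch {
    public static void main(String[] args) {
        double dropoutRate = 0.2;
        double[] input = {1.0, 2.0, 3.0, 4.0};
        double[] output = new double[input.length];
        Random rnd = new Random(1L);
        for (int i = 0; i < input.length; i++) {
            // Keep each entry with probability (1 - dropoutRate), then scale the kept
            // entries by 1 / (1 - dropoutRate) so the expected output equals the input.
            int keep = rnd.nextDouble() < 1 - dropoutRate ? 1 : 0;
            output[i] = input[i] * keep / (1 - dropoutRate);
        }
        System.out.println(Arrays.toString(output));
    }
}
```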
@@ -0,0 +1,34 @@
package com.alibaba.alink.operator.common.classification.ann;

/**
* Layer properties of the embedding transformation, which is y = A * x.
*/
public class EmbeddingLayer extends Layer {
public int numIn;
public int embeddingSize;

public EmbeddingLayer(int numIn, int embeddingSize) {
this.numIn = numIn;
this.embeddingSize = embeddingSize;
}

@Override
public LayerModel createModel() {
return new EmbeddingLayerModel(this);
}

@Override
public int getWeightSize() {
return numIn * embeddingSize;
}

@Override
public int getOutputSize(int inputSize) {
return embeddingSize;
}

@Override
public boolean isInPlace() {
return false;
}
}
@@ -0,0 +1,64 @@

package com.alibaba.alink.operator.common.classification.ann;

import com.alibaba.alink.common.linalg.BLAS;
import com.alibaba.alink.common.linalg.DenseMatrix;
import com.alibaba.alink.common.linalg.DenseVector;

/**
* The LayerModel for {@link EmbeddingLayer}
*/
public class EmbeddingLayerModel extends LayerModel {
private DenseMatrix w;

// buffer for holding gradw
private DenseMatrix gradw;

public EmbeddingLayerModel(EmbeddingLayer layer) {
this.w = new DenseMatrix(layer.numIn, layer.embeddingSize);
this.gradw = new DenseMatrix(layer.numIn, layer.embeddingSize);
}

private void pack(DenseVector weights, int offset, DenseMatrix w) {
int pos = 0;
for (int i = 0; i < this.w.numRows(); i++) {
for (int j = 0; j < this.w.numCols(); j++) {
weights.set(offset + pos, w.get(i, j));
pos++;
}
}
}

private void unpack(DenseVector weights, int offset, DenseMatrix w) {
int pos = 0;
for (int i = 0; i < this.w.numRows(); i++) {
for (int j = 0; j < this.w.numCols(); j++) {
w.set(i, j, weights.get(offset + pos));
pos++;
}
}
}

@Override
public void resetModel(DenseVector weights, int offset) {
unpack(weights, offset, this.w);
}

@Override
public void eval(DenseMatrix data, DenseMatrix output) {
BLAS.gemm(1., data, false, this.w, false, 0., output);
}

@Override
public void computePrevDelta(DenseMatrix delta, DenseMatrix output, DenseMatrix prevDelta) {
BLAS.gemm(1.0, delta, false, this.w, true, 0., prevDelta);
}

@Override
public void grad(DenseMatrix delta, DenseMatrix input, DenseVector cumGrad, int offset) {
unpack(cumGrad, offset, this.gradw);
BLAS.gemm(1.0, input, true, delta, false, 1.0, this.gradw);
pack(cumGrad, offset, this.gradw);
}
}
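The grad() call above accumulates gradW += input^T * delta. A small sketch of the same call on explicitly sized matrices, assuming the BLAS.gemm(alpha, a, transA, b, transB, beta, c) convention used throughout this file:

```java
DenseMatrix input = new DenseMatrix(2, 3); // batch of 2 samples, 3 input features
DenseMatrix delta = new DenseMatrix(2, 4); // back-propagated deltas, embedding size 4
DenseMatrix gradW = new DenseMatrix(3, 4); // same shape as the weight matrix w
// gradW += input^T * delta
BLAS.gemm(1.0, input, true, delta, false, 1.0, gradW);
```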
@@ -18,7 +18,14 @@ public FeedForwardTopology(List<Layer> layers) {
this.layers = layers;
}

public static FeedForwardTopology multiLayerPerceptron(int[] layerSize, boolean softmaxOnTop) {
public static FeedForwardTopology multiLayerPerceptron(int[] layerSize, boolean softmaxOnTop, double dropoutRate) {
return multiLayerPerceptron(layerSize, softmaxOnTop, dropoutRate, "sigmoid");
}

public static FeedForwardTopology multiLayerPerceptron(int[] layerSize,
boolean softmaxOnTop,
double dropoutRate,
String activation) {
List<Layer> layers = new ArrayList<>((layerSize.length - 1) * 2);
for (int i = 0; i < layerSize.length - 1; i++) {
layers.add(new AffineLayer(layerSize[i], layerSize[i + 1]));
@@ -29,7 +36,17 @@ public static FeedForwardTopology multiLayerPerceptron(int[] layerSize, boolean
layers.add(new SigmoidLayerWithSquaredError());
}
} else {
layers.add(new FuntionalLayer(new SigmoidFunction()));
if (activation.toLowerCase().equals("sigmoid")) {
layers.add(new FuntionalLayer(new SigmoidFunction()));
} else if (activation.toLowerCase().equals("relu")) {
layers.add(new FuntionalLayer(new ReluFunction()));
} else if (activation.toLowerCase().equals("tanh")) {
layers.add(new FuntionalLayer(new TanhFunction()));
} else {
throw new RuntimeException("Unsupported activation function: " + activation);
}

layers.add(new DropoutLayer(dropoutRate));
}
}
return new FeedForwardTopology(layers);
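An illustrative use of the new overload, building a 4-5-3 MLP topology with ReLU hidden activation, a softmax output layer, and a dropout rate of 0.2:

```java
Topology topology = FeedForwardTopology.multiLayerPerceptron(new int[]{4, 5, 3}, true, 0.2, "relu");
```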
@@ -64,7 +64,7 @@ public void loadModel(List<Row> modelRows) {

this.labels = model.labels;
int[] layerSize0 = model.meta.get(MultilayerPerceptronTrainParams.LAYERS);
Topology topology = FeedForwardTopology.multiLayerPerceptron(layerSize0, true);
Topology topology = FeedForwardTopology.multiLayerPerceptron(layerSize0, true, 0);
this.topo = topology.getModel(model.weights);
this.predDetailMap = new HashMap<>(layerSize0[layerSize0.length - 1]);
isVectorInput = model.meta.get(ModelParamName.IS_VECTOR_INPUT);
@@ -0,0 +1,22 @@
package com.alibaba.alink.operator.common.classification.ann;

/**
* The Relu function.
* f(x) = max(0, x)
* f'(x) = 1 if x > 0 else 0
*/
public class ReluFunction implements ActivationFunction {
@Override
public double eval(double x) {
return Math.max(0, x);
}

@Override
public double derivative(double x) {
if (x > 0) {
return 1.0;
} else {
return 0.0;
}
}
}
@@ -0,0 +1,18 @@
package com.alibaba.alink.operator.common.classification.ann;

/**
* The tanh function.
* tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
* tanh'(x) = 1 - tanh(x)^2; the derivative below is expressed in terms of the output z = tanh(x).
*/
public class TanhFunction implements ActivationFunction {
@Override
public double eval(double x) {
return (Math.exp(x) - Math.exp(-x)) / (Math.exp(x) + Math.exp(-x));
}

@Override
public double derivative(double z) {
return 1 - Math.pow(z, 2.0);
}
}
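A quick numeric check, assuming (as with the other activation functions in this package) that derivative() receives the layer output z = tanh(x):

```java
TanhFunction tanh = new TanhFunction();
double z = tanh.eval(0.5);     // tanh(0.5) ≈ 0.4621
double d = tanh.derivative(z); // 1 - z * z ≈ 0.7864, i.e. 1 - tanh(0.5)^2
```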
@@ -36,6 +36,11 @@ public interface MultilayerPerceptronTrainParams<T> extends
.setDescription("Initial weights.")
.setHasDefaultValue(null)
.build();
ParamInfo<Double> DROPOUT_RATE = ParamInfoFactory
.createParamInfo("dropoutRate", Double.class)
.setDescription("Dropout rate for MLP")
.setHasDefaultValue(0.)
.build();

default int[] getLayers() {
return get(LAYERS);
@@ -60,4 +65,12 @@ default DenseVector getInitialWeights() {
default T setInitialWeights(DenseVector value) {
return set(INITIAL_WEIGHTS, value);
}

default Double getDropoutRate() {
return get(DROPOUT_RATE);
}

default T setDropoutRate(Double value) {
return set(DROPOUT_RATE, value);
}
}
@@ -17,6 +17,7 @@ public void testMLPC() throws Exception {
.setFeatureCols(Iris.getFeatureColNames())
.setLabelCol(Iris.getLabelColName())
.setLayers(new int[]{4, 5, 3})
.setDropoutRate(0.2)
.setMaxIter(100)
.setPredictionCol("pred_label")
.setPredictionDetailCol("pred_detail");
2 changes: 1 addition & 1 deletion docs/cn/samplewithsizebatchop.md
@@ -31,7 +31,7 @@ df = pd.DataFrame({"Y": data[:, 0]})
# batch source
inOp = dataframeToOperator(df, schemaStr='Y string', op_type='batch')

sampleOp = SampleBatchOp()\
sampleOp = SampleWithSizeBatchOp()\
.setSize(2)\
.setWithReplacement(False)

2 changes: 1 addition & 1 deletion docs/en/samplewithsizebatchop.md
@@ -27,7 +27,7 @@ df = pd.DataFrame({"Y": data[:, 0]})
# batch source
inOp = dataframeToOperator(df, schemaStr='Y string', op_type='batch')

sampleOp = SampleBatchOp()\
sampleOp = SampleWithSizeBatchOp()\
.setSize(2)\
.setWithReplacement(False)
