diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaDelta.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaDelta.java
index 19f7584b152..15c5ff4d0cb 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaDelta.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaDelta.java
@@ -20,6 +20,7 @@
 import org.tensorflow.Operand;
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Variable;
 import org.tensorflow.op.train.ApplyAdadelta;
 import org.tensorflow.types.family.TType;
@@ -150,16 +151,16 @@ private void createAdaDeltaSlot(Output v) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Variable accumSlot = getSlot(variable, ACCUMULATOR).get();
     Variable accumUpdateSlot = getSlot(variable, ACCUMULATOR_UPDATE).get();
-    return tf.train.applyAdadelta(
+    return deps.train.applyAdadelta(
         variable,
         accumSlot,
         accumUpdateSlot,
-        tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
-        tf.dtypes.cast(tf.constant(rho), gradient.type()),
-        tf.dtypes.cast(tf.constant(epsilon), gradient.type()),
+        deps.dtypes.cast(deps.constant(learningRate), gradient.type()),
+        deps.dtypes.cast(deps.constant(rho), gradient.type()),
+        deps.dtypes.cast(deps.constant(epsilon), gradient.type()),
         gradient,
         ApplyAdadelta.useLocking(true));
   }
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGrad.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGrad.java
index 5c51bbc1e4b..fc11f60e1f4 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGrad.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGrad.java
@@ -20,6 +20,7 @@
 import org.tensorflow.Operand;
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Variable;
 import org.tensorflow.op.train.ApplyAdagrad;
 import org.tensorflow.types.family.TType;
@@ -140,10 +141,10 @@ private void createAdaGradSlot(Output v) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Variable slot = getSlot(variable, ACCUMULATOR).get();
-    return tf.train.applyAdagrad(
-        variable, slot, tf.dtypes.cast(tf.constant(learningRate), gradient.type()), gradient, opts);
+    return deps.train.applyAdagrad(
+        variable, slot, deps.dtypes.cast(deps.constant(learningRate), gradient.type()), gradient, opts);
   }

   /** {@inheritDoc} */
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGradDA.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGradDA.java
index 62ab8d309c9..8dba8105d3e 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGradDA.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGradDA.java
@@ -22,6 +22,7 @@
 import org.tensorflow.Output;
 import org.tensorflow.ndarray.Shape;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Variable;
 import org.tensorflow.op.train.ApplyAdagradDa;
 import org.tensorflow.types.TInt64;
@@ -209,17 +210,17 @@ private void createAdaGradDASlot(Output v) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Variable gradSlot = getSlot(variable, ACCUMULATOR).get();
     Variable gradSquaredSlot = getSlot(variable, SQUARED_ACCUMULATOR).get();
-    return tf.train.applyAdagradDa(
+    return deps.train.applyAdagradDa(
         variable,
         gradSlot,
         gradSquaredSlot,
         gradient,
-        tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
-        tf.dtypes.cast(tf.constant(l1Strength), gradient.type()),
-        tf.dtypes.cast(tf.constant(l2Strength), gradient.type()),
+        deps.dtypes.cast(deps.constant(learningRate), gradient.type()),
+        deps.dtypes.cast(deps.constant(l1Strength), gradient.type()),
+        deps.dtypes.cast(deps.constant(l2Strength), gradient.type()),
         globalStep,
         ApplyAdagradDa.useLocking(true));
   }
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adam.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adam.java
index 6cf1dbcc7c5..c9a2a483c73 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adam.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adam.java
@@ -22,6 +22,7 @@
 import org.tensorflow.Output;
 import org.tensorflow.ndarray.Shape;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.Scope;
 import org.tensorflow.op.annotation.Endpoint;
 import org.tensorflow.op.annotation.Operator;
@@ -223,19 +224,19 @@ private void createAdamSlot(Output v) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Variable firstMomentSlot = getSlot(variable, FIRST_MOMENT).get();
     Variable secondMomentSlot = getSlot(variable, SECOND_MOMENT).get();
-    return tf.train.applyAdam(
+    return deps.train.applyAdam(
         variable,
         firstMomentSlot,
         secondMomentSlot,
-        tf.dtypes.cast(betaOnePower, gradient.type()),
-        tf.dtypes.cast(betaTwoPower, gradient.type()),
-        tf.dtypes.cast(learningRateConst, gradient.type()),
-        tf.dtypes.cast(betaOneConst, gradient.type()),
-        tf.dtypes.cast(betaTwoConst, gradient.type()),
-        tf.dtypes.cast(epsilonConst, gradient.type()),
+        deps.dtypes.cast(betaOnePower, gradient.type()),
+        deps.dtypes.cast(betaTwoPower, gradient.type()),
+        deps.dtypes.cast(learningRateConst, gradient.type()),
+        deps.dtypes.cast(betaOneConst, gradient.type()),
+        deps.dtypes.cast(betaTwoConst, gradient.type()),
+        deps.dtypes.cast(epsilonConst, gradient.type()),
         gradient,
         ApplyAdam.useLocking(true));
   }
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adamax.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adamax.java
index 635c2ecb862..0e6abfa0032 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adamax.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adamax.java
@@ -7,6 +7,7 @@
 import org.tensorflow.Output;
 import org.tensorflow.ndarray.Shape;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Constant;
 import org.tensorflow.op.core.Variable;
 import org.tensorflow.op.train.ApplyAdaMax;
@@ -155,19 +156,19 @@ private void createAdamaxSlot(Output v) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Variable firstMomentSlot = getSlot(variable, FIRST_MOMENT).get();
     Variable secondMomentSlot = getSlot(variable, SECOND_MOMENT).get();
     return ApplyAdaMax.create(
-        this.tf.scope(),
+        deps.scope(),
         variable,
         firstMomentSlot,
         secondMomentSlot,
-        tf.dtypes.cast(betaOnePower, gradient.type()),
-        tf.dtypes.cast(learningRateConst, gradient.type()),
-        tf.dtypes.cast(betaOneConst, gradient.type()),
-        tf.dtypes.cast(betaTwoConst, gradient.type()),
-        tf.dtypes.cast(epsilonConst, gradient.type()),
+        deps.dtypes.cast(betaOnePower, gradient.type()),
+        deps.dtypes.cast(learningRateConst, gradient.type()),
+        deps.dtypes.cast(betaOneConst, gradient.type()),
+        deps.dtypes.cast(betaTwoConst, gradient.type()),
+        deps.dtypes.cast(epsilonConst, gradient.type()),
         gradient,
         ApplyAdaMax.useLocking(true));
   }
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Ftrl.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Ftrl.java
index 962b64bab8e..ad97573f586 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Ftrl.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Ftrl.java
@@ -5,6 +5,7 @@
 import org.tensorflow.Operand;
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Variable;
 import org.tensorflow.op.train.ApplyFtrl;
 import org.tensorflow.types.family.TType;
@@ -238,21 +239,21 @@ private void createFtrlSlot(Output v) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Variable accumSlot = getSlot(variable, ACCUMULATOR).get();
     Variable linearSlot = getSlot(variable, LINEAR_ACCUMULATOR).get();
     ApplyFtrl.Options options = ApplyFtrl.useLocking(true);
-    return this.tf.train.applyFtrl(
+    return deps.train.applyFtrl(
         variable,
         accumSlot, // accum
         linearSlot, // linear
         gradient, // gradient
-        tf.dtypes.cast(tf.constant(learningRate), gradient.type()), // lr
-        tf.dtypes.cast(tf.constant(l1RegularizationStrength), gradient.type()), // l1
-        tf.dtypes.cast(tf.constant(l2RegularizationStrength), gradient.type()), // l2
-        tf.dtypes.cast(
-            tf.constant(l2ShrinkageRegularizationStrength), gradient.type()), // l2Shrinkage
-        tf.dtypes.cast(tf.constant(learningRatePower), gradient.type()), // lrPower
+        deps.dtypes.cast(deps.constant(learningRate), gradient.type()), // lr
+        deps.dtypes.cast(deps.constant(l1RegularizationStrength), gradient.type()), // l1
+        deps.dtypes.cast(deps.constant(l2RegularizationStrength), gradient.type()), // l2
+        deps.dtypes.cast(
+            deps.constant(l2ShrinkageRegularizationStrength), gradient.type()), // l2Shrinkage
+        deps.dtypes.cast(deps.constant(learningRatePower), gradient.type()), // lrPower
         options);
   }

diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/GradientDescent.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/GradientDescent.java
index 7e2ec9593ed..682f128f680 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/GradientDescent.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/GradientDescent.java
@@ -18,6 +18,7 @@
 import org.tensorflow.Graph;
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.train.ApplyGradientDescent;
 import org.tensorflow.types.family.TType;

@@ -65,10 +66,10 @@ public GradientDescent(Graph graph, String name, float learningRate) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
-    return tf.train.applyGradientDescent(
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
+    return deps.train.applyGradientDescent(
         variable,
-        tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
+        deps.dtypes.cast(deps.constant(learningRate), gradient.type()),
         gradient,
         ApplyGradientDescent.useLocking(true));
   }
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Momentum.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Momentum.java
index b1f6ac8f4e5..4de600808cf 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Momentum.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Momentum.java
@@ -20,6 +20,7 @@
 import org.tensorflow.Operand;
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Variable;
 import org.tensorflow.op.train.ApplyMomentum;
 import org.tensorflow.types.family.TType;
@@ -130,14 +131,14 @@ private void createMomentumSlot(Output v) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Variable slot = getSlot(variable, MOMENTUM).get();
-    return tf.train.applyMomentum(
+    return deps.train.applyMomentum(
         variable,
         slot,
-        tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
+        deps.dtypes.cast(deps.constant(learningRate), gradient.type()),
         gradient,
-        tf.dtypes.cast(tf.constant(momentum), gradient.type()),
+        deps.dtypes.cast(deps.constant(momentum), gradient.type()),
         ApplyMomentum.useNesterov(useNesterov),
         ApplyMomentum.useLocking(true));
   }
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Nadam.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Nadam.java
index f55fb8cdc59..9de52eb371b 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Nadam.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Nadam.java
@@ -7,6 +7,7 @@
 import org.tensorflow.Output;
 import org.tensorflow.ndarray.Shape;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Assign;
 import org.tensorflow.op.core.Constant;
 import org.tensorflow.op.core.Variable;
@@ -224,53 +225,53 @@ protected Optional prepare(String scopeName) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Class type = gradient.type();
     Variable m = getSlot(variable, FIRST_MOMENT).get(); // first Moment
     Variable v = getSlot(variable, SECOND_MOMENT).get(); // Second Moment

     // gPrime = grad / coefficients['oneMinusMScheduleNew']
-    Operand gPrime = tf.math.div(gradient, tf.dtypes.cast(oneMinusMScheduleNew, type));
+    Operand gPrime = deps.math.div(gradient, deps.dtypes.cast(oneMinusMScheduleNew, type));
     // mT = (coefficients['beta_1_t'] * m + coefficients['one_minus_beta_1_t'] * grad)
     Operand mT =
-        tf.math.add(
-            tf.math.mul(tf.dtypes.cast(betaOneConst, type), m),
-            tf.math.mul(tf.dtypes.cast(oneMinusBeta1, type), gradient));
+        deps.math.add(
+            deps.math.mul(deps.dtypes.cast(betaOneConst, type), m),
+            deps.math.mul(deps.dtypes.cast(oneMinusBeta1, type), gradient));
     // mT = state_ops.assign(m, mT, use_locking=self._use_locking)
     // update m
-    mT = tf.assign(m, mT, Assign.useLocking(true));
+    mT = deps.assign(m, mT, Assign.useLocking(true));

     // mTPrime = mT / coefficients['oneMinusMScheduleNext']
-    Operand mTPrime = tf.math.div(mT, tf.dtypes.cast(oneMinusMScheduleNext, type));
+    Operand mTPrime = deps.math.div(mT, deps.dtypes.cast(oneMinusMScheduleNext, type));

     // vT = (coefficients['beta_2_t'] * v + coefficients['one_minus_beta_2_t'] *
     // math_ops.square(grad))
     Operand vT =
-        tf.math.add(
-            tf.math.mul(tf.dtypes.cast(betaTwoConst, type), v),
-            tf.math.mul(tf.dtypes.cast(oneMinusBeta2, type), tf.math.square(gradient)));
+        deps.math.add(
+            deps.math.mul(deps.dtypes.cast(betaTwoConst, type), v),
+            deps.math.mul(deps.dtypes.cast(oneMinusBeta2, type), deps.math.square(gradient)));
     // vT = state_ops.assign(v, vT, use_locking=self._use_locking)
     // update v
-    vT = tf.assign(v, vT, Assign.useLocking(true));
+    vT = deps.assign(v, vT, Assign.useLocking(true));

     // vTPrime = vT / coefficients['vTPrimeDenominator']
-    Operand vTPrime = tf.math.div(vT, tf.dtypes.cast(vTPrimeDenominator, type));
+    Operand vTPrime = deps.math.div(vT, deps.dtypes.cast(vTPrimeDenominator, type));

     // m_t_bar = (coefficients['oneMinusMT'] * gPrime + coefficients['mT1'] * mTPrime)
     Operand m_t_bar =
-        tf.math.add(
-            tf.math.mul(tf.dtypes.cast(oneMinusMT, type), gPrime),
-            tf.math.mul(tf.dtypes.cast(mT1, type), mTPrime));
+        deps.math.add(
+            deps.math.mul(deps.dtypes.cast(oneMinusMT, type), gPrime),
+            deps.math.mul(deps.dtypes.cast(mT1, type), mTPrime));
     // varT = var - coefficients['lr_t'] * m_t_bar / (math_ops.sqrt(vTPrime) +
     // coefficients['epsilon'])
     Operand varT =
-        tf.math.sub(
+        deps.math.sub(
             variable,
-            tf.math.div(
-                tf.math.mul(tf.dtypes.cast(learningRateConst, type), m_t_bar),
-                tf.math.add(tf.math.sqrt(vTPrime), tf.dtypes.cast(epsilonConst, type))));
+            deps.math.div(
+                deps.math.mul(deps.dtypes.cast(learningRateConst, type), m_t_bar),
+                deps.math.add(deps.math.sqrt(vTPrime), deps.dtypes.cast(epsilonConst, type))));

-    return tf.assign(variable, varT, Assign.useLocking(true));
+    return deps.assign(variable, varT, Assign.useLocking(true));
   }

   /**
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Optimizer.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Optimizer.java
index dc7047337e9..59129e8c103 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Optimizer.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Optimizer.java
@@ -168,6 +168,8 @@ public Op applyGradients(List> gradsAndVars, String
         gradsAndVars.stream().map(GradAndVar::getVariable).collect(Collectors.toList());

     createSlots(variables);
+    List gradients = gradsAndVars.stream().map(GradAndVar::getGradient).filter(g -> !g.isClosed()).collect(Collectors.toList());
+    Ops tfOpsGrads = tf.withControlDependencies(gradients);

     Optional prepOp = prepare(name + "/prepare");

@@ -175,7 +177,7 @@ public Op applyGradients(List> gradsAndVars, String
     prepOp.ifPresent(updateOps::add);

     for (GradAndVar pair : gradsAndVars) {
       if (!pair.gradient.isClosed()) {
-        updateOps.add(applyDense(pair));
+        updateOps.add(applyDense(tfOpsGrads, pair));
       }
     }

@@ -261,8 +263,8 @@ protected void createSlots(List> variables) {}
    * @param the datatype of the gradients and variables.
    * @return An operand which applies the desired optimizer update to the variable.
    */
-  private Op applyDense(GradAndVar gradVarPair) {
-    return applyDense(gradVarPair.getGradient(), gradVarPair.getVariable());
+  private Op applyDense(Ops opDependencies, GradAndVar gradVarPair) {
+    return applyDense(opDependencies, gradVarPair.getGradient(), gradVarPair.getVariable());
   }

   /**
@@ -273,7 +275,7 @@ private Op applyDense(GradAndVar gradVarPair) {
    * @param The type of the variable.
    * @return An operand which applies the desired optimizer update to the variable.
    */
-  protected abstract Op applyDense(Output gradient, Output variable);
+  protected abstract Op applyDense(Ops opDependencies, Output gradient, Output variable);

   /**
    * Gathers up the update operations into a single op that can be used as a run target.
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/RMSProp.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/RMSProp.java
index 0d4daf748d4..433f969a0db 100644
--- a/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/RMSProp.java
+++ b/tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/RMSProp.java
@@ -20,6 +20,7 @@
 import org.tensorflow.Operand;
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Variable;
 import org.tensorflow.op.train.ApplyCenteredRmsProp;
 import org.tensorflow.op.train.ApplyRmsProp;
@@ -189,31 +190,31 @@ private void createRMSPropSlot(Output v) {

   /** {@inheritDoc} */
   @Override
-  protected Op applyDense(Output gradient, Output variable) {
+  protected Op applyDense(Ops deps, Output gradient, Output variable) {
     Variable rmsSlot = getSlot(variable, RMS).get();
     Variable momentumSlot = getSlot(variable, MOMENTUM).get();
     if (centered) {
       Variable mgSlot = getSlot(variable, MG).get();
-      return tf.train.applyCenteredRmsProp(
+      return deps.train.applyCenteredRmsProp(
           variable,
           mgSlot,
           rmsSlot,
           momentumSlot,
-          tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
-          tf.dtypes.cast(tf.constant(decay), gradient.type()),
-          tf.dtypes.cast(tf.constant(momentum), gradient.type()),
-          tf.dtypes.cast(tf.constant(epsilon), gradient.type()),
+          deps.dtypes.cast(deps.constant(learningRate), gradient.type()),
+          deps.dtypes.cast(deps.constant(decay), gradient.type()),
+          deps.dtypes.cast(deps.constant(momentum), gradient.type()),
+          deps.dtypes.cast(deps.constant(epsilon), gradient.type()),
           gradient,
           ApplyCenteredRmsProp.useLocking(true));
     }
-    return tf.train.applyRmsProp(
+    return deps.train.applyRmsProp(
         variable,
         rmsSlot,
         momentumSlot,
-        tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
-        tf.dtypes.cast(tf.constant(decay), gradient.type()),
-        tf.dtypes.cast(tf.constant(momentum), gradient.type()),
-        tf.dtypes.cast(tf.constant(epsilon), gradient.type()),
+        deps.dtypes.cast(deps.constant(learningRate), gradient.type()),
+        deps.dtypes.cast(deps.constant(decay), gradient.type()),
+        deps.dtypes.cast(deps.constant(momentum), gradient.type()),
+        deps.dtypes.cast(deps.constant(epsilon), gradient.type()),
         gradient,
         ApplyRmsProp.useLocking(true));
   }
diff --git a/tensorflow-framework/src/test/java/org/tensorflow/framework/optimizers/GradientDescentTest.java b/tensorflow-framework/src/test/java/org/tensorflow/framework/optimizers/GradientDescentTest.java
index 10ee78b70c5..d4bc0a7346f 100644
--- a/tensorflow-framework/src/test/java/org/tensorflow/framework/optimizers/GradientDescentTest.java
+++ b/tensorflow-framework/src/test/java/org/tensorflow/framework/optimizers/GradientDescentTest.java
@@ -110,10 +110,6 @@ public void testBasic() {
     }
   }

-  // This test fails due to incorrect gradients being generated some of the time, when
-  // using an identical graph on identical data. It should not, but it seems to be a
-  // problem in TF-core.
-  @Disabled
   @Test
   public void testDeterminism() {
     ConfigProto config =
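
Note for reviewers (not part of the patch): the Optimizer.java hunk is the heart of the change. It wraps the per-step gradient outputs in tf.withControlDependencies(...) and hands the resulting Ops instance to applyDense, so every Apply* update op an optimizer builds gains control edges from all gradients and cannot run before they are computed, which is what re-enables the previously disabled determinism test. The following standalone sketch illustrates that control-dependency pattern outside the Optimizer class; the class name ControlDependencySketch and the grad0/grad1 constants are hypothetical stand-ins, and a plain add is used in place of a train.apply* op.

import java.util.Arrays;
import org.tensorflow.Graph;
import org.tensorflow.op.Op;
import org.tensorflow.op.Ops;
import org.tensorflow.op.core.Constant;
import org.tensorflow.types.TFloat32;

public final class ControlDependencySketch {
  public static void main(String[] args) {
    try (Graph graph = new Graph()) {
      Ops tf = Ops.create(graph);

      // Stand-ins for the gradient outputs that applyGradients collects each step.
      Constant<TFloat32> grad0 = tf.constant(new float[] {0.1f, 0.2f});
      Constant<TFloat32> grad1 = tf.constant(new float[] {0.3f, 0.4f});

      // Ops built from `deps` carry control edges from both gradients, mirroring the
      // tf.withControlDependencies(gradients) call added in Optimizer.applyGradients.
      Ops deps = tf.withControlDependencies(Arrays.<Op>asList(grad0, grad1));

      // In the optimizers above this would be a deps.train.apply* update op; a plain
      // add keeps the sketch self-contained while still depending on both gradients.
      Op update = deps.math.add(grad0, grad1);
      System.out.println("update op: " + update);
    }
  }
}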