
Feature/python.paddle.v2 #1108

Closed
wants to merge 16 commits
272 changes: 124 additions & 148 deletions demo/mnist/api_train.py
@@ -6,42 +6,11 @@

The user API could be simpler and more carefully designed.
"""
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
import paddle.trainer.PyDataProvider2 as dp
import numpy as np
import random
from mnist_util import read_from_mnist
from paddle.trainer_config_helpers import *


def optimizer_config():
settings(
learning_rate=1e-4,
learning_method=AdamOptimizer(),
batch_size=1000,
model_average=ModelAverage(average_window=0.5),
regularization=L2Regularization(rate=0.5))


def network_config():
imgs = data_layer(name='pixel', size=784)
hidden1 = fc_layer(input=imgs, size=200)
hidden2 = fc_layer(input=hidden1, size=200)
inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
cost = classification_cost(
input=inference, label=data_layer(
name='label', size=10))
outputs(cost)

import paddle.v2 as paddle

def init_parameter(network):
assert isinstance(network, api.GradientMachine)
for each_param in network.getParameters():
assert isinstance(each_param, api.Parameter)
array_size = len(each_param)
array = np.random.uniform(-1.0, 1.0, array_size).astype('float32')
each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array)
from mnist_util import read_from_mnist


def generator_to_batch(generator, batch_size):
@@ -73,132 +42,139 @@ def input_order_converter(generator):
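(The collapsed hunk above hides the batching helpers that main() relies on. Below is a rough sketch of what generator_to_batch and BatchPool plausibly look like, inferred only from their call sites further down — a hypothetical reconstruction, not the PR's actual code.)

import random


def generator_to_batch(generator, batch_size):
    # Group a sample generator into lists of at most batch_size samples.
    batch = []
    for item in generator:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch


class BatchPool(object):
    # Buffer all samples in memory; each call yields freshly shuffled
    # batches, so every training pass sees the data in a new order.
    def __init__(self, generator, batch_size):
        self.data = list(generator)
        self.batch_size = batch_size

    def __call__(self):
        random.shuffle(self.data)
        for i in xrange(0, len(self.data), self.batch_size):
            yield self.data[i:i + self.batch_size]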


def main():
api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores
paddle.raw.initPaddle("-use_gpu=false",
"-trainer_count=4") # use 4 cpu cores

# Get enable_types for each optimizer.
# enable_types = [value, gradient, momentum, etc.]
# For each optimizer (SGD, Adam), the GradientMachine should enable
# different buffers.
opt_config_proto = parse_optimizer_config(optimizer_config)
opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
_temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
enable_types = _temp_optimizer_.getParameterTypes()

# Create Simple Gradient Machine.
model_config = parse_network_config(network_config)
m = api.GradientMachine.createFromConfigProto(
model_config, api.CREATE_MODE_NORMAL, enable_types)

# This type check is not strictly needed; it only enables type hints in
# IDEs such as PyCharm.
assert isinstance(m, api.GradientMachine)
optimizer = paddle.optimizer.Optimizer(
learning_method=paddle.optimizer.AdamOptimizer(),
learning_rate=1e-4,
model_average=paddle.optimizer.ModelAverage(average_window=0.5),
regularization=paddle.optimizer.L2Regularization(rate=0.5))

# define network
imgs = paddle.layers.data_layer(name='pixel', size=784)
hidden1 = paddle.layers.fc_layer(input=imgs, size=200)
hidden2 = paddle.layers.fc_layer(input=hidden1, size=200)
inference = paddle.layers.fc_layer(
input=hidden2, size=10, act=paddle.config.SoftmaxActivation())
cost = paddle.layers.classification_cost(
input=inference, label=paddle.layers.data_layer(
name='label', size=10))

# Initialize Parameter by numpy.
init_parameter(network=m)
model = paddle.model.Model(layers=[cost], optimizer=optimizer)

# Create a local updater. Local means it does not run in a cluster.
# For cluster training, this can be changed to createRemoteUpdater
# in the future.
updater = api.ParameterUpdater.createLocalUpdater(opt_config)
assert isinstance(updater, api.ParameterUpdater)
model.rand_parameter()

# Initialize ParameterUpdater.
updater.init(m)
model.start()

# DataProviderConverter is a utility that converts Python objects into
# Paddle's C++ input. The input format is the same as Paddle's DataProvider.
converter = DataProviderConverter(
input_types=[dp.dense_vector(784), dp.integer_value(10)])
batch_evaluator = model.make_evaluator()
Collaborator comment:

These two evaluators look rather confusing. They take no arguments, so the two appear to do the same thing. Later, .start and .finish are called on them, but it is still not obvious what they actually do. Judging from the name (evaluator), my guess is that they evaluate the model on some kind of test data, but it is also unclear which test data is used.
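(For reference, the old raw-API code further down in this diff drives an evaluator between explicit start/finish calls around every batch; a condensed excerpt of that pattern, not new code in this PR:)

batch_evaluator = m.makeEvaluator()

batch_evaluator.start()               # reset accumulated statistics
m.forwardBackward(converter(data_batch), outArgs, pass_type)
m.eval(batch_evaluator)               # fold this batch into the metrics
print 'cost=', cost, batch_evaluator  # printing an evaluator shows its metrics
batch_evaluator.finish()              # close the accounting for this batch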

test_evaluator = model.make_evaluator()

train_file = './data/raw_data/train'
test_file = './data/raw_data/t10k'

# Start the gradient machine.
# The gradient machine must be started before invoking forward/backward,
# not just for training but also for inference.
m.start()

# evaluator can print error rate, etc. It is a C++ class.
batch_evaluator = m.makeEvaluator()
test_evaluator = m.makeEvaluator()

# Get Train Data.
# Train data will be stored in a data pool. The current implementation does
# not care about memory or speed; it is just a very naive implementation.
train_data_generator = input_order_converter(read_from_mnist(train_file))
train_data = BatchPool(train_data_generator, 512)

# outArgs holds the neural network's forward result. It is not used here;
# it is just passed to gradient_machine.forward.
outArgs = api.Arguments.createArguments(0)

for pass_id in xrange(2): # we train 2 passes.
updater.startPass()

for batch_id, data_batch in enumerate(train_data()):
# data_batch contains the input images.
# Here, for online learning, we could fetch data_batch from the network.

# Start update one batch.
pass_type = updater.startBatch(len(data_batch))

# Start BatchEvaluator.
# batch_evaluator can be used between start/finish.
batch_evaluator.start()

# forwardBackward is a shortcut for forward and backward. It is sometimes
# faster than invoking forward and backward separately, because in
# GradientMachine it may be asynchronous.
m.forwardBackward(converter(data_batch), outArgs, pass_type)

for each_param in m.getParameters():
updater.update(each_param)

# Get cost. We use numpy to calculate total cost for this batch.
cost_vec = outArgs.getSlotValue(0)
cost_vec = cost_vec.copyToNumpyMat()
cost = cost_vec.sum() / len(data_batch)

# Make evaluator works.
m.eval(batch_evaluator)

# Print logs.
print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
cost, batch_evaluator

batch_evaluator.finish()
# Finish batch.
# * will clear gradient.
# * ensure all values should be updated.
updater.finishBatch(cost)

# Testing stage: use the test data set to evaluate the current network.
updater.apply()
test_evaluator.start()
test_data_generator = input_order_converter(read_from_mnist(test_file))
for data_batch in generator_to_batch(test_data_generator, 512):
# in testing stage, only forward is needed.
m.forward(converter(data_batch), outArgs, api.PASS_TEST)
m.eval(test_evaluator)

# print error rate for test data set
print 'Pass', pass_id, ' test evaluator: ', test_evaluator
test_evaluator.finish()
updater.restore()

updater.catchUpWith()
params = m.getParameters()
for each_param in params:
assert isinstance(each_param, api.Parameter)
value = each_param.getBuf(api.PARAMETER_VALUE)
value = value.copyToNumpyArray()

# Here, we could save the parameters to wherever you want
print each_param.getName(), value

updater.finishPass()

m.finish()
for pass_id in xrange(2):
model.start_pass()
Collaborator comment (@wangkuiyi, Feb 2, 2017):

I think we discussed before that the user should not control the notion of a pass, since a pass is determined by the size of the dataset? I vaguely remember from that discussion that it could be written as follows:

for minibatch, last_batch in enumerate(training_data):
    ....
    if last_batch:
        log("Print model quality: %f", evaluate(model, testing_data))
    ....
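(A concrete reading of this suggestion — a hypothetical sketch in which training_data yields (batch, is_last) pairs and model.train / evaluate are assumed helpers; none of these are APIs in this PR:)

# Hypothetical: the data source, not the user, decides where a pass ends
# by flagging the final minibatch of the dataset.
for minibatch, last_batch in training_data:
    model.train(minibatch)
    if last_batch:
        # A pass just ended; measure model quality on held-out data.
        print 'model quality: %f' % evaluate(model, testing_data)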


model.finish_pass()

# # DataProviderConverter is a utility that converts Python objects into
# # Paddle's C++ input. The input format is the same as Paddle's DataProvider.
# converter = paddle.data.DataProviderConverter(input_types=[
# paddle.data.dense_vector(784), paddle.data.integer_value(10)
# ])
#
# train_file = './data/raw_data/train'
# test_file = './data/raw_data/t10k'
#
# # Start the gradient machine.
# # The gradient machine must be started before invoking forward/backward,
# # not just for training but also for inference.
# m.start()
#
# # evaluator can print error rate, etc. It is a C++ class.
# batch_evaluator = m.makeEvaluator()
# test_evaluator = m.makeEvaluator()
#
# # Get Train Data.
# # Train data will be stored in a data pool. The current implementation does
# # not care about memory or speed; it is just a very naive implementation.
# train_data_generator = input_order_converter(read_from_mnist(train_file))
# train_data = BatchPool(train_data_generator, 512)
#
# # outArgs holds the neural network's forward result. It is not used here;
# # it is just passed to gradient_machine.forward.
# outArgs = paddle.raw.Arguments.createArguments(0)
#
# for pass_id in xrange(2): # we train 2 passes.
# updater.startPass()
#
# for batch_id, data_batch in enumerate(train_data()):
# # data_batch contains the input images.
# # Here, for online learning, we could fetch data_batch from the network.
#
# # Start update one batch.
# pass_type = updater.startBatch(len(data_batch))
#
# # Start BatchEvaluator.
# # batch_evaluator can be used between start/finish.
# batch_evaluator.start()
#
# # forwardBackward is a shortcut for forward and backward. It is sometimes
# # faster than invoking forward and backward separately, because in
# # GradientMachine it may be asynchronous.
# m.forwardBackward(converter(data_batch), outArgs, pass_type)
#
# for each_param in m.getParameters():
# updater.update(each_param)
#
# # Get cost. We use numpy to calculate total cost for this batch.
# cost_vec = outArgs.getSlotValue(0)
# cost_vec = cost_vec.copyToNumpyMat()
# cost = cost_vec.sum() / len(data_batch)
#
# # Make evaluator works.
# m.eval(batch_evaluator)
#
# # Print logs.
# print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
# cost, batch_evaluator
#
# batch_evaluator.finish()
# # Finish the batch.
# # * clears the gradients.
# # * ensures all values have been updated.
# updater.finishBatch(cost)
#
# # Testing stage: use the test data set to evaluate the current network.
# updater.apply()
# test_evaluator.start()
# test_data_generator = input_order_converter(read_from_mnist(test_file))
# for data_batch in generator_to_batch(test_data_generator, 512):
# # in testing stage, only forward is needed.
# m.forward(converter(data_batch), outArgs, paddle.raw.PASS_TEST)
# m.eval(test_evaluator)
#
# # print error rate for test data set
# print 'Pass', pass_id, ' test evaluator: ', test_evaluator
# test_evaluator.finish()
# updater.restore()
#
# updater.catchUpWith()
# params = m.getParameters()
# for each_param in params:
# assert isinstance(each_param, paddle.raw.Parameter)
# value = each_param.getBuf(paddle.raw.PARAMETER_VALUE)
# value = value.copyToNumpyArray()
#
# # Here, we could save the parameters to wherever you want
# print each_param.getName(), value
#
# updater.finishPass()

model.finish()
Collaborator comment:

What is model.finish supposed to do?



if __name__ == '__main__':
5 changes: 3 additions & 2 deletions python/CMakeLists.txt
@@ -4,11 +4,12 @@ set(OUTPUT_DIR
file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py)
file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py)

file(GLOB V2_PY_FILES . ./paddle/v2/*.py)
set(PY_FILES paddle/__init__.py
${TRAINER_PY_FILES}
${HELPERS_PY_FILES}
${UTILS_PY_FILES})
${UTILS_PY_FILES}
${V2_PY_FILES})

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
1 change: 1 addition & 0 deletions python/paddle/trainer_config_helpers/__init__.py
@@ -21,5 +21,6 @@
from optimizers import *
from attrs import *
from config_parser_utils import *

# This will enable operator overload for LayerOutput
import layer_math
22 changes: 22 additions & 0 deletions python/paddle/v2/__init__.py
@@ -0,0 +1,22 @@
"""
This is an experimental package for Paddle new API.

Currently, one should always use

.. code-block:: python

    import paddle.v2 as paddle

as the import statement. The API is in flux; never use this package in
production.
"""

import py_paddle.swig_paddle as raw
import config
import data
import paddle.proto as proto
import layers
import optimizer
import model

__all__ = ['config', 'data', 'raw', 'proto', 'layers', 'optimizer', 'model']
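(Per this docstring, every sub-package is reached through the single paddle alias; a condensed excerpt of how the mnist demo above uses it:)

import paddle.v2 as paddle

paddle.raw.initPaddle("-use_gpu=false", "-trainer_count=4")
imgs = paddle.layers.data_layer(name='pixel', size=784)
optimizer = paddle.optimizer.Optimizer(
    learning_method=paddle.optimizer.AdamOptimizer(),
    learning_rate=1e-4)  # other optimizer settings elided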
12 changes: 12 additions & 0 deletions python/paddle/v2/config.py
@@ -0,0 +1,12 @@
from paddle.trainer_config_helpers import *
Collaborator comment:

This import * makes us lose our grip on what the paddle.v2 package contains: whenever someone changes paddle.trainer_config_helpers, the symbols here change along with it, right?

I suggest we take this opportunity to first copy-n-paste over only what the mnist demo needs, and then revise the copied library under the principle that the mnist demo should read self-explanatorily.

After that we go through the demos one by one, repeating the process; the paddle.v2 we end up with should be the one we want.

Collaborator (author) reply:

Actually, the symbols exposed by paddle.trainer_config_helpers are strictly controlled: it uses __all__ to decide which symbols are exported.

Copy-and-pasting the package would expose far fewer things. For example, the MNIST tutorial may be built from fully connected layers, while a user might want to switch to something like convolution. If we only copy over the interfaces the demo needs, convolution would most likely not be copied, and the user would lose that flexibility.

Also, similar to an earlier comment: if we really need to define the network topology via return values rather than functions, then all of the config parsing has to be rewritten anyway. Copy-and-paste would actually be worse than rewriting the parsing from scratch.

Collaborator reply:

I suggested copy-n-paste precisely for the sake of rewriting, not merely to create a link to the existing symbols under the v2 package.

from paddle.trainer.config_parser import parse_config as parse
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as parse_network
from paddle.trainer_config_helpers.config_parser_utils import \
parse_optimizer_config as parse_optimizer

import paddle.trainer_config_helpers as tmp

__all__ = ['parse', 'parse_network', 'parse_optimizer']

__all__.extend(filter(lambda x: x[:2] != '__', dir(tmp)))
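(The last line re-exports every non-dunder name from trainer_config_helpers — the behavior the thread above debates. A toy illustration of the mechanism, using a stand-in module rather than Paddle code:)

# Toy illustration: every attribute of the wrapped module whose name does
# not start with '__' is appended to __all__, so the wrapper's star-import
# surface silently tracks the wrapped module -- including single-underscore
# names and any modules the wrapped module itself imported.
import string as _wrapped

__all__ = ['parse']
__all__.extend(filter(lambda x: x[:2] != '__', dir(_wrapped)))
print __all__  # e.g. ['parse', 'ascii_letters', 'ascii_lowercase', ...]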