.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    #@save
    def resnet18(num_classes, in_channels=1):
        """Build a slightly modified ResNet-18 as an ``nn.Sequential``.

        The stem is a 3x3 convolution (stride 1, padding 1) followed by batch
        normalization and ReLU, with no max-pooling layer. Four stages of two
        residual units each follow (64, 128, 256 and 512 output channels),
        then adaptive average pooling to 1x1 and a linear layer with
        ``num_classes`` outputs. ``in_channels`` is the number of channels the
        first convolution expects.
        """
        def resnet_block(in_channels, out_channels, num_residuals,
                         first_block=False):
            # NOTE: `in_channels` is accepted but never read in this helper;
            # only `out_channels` is handed to `d2l.Residual`.
            def make_unit(index):
                # The leading unit of every stage except the first one is
                # built with a 1x1 convolution and stride 2.
                if index == 0 and not first_block:
                    return d2l.Residual(out_channels, use_1x1conv=True,
                                        strides=2)
                return d2l.Residual(out_channels)

            return nn.Sequential(
                *[make_unit(index) for index in range(num_residuals)])

        # This model uses a smaller convolution kernel, stride, and padding and
        # removes the max-pooling layer
        stem = [
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        ]
        net = nn.Sequential(*stem)

        # (module name, input channels, output channels, is first stage)
        stages = [
            ("resnet_block1", 64, 64, True),
            ("resnet_block2", 64, 128, False),
            ("resnet_block3", 128, 256, False),
            ("resnet_block4", 256, 512, False),
        ]
        for name, width_in, width_out, is_first in stages:
            net.add_module(
                name, resnet_block(width_in, width_out, 2,
                                   first_block=is_first))

        net.add_module("global_avg_pool", nn.AdaptiveAvgPool2d((1, 1)))
        classifier = nn.Sequential(nn.Flatten(), nn.Linear(512, num_classes))
        net.add_module("fc", classifier)
        return net

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    #@save
    def resnet18(num_classes):
        """Build a slightly modified ResNet-18 as a Gluon ``nn.Sequential``.

        The stem is a 3x3 convolution (stride 1, padding 1) with 64 channels,
        followed by batch normalization and a ReLU activation, with no
        max-pooling layer. Four stages of two residual units each follow
        (64, 128, 256 and 512 channels), then global average pooling and a
        dense layer with ``num_classes`` outputs.
        """
        def resnet_block(num_channels, num_residuals, first_block=False):
            stage = nn.Sequential()
            for index in range(num_residuals):
                # The leading unit of every stage except the first one is
                # built with a 1x1 convolution and stride 2.
                if index == 0 and not first_block:
                    unit = d2l.Residual(
                        num_channels, use_1x1conv=True, strides=2)
                else:
                    unit = d2l.Residual(num_channels)
                stage.add(unit)
            return stage

        net = nn.Sequential()
        # This model uses a smaller convolution kernel, stride, and padding and
        # removes the max-pooling layer
        stem = (nn.Conv2D(64, kernel_size=3, strides=1, padding=1),
                nn.BatchNorm(),
                nn.Activation('relu'))
        net.add(*stem)

        # First stage keeps the resolution settings of a plain residual unit;
        # the remaining stages are created in increasing channel order.
        net.add(resnet_block(64, 2, first_block=True))
        for num_channels in (128, 256, 512):
            net.add(resnet_block(num_channels, 2))

        net.add(nn.GlobalAvgPool2D())
        net.add(nn.Dense(num_classes))
        return net

.. raw:: html

.. raw:: html

pytorch mxnet

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Build the modified ResNet-18 with 10 output classes
    net = resnet18(10)
    # Get a list of GPUs
    devices = d2l.try_all_gpus()
    # We will initialize the network inside the training loop

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Build the modified ResNet-18 with 10 output classes
    net = resnet18(10)
    # Get a list of GPUs
    devices = d2l.try_all_gpus()
    # Initialize all the parameters of the network, passing the whole device
    # list as the context and drawing values from init.Normal(sigma=0.01)
    net.initialize(init=init.Normal(sigma=0.01), ctx=devices)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    [07:09:59] ../src/storage/storage.cc:196: Using Pooled (Naive) StorageManager for CPU
    [07:09:59] ../src/storage/storage.cc:196: Using Pooled (Naive) StorageManager for GPU
    [07:10:00] ../src/storage/storage.cc:196: Using Pooled (Naive) StorageManager for GPU

.. raw:: html

.. raw:: html

pytorch mxnet

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    def train(net, num_gpus, batch_size, lr, num_epochs=10):
        """Train ``net`` on Fashion-MNIST with ``nn.DataParallel``.

        Args:
            net: The model to train. Its ``nn.Linear`` and ``nn.Conv2d``
                weights are re-initialized in place before training.
            num_gpus: Number of devices to request via ``d2l.try_gpu``; must
                be at least 1.
            batch_size: Batch size passed to
                ``d2l.load_data_fashion_mnist``.
            lr: Learning rate for ``torch.optim.SGD``.
            num_epochs: Number of passes over the training data (default 10).

        Raises:
            ValueError: If ``num_gpus`` is smaller than 1.

        Plots the test accuracy after every epoch and finally prints the last
        test accuracy, the average time per epoch and the devices used.
        """
        if num_gpus < 1:
            # Without this check the failure would surface later as an
            # IndexError on `devices[0]`.
            raise ValueError(f'num_gpus must be at least 1, got {num_gpus}')
        train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
        devices = [d2l.try_gpu(i) for i in range(num_gpus)]

        def init_weights(module):
            # Exact type match on purpose: only plain nn.Linear / nn.Conv2d
            # modules are re-initialized, subclasses are left untouched.
            if type(module) in [nn.Linear, nn.Conv2d]:
                nn.init.normal_(module.weight, std=0.01)

        net.apply(init_weights)
        # Set the model on multiple GPUs. DataParallel requires the wrapped
        # module's parameters and buffers to live on device_ids[0], so place
        # them there explicitly instead of relying on an earlier call having
        # already moved the model.
        net = nn.DataParallel(net, device_ids=devices).to(devices[0])
        trainer = torch.optim.SGD(net.parameters(), lr)
        loss = nn.CrossEntropyLoss()
        timer = d2l.Timer()
        animator = d2l.Animator('epoch', 'test acc', xlim=[1, num_epochs])
        for epoch in range(num_epochs):
            net.train()
            timer.start()
            for X, y in train_iter:
                trainer.zero_grad()
                # Inputs go to the first device; DataParallel handles the
                # distribution across `devices` from there.
                X, y = X.to(devices[0]), y.to(devices[0])
                l = loss(net(X), y)
                l.backward()
                trainer.step()
            # Only the training pass is timed; evaluation happens afterwards.
            timer.stop()
            animator.add(epoch + 1, (d2l.evaluate_accuracy_gpu(net, test_iter),))
        print(f'test acc: {animator.Y[0][-1]:.2f}, {timer.avg():.1f} sec/epoch '
              f'on {str(devices)}')

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Random input of shape (4, 1, 28, 28)
    x = np.random.uniform(size=(4, 1, 28, 28))
    # Split the input into shards and load them onto `devices`
    x_shards = gluon.utils.split_and_load(x, devices)
    # Run the network on the first two shards; the recorded output below
    # shows one result on gpu(0) and one on gpu(1)
    net(x_shards[0]), net(x_shards[1])

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    [07:10:01] ../src/operator/cudnn_ops.cc:318: Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable
    [07:10:02] ../src/operator/cudnn_ops.cc:318: Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable
    [07:10:03] ../src/operator/cudnn_ops.cc:318: Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable
    [07:10:04] ../src/operator/cudnn_ops.cc:318: Auto-tuning cuDNN op, set MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    (array([[ 2.2610207e-06,  2.2045981e-06, -5.4046786e-06,  1.2869955e-06,
              5.1373163e-06, -3.8297967e-06,  1.4339059e-07,  5.4683451e-06,
             -2.8279192e-06, -3.9651104e-06],
            [ 2.0698672e-06,  2.0084667e-06, -5.6382510e-06,  1.0498458e-06,
              5.5506434e-06, -4.1065491e-06,  6.0830087e-07,  5.4521784e-06,
             -3.7365021e-06, -4.1891640e-06]], ctx=gpu(0)),
     array([[ 2.4629783e-06,  2.6015525e-06, -5.4362617e-06,  1.2938218e-06,
              5.6387889e-06, -4.1360108e-06,  3.5758853e-07,  5.5125256e-06,
             -3.1957325e-06, -4.2976326e-06],
            [ 1.9431673e-06,  2.2600434e-06, -5.2698201e-06,  1.4807417e-06,
              5.4830934e-06, -3.9678889e-06,  7.5751018e-08,  5.6764356e-06,
             -3.2530229e-06, -4.0943951e-06]], ctx=gpu(1)))

.. raw:: html

.. raw:: html

pytorch mxnet

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Train on one GPU with batch size 256 and learning rate 0.1
    train(net, num_gpus=1, batch_size=256, lr=0.1)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    test acc: 0.91, 12.3 sec/epoch on [device(type='cuda', index=0)]

.. figure:: output_multiple-gpus-concise_9de99c_39_1.svg

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Look up the `weight` parameter of the network's first layer
    weight = net[0].params.get('weight')

    # Reading the data without naming a device raises RuntimeError here (see
    # the recorded output), which is reported instead of propagated
    try:
        weight.data()
    except RuntimeError:
        print('not initialized on cpu')
    # Show the first slice of the weight as stored on each of the two devices;
    # the recorded output shows identical values on gpu(0) and gpu(1)
    weight.data(devices[0])[0], weight.data(devices[1])[0]

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    not initialized on cpu

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    (array([[[ 0.01382882, -0.01183044,  0.01417865],
             [-0.00319718,  0.00439528,  0.02562625],
             [-0.00835081,  0.01387452, -0.01035946]]], ctx=gpu(0)),
     array([[[ 0.01382882, -0.01183044,  0.01417865],
             [-0.00319718,  0.00439528,  0.02562625],
             [-0.00835081,  0.01387452, -0.01035946]]], ctx=gpu(1)))

.. raw:: html

.. raw:: html

pytorch mxnet

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Train on two GPUs with batch size 512 and learning rate 0.2
    train(net, num_gpus=2, batch_size=512, lr=0.2)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    test acc: 0.87, 7.5 sec/epoch on [device(type='cuda', index=0), device(type='cuda', index=1)]

.. figure:: output_multiple-gpus-concise_9de99c_48_1.svg

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    #@save
    def evaluate_accuracy_gpus(net, data_iter, split_f=d2l.split_batch):
        """Return the accuracy of ``net`` over ``data_iter`` using several GPUs.

        Each batch is split with ``split_f`` across the devices that hold the
        network's first parameter, every shard is fed through ``net``, and
        the per-shard results of ``d2l.accuracy`` are summed. The return
        value is that running total divided by the total of ``labels.size``
        over all batches.
        """
        # Query the list of devices from the first parameter of the network
        all_params = list(net.collect_params().values())
        devices = all_params[0].list_ctx()
        # Slot 0: no. of correct predictions, slot 1: no. of predictions
        metric = d2l.Accumulator(2)
        for features, labels in data_iter:
            X_shards, y_shards = split_f(features, labels, devices)
            # Launch the forward pass for every shard before reading back any
            # result, so the shards can run in parallel
            pred_shards = [net(X_shard) for X_shard in X_shards]
            num_correct = 0
            for pred_shard, y_shard in zip(pred_shards, y_shards):
                num_correct += float(d2l.accuracy(pred_shard, y_shard))
            metric.add(num_correct, labels.size)
        return metric[0] / metric[1]

.. raw:: html

.. raw:: html