Tensorflow를 사용하여 간단한 신경망을 학습시키려고 합니다. MNIST 데이터 세트에서는 비슷한 네트워크를 문제없이 실행했지만, 제 데이터를 코드에 맞게 적용하여 GPU 컴퓨터에서 실행하면 메모리가 고갈됩니다. 이미 다음을 시도해 보았습니다: - 클래스의 일부를 주석 처리 - 더 적은 에포크(epoch)로 학습 - 더 적은 이미지로 코드 실행 (약 75,000개 대신 10개 이미지). 그래도 아마존의 P2xlarge GPU에서 실행할 때 메모리 오류가 계속 발생합니다.
- batch_size 감소. 다음은 데이터를 올바른 형식으로 가져오는 클래스(제가 직접 만든 것은 아닙니다)와 네트워크의 코드입니다. 코드에서 메모리를 소모할 만한 부분을 찾아 주실 수 있습니까?
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import random
import tensorflow as tf
import gzip
import os
import random
import glob
import csv
import numpy as np
from six.moves import urllib
from six.moves import xrange # pylint: disable=redefined-builtin
class DataSet(object):
    """In-memory set of images and labels that is served in mini-batches.

    The arrays are stored exactly as passed in; no reshaping, dtype
    conversion or normalisation is applied.
    """

    def __init__(self, images, labels, fake_data=False, one_hot=False):
        """Construct a DataSet.

        Args:
            images: array whose first axis indexes the examples. Ignored
                (may be an empty list) when `fake_data` is true.
            labels: array whose first axis has the same length as `images`.
            fake_data: if true, the set pretends to hold 10000 examples and
                `next_batch(..., fake_data=True)` returns constant data.
            one_hot: only consulted when fake batches are generated; selects
                a 10-element one-hot fake label instead of the scalar 0.

        Raises:
            AssertionError: if real data is given and the number of images
                differs from the number of labels.
        """
        # Stored unconditionally so that next_batch(fake_data=True) never
        # hits a missing attribute on a data set built from real arrays.
        self.one_hot = one_hot
        if fake_data:
            self._num_examples = 10000
        else:
            assert images.shape[0] == labels.shape[0], (
                'images.shape: %s labels.shape: %s' % (images.shape,
                                                       labels.shape))
            self._num_examples = images.shape[0]
            # The usual flatten-to-2D and [0, 255] -> [0.0, 1.0] float32
            # conversion is intentionally NOT done here: the author disabled
            # it because it exhausted memory on the ~75k image
            # (224, 224, 3) data set.
        self._images = images
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    # The accessors are properties: the training script reads them as plain
    # attributes (e.g. `avec.train.num_examples / batch_size`).
    @property
    def images(self):
        """The stored image array (in its current shuffled order)."""
        return self._images

    @property
    def labels(self):
        """The stored label array (in its current shuffled order)."""
        return self._labels

    @property
    def num_examples(self):
        """Number of examples in the set (10000 for fake data)."""
        return self._num_examples

    @property
    def epochs_completed(self):
        """How many times `next_batch` has wrapped around the data."""
        return self._epochs_completed

    def next_batch(self, batch_size, fake_data=False):
        """Return the next `batch_size` examples from this data set.

        Args:
            batch_size: number of examples to return.
            fake_data: if true, return constant placeholder data instead of
                slicing the stored arrays.

        Returns:
            A pair `(images, labels)`. For real data these are slices of the
            stored arrays; for fake data they are lists of length
            `batch_size`.

        Raises:
            AssertionError: if an epoch boundary is crossed and `batch_size`
                is larger than the number of examples.
        """
        if fake_data:
            fake_image = [1] * 784
            fake_label = ([1] + [0] * 9) if self.one_hot else 0
            return ([fake_image for _ in range(batch_size)],
                    [fake_label for _ in range(batch_size)])

        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch. Any examples left over at the tail of the
            # current order are skipped.
            self._epochs_completed += 1
            # Shuffle images and labels with the SAME permutation so pairs
            # stay aligned. Fancy indexing builds a full copy of each array,
            # so host memory briefly holds two copies of the images.
            perm = np.random.permutation(self._num_examples)
            self._images = self._images[perm]
            self._labels = self._labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch
        return self._images[start:end], self._labels[start:end]
def read_data_sets(train_data, train_labels, test_data, test_labels,
                   fake_data=False, one_hot=False):
    """Bundle the train and test arrays into DataSet objects.

    Args:
        train_data: training images, passed straight to `DataSet`.
        train_labels: training labels, passed straight to `DataSet`.
        test_data: test images, passed straight to `DataSet`.
        test_labels: test labels, passed straight to `DataSet`.
        fake_data: if true, ignore the arrays and build fake data sets.
        one_hot: forwarded to `DataSet` only in the fake-data case.

    Returns:
        An object with `train` and `test` attributes. A `validation`
        attribute is set only when `fake_data` is true.
    """
    class DataSets(object):
        """Plain attribute container for the individual splits."""
        pass

    data_sets = DataSets()
    if fake_data:
        data_sets.train = DataSet([], [], fake_data=True, one_hot=one_hot)
        data_sets.validation = DataSet([], [], fake_data=True, one_hot=one_hot)
        data_sets.test = DataSet([], [], fake_data=True, one_hot=one_hot)
        return data_sets

    data_sets.train = DataSet(train_data, train_labels)
    data_sets.test = DataSet(test_data, test_labels)
    return data_sets
def randomize(a, b):
    """Shuffle two equally long arrays with one shared random permutation.

    Args:
        a: array-like; its first axis indexes the examples.
        b: array-like with the same first-axis length as `a`.

    Returns:
        A pair `(shuffled_a, shuffled_b)` of new arrays in which row `i` of
        each output comes from the same original index. The inputs are not
        modified.

    Raises:
        AssertionError: if `a` and `b` differ in length.
    """
    # Accept plain sequences as well as ndarrays; ndarray inputs (and
    # subclasses) pass through unchanged.
    a = np.asanyarray(a)
    b = np.asanyarray(b)
    assert len(a) == len(b)
    # Generate the permutation index array.
    permutation = np.random.permutation(a.shape[0])
    # Indexing with the permutation copies the data in shuffled order, so
    # each output is a full-size copy of its input.
    shuffled_a = a[permutation]
    shuffled_b = b[permutation]
    return shuffled_a, shuffled_b
# Load the pre-built image/label arrays from disk.
training_images = np.load('data_small/training_images.npy')
training_labels = np.load('data_small/training_labels.npy')
test_images = np.load('data_small/test_images.npy')
test_labels = np.load('data_small/test_labels.npy')
# Shuffle images and labels together once before training starts.
training_images, training_labels = randomize(training_images, training_labels)
avec = read_data_sets(training_images, training_labels, test_images, test_labels)
batch_size = 1 #53
print ('The batch size is: ',batch_size)
# Single 4-D input placeholder. An unknown batch dimension must be written
# as None in a placeholder shape (-1 is only valid for tf.reshape), so no
# flat placeholder plus reshape is needed. The batches produced by DataSet
# are presumably already (batch, 224, 224, 3) because its flattening step is
# disabled - TODO confirm against the .npy files.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
# One regression target per example.
labels = tf.placeholder(tf.float32, [None, 1])
# Probability of keeping a unit in the dropout layers.
keep_rate = 0.8
# NOTE(review): this placeholder is never consumed by the graph in this
# file; the dropout layers use the constant keep_rate instead.
keep_prob = tf.placeholder(tf.float32)
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 and 'SAME' padding.

    Args:
        x: input tensor handed to `tf.nn.conv2d`.
        W: filter tensor handed to `tf.nn.conv2d`.

    Returns:
        The tensor produced by `tf.nn.conv2d`.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def maxpool2d(x):
    """Max-pool `x` with a 2x2 window moved in steps of 2, 'SAME' padding.

    Args:
        x: input tensor handed to `tf.nn.max_pool`.

    Returns:
        The tensor produced by `tf.nn.max_pool`.
    """
    window = [1, 2, 2, 1]  # size of the pooling window
    step = [1, 2, 2, 1]    # movement of the window
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# Trainable weights, all initialised from a truncated normal with
# stddev=1e-4. Conv filters are [height, width, in_channels, out_channels];
# fully connected matrices are [inputs, outputs].
weights = {'W_conv1':tf.Variable(tf.truncated_normal([3,3,3,64], stddev=1e-4)),
'W_conv2':tf.Variable(tf.truncated_normal([3,3,64,64], stddev=1e-4)),
'W_conv3':tf.Variable(tf.truncated_normal([3,3,64,128], stddev=1e-4)),
'W_conv4':tf.Variable(tf.truncated_normal([3,3,128,128], stddev=1e-4)),
'W_conv5':tf.Variable(tf.truncated_normal([3,3,128,256], stddev=1e-4)),
'W_conv6':tf.Variable(tf.truncated_normal([3,3,256,256], stddev=1e-4)),
'W_conv7':tf.Variable(tf.truncated_normal([3,3,256,256], stddev=1e-4)),
# NOTE(review): this single matrix has 28*28*256*4096 = 822,083,584
# entries, i.e. about 3.06 GiB if stored as float32 (presumably the dtype
# here - confirm). Its gradient and any per-variable state kept by the
# RMSProp optimizer used further down are each the same size again, so this
# layer alone is the most likely cause of the GPU out-of-memory error, even
# with a batch size of 1 - TODO confirm with a memory profile.
'W_fc':tf.Variable(tf.truncated_normal([28*28*256,4096], stddev=1e-4)),
'W_fc2':tf.Variable(tf.truncated_normal([4096,2622], stddev=1e-4)),
# Final projection from the 2622-wide layer to one regression output.
'reg':tf.Variable(tf.truncated_normal([2622,1], stddev=1e-4))}
# Trainable biases, one per output channel/unit of the matching weight
# tensor, all initialised to the constant 0.1.
biases = {'b_conv1':tf.Variable(tf.constant(0.1, shape=[64])),
'b_conv2':tf.Variable(tf.constant(0.1, shape=[64])),
'b_conv3':tf.Variable(tf.constant(0.1, shape=[128])),
'b_conv4':tf.Variable(tf.constant(0.1, shape=[128])),
'b_conv5':tf.Variable(tf.constant(0.1, shape=[256])),
'b_conv6':tf.Variable(tf.constant(0.1, shape=[256])),
'b_conv7':tf.Variable(tf.constant(0.1, shape=[256])),
'b_fc':tf.Variable(tf.constant(0.1, shape=[4096])),
'b_fc2':tf.Variable(tf.constant(0.1, shape=[2622])),
# Bias of the single regression output.
'b_reg':tf.Variable(tf.constant(0.1, shape=[1]))}
# Build the network: seven 3x3 conv+ReLU layers with three 2x2 max-pools,
# followed by two fully connected layers and a single-output regression
# layer. With a 224x224 input, each pool halves the spatial size:
# 224 -> 112 -> 56 -> 28, which matches the 28*28*256 flatten below.
#
# NOTE(review): every tf.Print below is a debugging node that logs the
# activation tensor each time it is evaluated, i.e. on every training
# step; consider removing them once debugging is finished.

# Block 1: two 64-channel convolutions, then pool.
conv1 = tf.nn.relu(conv2d(images, weights['W_conv1']) + biases['b_conv1'])
conv1 = tf.Print(conv1, [conv1], "conv1: ")
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool2d(conv2)
conv2 = tf.Print(conv2, [conv2], "conv2: ")
# Block 2: two 128-channel convolutions, then pool.
conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['b_conv3'])
conv3 = tf.Print(conv3, [conv3], "conv3: ")
conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['b_conv4'])
conv4 = maxpool2d(conv4)
conv4 = tf.Print(conv4, [conv4], "conv4: ")
# Block 3: three 256-channel convolutions, then pool.
conv5 = tf.nn.relu(conv2d(conv4, weights['W_conv5']) + biases['b_conv5'])
conv5 = tf.Print(conv5, [conv5], "conv5: ")
conv6 = tf.nn.relu(conv2d(conv5, weights['W_conv6']) + biases['b_conv6'])
conv6 = tf.Print(conv6, [conv6], "conv6: ")
conv7 = tf.nn.relu(conv2d(conv6, weights['W_conv7']) + biases['b_conv7'])
conv7 = maxpool2d(conv7)
conv7 = tf.Print(conv7, [conv7], "conv7: ")
# Flatten the final feature map to one row per example.
fc = tf.reshape(conv7,[-1, 28*28*256])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
# NOTE(review): dropout uses the Python constant keep_rate, so it is baked
# into the graph and would also be active at evaluation time; the keep_prob
# placeholder defined earlier is never used.
fc = tf.nn.dropout(fc, keep_rate)
# NOTE(review): unlike the first fully connected layer, no ReLU is applied
# here - confirm this is intentional.
fc2 = tf.matmul(fc, weights['W_fc2'])+biases['b_fc2']
fc2 = tf.nn.dropout(fc2, keep_rate)
# Single regression output per example.
pred = tf.add(tf.matmul(fc2, weights['reg']), biases['b_reg'])
# Mean squared error between prediction and target.
loss = tf.reduce_mean(tf.square(pred-labels))
# RMSProp with learning rate 0.001.
opt = tf.train.RMSPropOptimizer(0.001)
train_op = opt.minimize(loss)
# Number of passes over the training set.
hm_epochs = 5
print('Total epochs: ', hm_epochs)
saver = tf.train.Saver()
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    print('Begin session')
    sess.run(init_op)  # initialize all variables
    # Whole batches per epoch; a trailing partial batch is not used.
    steps_per_epoch = avec.train.num_examples // batch_size
    for epoch in range(hm_epochs):
        print('Begin epoch:', epoch)
        epoch_loss = 0
        for _ in range(steps_per_epoch):
            # Fetch the next batch of images and labels.
            np_images, np_labels = avec.train.next_batch(batch_size)
            print('np_images shape:', np_images.shape)
            print('np_labels shape:', np_labels.shape)
            # Put the batch into the feed dict.
            # NOTE(review): the labels placeholder is [None, 1]; this
            # assumes np_labels is already 2-D - TODO confirm.
            feed = {images: np_images, labels: np_labels}
            # One training step: evaluate the loss and apply the update.
            # The prediction itself was fetched but never used, so it is
            # no longer copied back to the host.
            np_loss, _ = sess.run([loss, train_op], feed_dict=feed)
            epoch_loss += np_loss
        # Report the mean per-step loss of the epoch that just finished.
        print('Epoch', epoch + 1, 'completed out of', hm_epochs, 'loss: ',
              epoch_loss / (avec.train.num_examples / batch_size))
    # Checkpointing is currently disabled:
    #save_path = saver.save(sess, "model1.ckpt")
    #print("Model saved in file: %s" % save_path)
그가 세 그룹 (총 6.12gb 두 총 4.33gb 중 하나)
W tensorflow/core/common_runtime/bfc_allocator.cc:274] *****************************************************************************************xxxxxxxxxxx
W tensorflow/core/common_runtime/bfc_allocator.cc:275] Ran out of memory trying to allocate 1.0KiB. See logs for memory state.
W tensorflow/core/framework/op_kernel.cc:993] Resource exhausted: OOM when allocating tensor with shape[256]
W tensorflow/core/common_runtime/bfc_allocator.cc:274] *****************************************************************************************xxxxxxxxxxx
W tensorflow/core/common_runtime/bfc_allocator.cc:275] Ran out of memory trying to allocate 1.0KiB. See logs for memory state.
W tensorflow/core/framework/op_kernel.cc:993] Resource exhausted: OOM when allocating tensor with shape[256]
GPU 없이(CUDA 표시 장치 없이) 배치 크기 1로 CPU에서 실행하고, 메모리를 프로파일링하여 어떤 텐서(data_small/testing_images.npy 등)가 비정상적으로 큰지 확인해 보십시오. –