Converting Tensorflow Graph to use Estimator, get 'TypeError: data type not understood' at loss function...












5















I am trying to convert Tensorflow's official basic word2vec implementation to use tf.Estimator.
The issue is that the loss function (sampled_softmax_loss or nce_loss) gives an error when using Tensorflow Estimators. It works perfectly fine in the original implementation.



Here is Tensorflow's official basic word2vec implementation:



https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/word2vec/word2vec_basic.py



Here is the Google Colab notebook where I implemented this code, which is working.



https://colab.research.google.com/drive/1nTX77dRBHmXx6PEF5pmYpkIVxj_TqT5I



Here is the Google Colab notebook where I altered the code so that it uses Tensorflow Estimator, which is Not working.



https://colab.research.google.com/drive/1IVDqGwMx6BK5-Bgrw190jqHU6tt3ZR3e



For convenience, here is exact code from the Estimator version above where I define model_fn



batch_size = 128
embedding_size = 128 # Dimension of the embedding vector.
skip_window = 1 # How many words to consider left and right.
num_skips = 2 # How many times to reuse an input to generate a label.
num_sampled = 64 # Number of negative examples to sample.

def my_model( features, labels, mode, params):

with tf.name_scope('inputs'):
train_inputs = features
train_labels = labels

with tf.name_scope('embeddings'):
embeddings = tf.Variable(
tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
embed = tf.nn.embedding_lookup(embeddings, train_inputs)

with tf.name_scope('weights'):
nce_weights = tf.Variable(
tf.truncated_normal(
[vocabulary_size, embedding_size],
stddev=1.0 / math.sqrt(embedding_size)))
with tf.name_scope('biases'):
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

with tf.name_scope('loss'):
loss = tf.reduce_mean(
tf.nn.nce_loss(
weights=nce_weights,
biases=nce_biases,
labels=train_labels,
inputs=embed,
num_sampled=num_sampled,
num_classes=vocabulary_size))

tf.summary.scalar('loss', loss)

if mode == "train":
with tf.name_scope('optimizer'):
optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=optimizer)


And here is where I call the estimator and training



word2vecEstimator = tf.estimator.Estimator(
model_fn=my_model,
params={
'batch_size': 16,
'embedding_size': 10,
'num_inputs': 3,
'num_sampled': 128,
'batch_size': 16
})

word2vecEstimator.train(
input_fn=generate_batch,
steps=10)


And this is the error message I get when I call the Estimator training:



INFO:tensorflow:Calling model_fn.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-22-955f44867ee5> in <module>()
1 word2vecEstimator.train(
2 input_fn=generate_batch,
----> 3 steps=10)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
352
353 saving_listeners = _check_listeners_type(saving_listeners)
--> 354 loss = self._train_model(input_fn, hooks, saving_listeners)
355 logging.info('Loss for final step: %s.', loss)
356 return self

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1205 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1206 else:
-> 1207 return self._train_model_default(input_fn, hooks, saving_listeners)
1208
1209 def _train_model_default(self, input_fn, hooks, saving_listeners):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1235 worker_hooks.extend(input_hooks)
1236 estimator_spec = self._call_model_fn(
-> 1237 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
1238 global_step_tensor = training_util.get_global_step(g)
1239 return self._train_with_estimator_spec(estimator_spec, worker_hooks,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1193
1194 logging.info('Calling model_fn.')
-> 1195 model_fn_results = self._model_fn(features=features, **kwargs)
1196 logging.info('Done calling model_fn.')
1197

<ipython-input-20-9d389437162a> in my_model(features, labels, mode, params)
33 inputs=embed,
34 num_sampled=num_sampled,
---> 35 num_classes=vocabulary_size))
36
37 # Add the loss value as a scalar to summary.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name)
1246 remove_accidental_hits=remove_accidental_hits,
1247 partition_strategy=partition_strategy,
-> 1248 name=name)
1249 sampled_losses = sigmoid_cross_entropy_with_logits(
1250 labels=labels, logits=logits, name="sampled_losses")

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
1029 with ops.name_scope(name, "compute_sampled_logits",
1030 weights + [biases, inputs, labels]):
-> 1031 if labels.dtype != dtypes.int64:
1032 labels = math_ops.cast(labels, dtypes.int64)
1033 labels_flat = array_ops.reshape(labels, [-1])

TypeError: data type not understood


Edit: Upon request, here's what a typical output for input_fn looks like



print(generate_batch(batch_size=8, num_skips=2, skip_window=1))



(array([3081, 3081,   12,   12,    6,    6,  195,  195], dtype=int32), array([[5234],
[ 12],
[ 6],
[3081],
[ 12],
[ 195],
[ 6],
[ 2]], dtype=int32))









share|improve this question

























  • What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

    – jdehesa
    Nov 27 '18 at 14:27











  • For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

    – SantoshGupta7
    Nov 29 '18 at 20:42


















5















I am trying to convert Tensorflow's official basic word2vec implementation to use tf.Estimator.
The issue is that the loss function (sampled_softmax_loss or nce_loss) gives an error when using Tensorflow Estimators. It works perfectly fine in the original implementation.



Here is Tensorflow's official basic word2vec implementation:



https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/word2vec/word2vec_basic.py



Here is the Google Colab notebook where I implemented this code, which is working.



https://colab.research.google.com/drive/1nTX77dRBHmXx6PEF5pmYpkIVxj_TqT5I



Here is the Google Colab notebook where I altered the code so that it uses Tensorflow Estimator, which is Not working.



https://colab.research.google.com/drive/1IVDqGwMx6BK5-Bgrw190jqHU6tt3ZR3e



For convenience, here is exact code from the Estimator version above where I define model_fn



batch_size = 128
embedding_size = 128 # Dimension of the embedding vector.
skip_window = 1 # How many words to consider left and right.
num_skips = 2 # How many times to reuse an input to generate a label.
num_sampled = 64 # Number of negative examples to sample.

def my_model( features, labels, mode, params):

with tf.name_scope('inputs'):
train_inputs = features
train_labels = labels

with tf.name_scope('embeddings'):
embeddings = tf.Variable(
tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
embed = tf.nn.embedding_lookup(embeddings, train_inputs)

with tf.name_scope('weights'):
nce_weights = tf.Variable(
tf.truncated_normal(
[vocabulary_size, embedding_size],
stddev=1.0 / math.sqrt(embedding_size)))
with tf.name_scope('biases'):
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

with tf.name_scope('loss'):
loss = tf.reduce_mean(
tf.nn.nce_loss(
weights=nce_weights,
biases=nce_biases,
labels=train_labels,
inputs=embed,
num_sampled=num_sampled,
num_classes=vocabulary_size))

tf.summary.scalar('loss', loss)

if mode == "train":
with tf.name_scope('optimizer'):
optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=optimizer)


And here is where I call the estimator and training



word2vecEstimator = tf.estimator.Estimator(
model_fn=my_model,
params={
'batch_size': 16,
'embedding_size': 10,
'num_inputs': 3,
'num_sampled': 128,
'batch_size': 16
})

word2vecEstimator.train(
input_fn=generate_batch,
steps=10)


And this is the error message I get when I call the Estimator training:



INFO:tensorflow:Calling model_fn.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-22-955f44867ee5> in <module>()
1 word2vecEstimator.train(
2 input_fn=generate_batch,
----> 3 steps=10)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
352
353 saving_listeners = _check_listeners_type(saving_listeners)
--> 354 loss = self._train_model(input_fn, hooks, saving_listeners)
355 logging.info('Loss for final step: %s.', loss)
356 return self

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1205 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1206 else:
-> 1207 return self._train_model_default(input_fn, hooks, saving_listeners)
1208
1209 def _train_model_default(self, input_fn, hooks, saving_listeners):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1235 worker_hooks.extend(input_hooks)
1236 estimator_spec = self._call_model_fn(
-> 1237 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
1238 global_step_tensor = training_util.get_global_step(g)
1239 return self._train_with_estimator_spec(estimator_spec, worker_hooks,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1193
1194 logging.info('Calling model_fn.')
-> 1195 model_fn_results = self._model_fn(features=features, **kwargs)
1196 logging.info('Done calling model_fn.')
1197

<ipython-input-20-9d389437162a> in my_model(features, labels, mode, params)
33 inputs=embed,
34 num_sampled=num_sampled,
---> 35 num_classes=vocabulary_size))
36
37 # Add the loss value as a scalar to summary.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name)
1246 remove_accidental_hits=remove_accidental_hits,
1247 partition_strategy=partition_strategy,
-> 1248 name=name)
1249 sampled_losses = sigmoid_cross_entropy_with_logits(
1250 labels=labels, logits=logits, name="sampled_losses")

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
1029 with ops.name_scope(name, "compute_sampled_logits",
1030 weights + [biases, inputs, labels]):
-> 1031 if labels.dtype != dtypes.int64:
1032 labels = math_ops.cast(labels, dtypes.int64)
1033 labels_flat = array_ops.reshape(labels, [-1])

TypeError: data type not understood


Edit: Upon request, here's what a typical output for input_fn looks like



print(generate_batch(batch_size=8, num_skips=2, skip_window=1))



(array([3081, 3081,   12,   12,    6,    6,  195,  195], dtype=int32), array([[5234],
[ 12],
[ 6],
[3081],
[ 12],
[ 195],
[ 6],
[ 2]], dtype=int32))









share|improve this question

























  • What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

    – jdehesa
    Nov 27 '18 at 14:27











  • For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

    – SantoshGupta7
    Nov 29 '18 at 20:42
















5












5








5


3






I am trying to convert Tensorflow's official basic word2vec implementation to use tf.Estimator.
The issue is that the loss function (sampled_softmax_loss or nce_loss) gives an error when using Tensorflow Estimators. It works perfectly fine in the original implementation.



Here is Tensorflow's official basic word2vec implementation:



https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/word2vec/word2vec_basic.py



Here is the Google Colab notebook where I implemented this code, which is working.



https://colab.research.google.com/drive/1nTX77dRBHmXx6PEF5pmYpkIVxj_TqT5I



Here is the Google Colab notebook where I altered the code so that it uses Tensorflow Estimator, which is Not working.



https://colab.research.google.com/drive/1IVDqGwMx6BK5-Bgrw190jqHU6tt3ZR3e



For convenience, here is exact code from the Estimator version above where I define model_fn



batch_size = 128
embedding_size = 128 # Dimension of the embedding vector.
skip_window = 1 # How many words to consider left and right.
num_skips = 2 # How many times to reuse an input to generate a label.
num_sampled = 64 # Number of negative examples to sample.

def my_model( features, labels, mode, params):

with tf.name_scope('inputs'):
train_inputs = features
train_labels = labels

with tf.name_scope('embeddings'):
embeddings = tf.Variable(
tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
embed = tf.nn.embedding_lookup(embeddings, train_inputs)

with tf.name_scope('weights'):
nce_weights = tf.Variable(
tf.truncated_normal(
[vocabulary_size, embedding_size],
stddev=1.0 / math.sqrt(embedding_size)))
with tf.name_scope('biases'):
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

with tf.name_scope('loss'):
loss = tf.reduce_mean(
tf.nn.nce_loss(
weights=nce_weights,
biases=nce_biases,
labels=train_labels,
inputs=embed,
num_sampled=num_sampled,
num_classes=vocabulary_size))

tf.summary.scalar('loss', loss)

if mode == "train":
with tf.name_scope('optimizer'):
optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=optimizer)


And here is where I call the estimator and training



word2vecEstimator = tf.estimator.Estimator(
model_fn=my_model,
params={
'batch_size': 16,
'embedding_size': 10,
'num_inputs': 3,
'num_sampled': 128,
'batch_size': 16
})

word2vecEstimator.train(
input_fn=generate_batch,
steps=10)


And this is the error message I get when I call the Estimator training:



INFO:tensorflow:Calling model_fn.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-22-955f44867ee5> in <module>()
1 word2vecEstimator.train(
2 input_fn=generate_batch,
----> 3 steps=10)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
352
353 saving_listeners = _check_listeners_type(saving_listeners)
--> 354 loss = self._train_model(input_fn, hooks, saving_listeners)
355 logging.info('Loss for final step: %s.', loss)
356 return self

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1205 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1206 else:
-> 1207 return self._train_model_default(input_fn, hooks, saving_listeners)
1208
1209 def _train_model_default(self, input_fn, hooks, saving_listeners):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1235 worker_hooks.extend(input_hooks)
1236 estimator_spec = self._call_model_fn(
-> 1237 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
1238 global_step_tensor = training_util.get_global_step(g)
1239 return self._train_with_estimator_spec(estimator_spec, worker_hooks,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1193
1194 logging.info('Calling model_fn.')
-> 1195 model_fn_results = self._model_fn(features=features, **kwargs)
1196 logging.info('Done calling model_fn.')
1197

<ipython-input-20-9d389437162a> in my_model(features, labels, mode, params)
33 inputs=embed,
34 num_sampled=num_sampled,
---> 35 num_classes=vocabulary_size))
36
37 # Add the loss value as a scalar to summary.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name)
1246 remove_accidental_hits=remove_accidental_hits,
1247 partition_strategy=partition_strategy,
-> 1248 name=name)
1249 sampled_losses = sigmoid_cross_entropy_with_logits(
1250 labels=labels, logits=logits, name="sampled_losses")

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
1029 with ops.name_scope(name, "compute_sampled_logits",
1030 weights + [biases, inputs, labels]):
-> 1031 if labels.dtype != dtypes.int64:
1032 labels = math_ops.cast(labels, dtypes.int64)
1033 labels_flat = array_ops.reshape(labels, [-1])

TypeError: data type not understood


Edit: Upon request, here's what a typical output for input_fn looks like



print(generate_batch(batch_size=8, num_skips=2, skip_window=1))



(array([3081, 3081,   12,   12,    6,    6,  195,  195], dtype=int32), array([[5234],
[ 12],
[ 6],
[3081],
[ 12],
[ 195],
[ 6],
[ 2]], dtype=int32))









share|improve this question
















I am trying to convert Tensorflow's official basic word2vec implementation to use tf.Estimator.
The issue is that the loss function (sampled_softmax_loss or nce_loss) gives an error when using Tensorflow Estimators. It works perfectly fine in the original implementation.



Here is Tensorflow's official basic word2vec implementation:



https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/word2vec/word2vec_basic.py



Here is the Google Colab notebook where I implemented this code, which is working.



https://colab.research.google.com/drive/1nTX77dRBHmXx6PEF5pmYpkIVxj_TqT5I



Here is the Google Colab notebook where I altered the code so that it uses Tensorflow Estimator, which is Not working.



https://colab.research.google.com/drive/1IVDqGwMx6BK5-Bgrw190jqHU6tt3ZR3e



For convenience, here is exact code from the Estimator version above where I define model_fn



batch_size = 128
embedding_size = 128 # Dimension of the embedding vector.
skip_window = 1 # How many words to consider left and right.
num_skips = 2 # How many times to reuse an input to generate a label.
num_sampled = 64 # Number of negative examples to sample.

def my_model( features, labels, mode, params):

with tf.name_scope('inputs'):
train_inputs = features
train_labels = labels

with tf.name_scope('embeddings'):
embeddings = tf.Variable(
tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
embed = tf.nn.embedding_lookup(embeddings, train_inputs)

with tf.name_scope('weights'):
nce_weights = tf.Variable(
tf.truncated_normal(
[vocabulary_size, embedding_size],
stddev=1.0 / math.sqrt(embedding_size)))
with tf.name_scope('biases'):
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

with tf.name_scope('loss'):
loss = tf.reduce_mean(
tf.nn.nce_loss(
weights=nce_weights,
biases=nce_biases,
labels=train_labels,
inputs=embed,
num_sampled=num_sampled,
num_classes=vocabulary_size))

tf.summary.scalar('loss', loss)

if mode == "train":
with tf.name_scope('optimizer'):
optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=optimizer)


And here is where I call the estimator and training



word2vecEstimator = tf.estimator.Estimator(
model_fn=my_model,
params={
'batch_size': 16,
'embedding_size': 10,
'num_inputs': 3,
'num_sampled': 128,
'batch_size': 16
})

word2vecEstimator.train(
input_fn=generate_batch,
steps=10)


And this is the error message I get when I call the Estimator training:



INFO:tensorflow:Calling model_fn.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-22-955f44867ee5> in <module>()
1 word2vecEstimator.train(
2 input_fn=generate_batch,
----> 3 steps=10)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
352
353 saving_listeners = _check_listeners_type(saving_listeners)
--> 354 loss = self._train_model(input_fn, hooks, saving_listeners)
355 logging.info('Loss for final step: %s.', loss)
356 return self

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1205 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1206 else:
-> 1207 return self._train_model_default(input_fn, hooks, saving_listeners)
1208
1209 def _train_model_default(self, input_fn, hooks, saving_listeners):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1235 worker_hooks.extend(input_hooks)
1236 estimator_spec = self._call_model_fn(
-> 1237 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
1238 global_step_tensor = training_util.get_global_step(g)
1239 return self._train_with_estimator_spec(estimator_spec, worker_hooks,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1193
1194 logging.info('Calling model_fn.')
-> 1195 model_fn_results = self._model_fn(features=features, **kwargs)
1196 logging.info('Done calling model_fn.')
1197

<ipython-input-20-9d389437162a> in my_model(features, labels, mode, params)
33 inputs=embed,
34 num_sampled=num_sampled,
---> 35 num_classes=vocabulary_size))
36
37 # Add the loss value as a scalar to summary.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name)
1246 remove_accidental_hits=remove_accidental_hits,
1247 partition_strategy=partition_strategy,
-> 1248 name=name)
1249 sampled_losses = sigmoid_cross_entropy_with_logits(
1250 labels=labels, logits=logits, name="sampled_losses")

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
1029 with ops.name_scope(name, "compute_sampled_logits",
1030 weights + [biases, inputs, labels]):
-> 1031 if labels.dtype != dtypes.int64:
1032 labels = math_ops.cast(labels, dtypes.int64)
1033 labels_flat = array_ops.reshape(labels, [-1])

TypeError: data type not understood


Edit: Upon request, here's what a typical output for input_fn looks like



print(generate_batch(batch_size=8, num_skips=2, skip_window=1))



(array([3081, 3081,   12,   12,    6,    6,  195,  195], dtype=int32), array([[5234],
[ 12],
[ 6],
[3081],
[ 12],
[ 195],
[ 6],
[ 2]], dtype=int32))






python tensorflow tensorflow-estimator






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Dec 6 '18 at 6:45







SantoshGupta7

















asked Nov 21 '18 at 5:17









SantoshGupta7SantoshGupta7

6811515




6811515













  • What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

    – jdehesa
    Nov 27 '18 at 14:27











  • For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

    – SantoshGupta7
    Nov 29 '18 at 20:42





















  • What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

    – jdehesa
    Nov 27 '18 at 14:27











  • For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

    – SantoshGupta7
    Nov 29 '18 at 20:42



















What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

– jdehesa
Nov 27 '18 at 14:27





What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

– jdehesa
Nov 27 '18 at 14:27













For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

– SantoshGupta7
Nov 29 '18 at 20:42







For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

– SantoshGupta7
Nov 29 '18 at 20:42














3 Answers
3






active

oldest

votes


















3





+25









You use generate_batch like a variable here:



word2vecEstimator.train(
input_fn=generate_batch,
steps=10)


Call the function with generate_batch().
But I think you must pass some values to the function.






share|improve this answer


























  • I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

    – SantoshGupta7
    Nov 29 '18 at 20:53











  • Can you show us the output of the generate_batch() call?

    – tifi90
    Nov 29 '18 at 21:18











  • yeah, just updated the original post, the output of generate_batch() call is at the bottom.

    – SantoshGupta7
    Nov 29 '18 at 22:27











  • The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

    – tifi90
    Nov 30 '18 at 9:58











  • They're both 16, I think it's a little confusing since the labels start on the same line as the features end. ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

    – SantoshGupta7
    Dec 1 '18 at 3:19



















0














It might be that tensors and ops must be in the input_fn, not in the 'model_fn'




I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.



You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().




https://github.com/tensorflow/tensorflow/issues/8042




Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved




https://github.com/tensorflow/tensorflow/issues/4026



However, there still is not enough info on what's causing the issue. This is just a lead.






share|improve this answer































    0














    Found the answer




    The error clearly says you have an invalid type for labels.



    You are trying to pass a numpy array instead of a Tensor. Sometimes Tensorflow
    performs an implicit conversion from ndarray to Tensor under the hood
    (that's why your code works outside of Estimator), but in this case it
    doesn't.




    .




    No, the official implementation feeds data from a placeholder. A placeholder is
    always a Tensor, so it doesn't depend on implicit conversion.



    But if you directly call loss function with a numpy array as input
    (Notice: call during graph construction phase, so argument content
    gets embedded into graph), it MAY work (however, I did not check it).



    This code:



    nce_loss(labels=[1,2,3]) will be called only ONCE during graph
    construction. Labels will be statically embedded into graph as a
    constant and potentially can be of any Tensor-compatible type (list,
    ndarray, etc)



    This code:

    ```python
    def model(label_input):
        nce_loss(labels=label_input)

    estimator(model_fun=model).train()
    ```

    can't embed the labels variable statically, because its content is not
    defined during graph construction. So if you feed anything except a
    Tensor, it will throw an error.




    From



    https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/



    So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked






    share|improve this answer























      Your Answer






      StackExchange.ifUsing("editor", function () {
      StackExchange.using("externalEditor", function () {
      StackExchange.using("snippets", function () {
      StackExchange.snippets.init();
      });
      });
      }, "code-snippets");

      StackExchange.ready(function() {
      var channelOptions = {
      tags: "".split(" "),
      id: "1"
      };
      initTagRenderer("".split(" "), "".split(" "), channelOptions);

      StackExchange.using("externalEditor", function() {
      // Have to fire editor after snippets, if snippets enabled
      if (StackExchange.settings.snippets.snippetsEnabled) {
      StackExchange.using("snippets", function() {
      createEditor();
      });
      }
      else {
      createEditor();
      }
      });

      function createEditor() {
      StackExchange.prepareEditor({
      heartbeatType: 'answer',
      autoActivateHeartbeat: false,
      convertImagesToLinks: true,
      noModals: true,
      showLowRepImageUploadWarning: true,
      reputationToPostImages: 10,
      bindNavPrevention: true,
      postfix: "",
      imageUploader: {
      brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
      contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
      allowUrls: true
      },
      onDemand: true,
      discardSelector: ".discard-answer"
      ,immediatelyShowMarkdownHelp:true
      });


      }
      });














      draft saved

      draft discarded


















      StackExchange.ready(
      function () {
      StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53405657%2fconverting-tensorflow-graph-to-use-estimator-get-typeerror-data-type-not-unde%23new-answer', 'question_page');
      }
      );

      Post as a guest















      Required, but never shown

























      3 Answers
      3






      active

      oldest

      votes








      3 Answers
      3






      active

      oldest

      votes









      active

      oldest

      votes






      active

      oldest

      votes









      3





      +25









      You use generate_batch like a variable here:



      word2vecEstimator.train(
      input_fn=generate_batch,
      steps=10)


      Call the function with generate_batch().
      But I think you must pass some values to the function.






      share|improve this answer


























      • I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

        – SantoshGupta7
        Nov 29 '18 at 20:53











      • Can you show us the output of the generate_batch() call?

        – tifi90
        Nov 29 '18 at 21:18











      • yeah, just updated the original post, the output of generate_batch() call is at the bottom.

        – SantoshGupta7
        Nov 29 '18 at 22:27











      • The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

        – tifi90
        Nov 30 '18 at 9:58











      • They're both 16. I think it's a little confusing because the labels start on the same line where the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

        – SantoshGupta7
        Dec 1 '18 at 3:19
















      3





      +25









      You use generate_batch like a variable here:



      word2vecEstimator.train(
      input_fn=generate_batch,
      steps=10)


      Call the function with generate_batch().
      But I think you must pass some values to the function.






      share|improve this answer


























      • I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

        – SantoshGupta7
        Nov 29 '18 at 20:53











      • Can you show us the output of the generate_batch() call?

        – tifi90
        Nov 29 '18 at 21:18











      • yeah, just updated the original post, the output of generate_batch() call is at the bottom.

        – SantoshGupta7
        Nov 29 '18 at 22:27











      • The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

        – tifi90
        Nov 30 '18 at 9:58











      • They're both 16. I think it's a little confusing because the labels start on the same line where the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

        – SantoshGupta7
        Dec 1 '18 at 3:19














      3





      +25







      3





      +25



      3




      +25





      You use generate_batch like a variable here:



      word2vecEstimator.train(
      input_fn=generate_batch,
      steps=10)


      Call the function with generate_batch().
      But I think you must pass some values to the function.






      share|improve this answer















      You use generate_batch like a variable here:



      word2vecEstimator.train(
      input_fn=generate_batch,
      steps=10)


      Call the function with generate_batch().
      But I think you must pass some values to the function.







      share|improve this answer














      share|improve this answer



      share|improve this answer








      edited Nov 29 '18 at 11:01

























      answered Nov 29 '18 at 10:31









      tifi90tifi90

      938




      938













      • I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

        – SantoshGupta7
        Nov 29 '18 at 20:53











      • Can you show us the output of the generate_batch() call?

        – tifi90
        Nov 29 '18 at 21:18











      • yeah, just updated the original post, the output of generate_batch() call is at the bottom.

        – SantoshGupta7
        Nov 29 '18 at 22:27











      • The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

        – tifi90
        Nov 30 '18 at 9:58











      • They're both 16. I think it's a little confusing because the labels start on the same line where the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

        – SantoshGupta7
        Dec 1 '18 at 3:19



















      • I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

        – SantoshGupta7
        Nov 29 '18 at 20:53











      • Can you show us the output of the generate_batch() call?

        – tifi90
        Nov 29 '18 at 21:18











      • yeah, just updated the original post, the output of generate_batch() call is at the bottom.

        – SantoshGupta7
        Nov 29 '18 at 22:27











      • The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

        – tifi90
        Nov 30 '18 at 9:58











      • They're both 16. I think it's a little confusing because the labels start on the same line where the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

        – SantoshGupta7
        Dec 1 '18 at 3:19

















      I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

      – SantoshGupta7
      Nov 29 '18 at 20:53





      I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

      – SantoshGupta7
      Nov 29 '18 at 20:53













      Can you show us the output of the generate_batch() call?

      – tifi90
      Nov 29 '18 at 21:18





      Can you show us the output of the generate_batch() call?

      – tifi90
      Nov 29 '18 at 21:18













      yeah, just updated the original post, the output of generate_batch() call is at the bottom.

      – SantoshGupta7
      Nov 29 '18 at 22:27





      yeah, just updated the original post, the output of generate_batch() call is at the bottom.

      – SantoshGupta7
      Nov 29 '18 at 22:27













      The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

      – tifi90
      Nov 30 '18 at 9:58





      The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

      – tifi90
      Nov 30 '18 at 9:58













      They're both 16. I think it's a little confusing because the labels start on the same line where the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

      – SantoshGupta7
      Dec 1 '18 at 3:19





      They're both 16. I think it's a little confusing because the labels start on the same line where the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

      – SantoshGupta7
      Dec 1 '18 at 3:19













      0














      It might be that tensors and ops must be in the input_fn, not in the 'model_fn'




      I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.



      You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().




      https://github.com/tensorflow/tensorflow/issues/8042




      Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved




      https://github.com/tensorflow/tensorflow/issues/4026



      However, there still is not enough info on what's causing the issue. This is just a lead.






      share|improve this answer




























        0














        It might be that tensors and ops must be in the input_fn, not in the 'model_fn'




        I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.



        You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().




        https://github.com/tensorflow/tensorflow/issues/8042




        Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved




        https://github.com/tensorflow/tensorflow/issues/4026



        However, there still is not enough info on what's causing the issue. This is just a lead.






        share|improve this answer


























          0












          0








          0







          It might be that tensors and ops must be in the input_fn, not in the 'model_fn'




          I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.



          You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().




          https://github.com/tensorflow/tensorflow/issues/8042




          Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved




          https://github.com/tensorflow/tensorflow/issues/4026



          However, there still is not enough info on what's causing the issue. This is just a lead.






          share|improve this answer













          It might be that tensors and ops must be in the input_fn, not in the 'model_fn'




          I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.



          You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().




          https://github.com/tensorflow/tensorflow/issues/8042




          Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved




          https://github.com/tensorflow/tensorflow/issues/4026



          However, there still is not enough info on what's causing the issue. This is just a lead.







          share|improve this answer












          share|improve this answer



          share|improve this answer










          answered Dec 3 '18 at 18:36









          SantoshGupta7SantoshGupta7

          6811515




          6811515























              0














              Found the answer




              Error clearly says you have invalid type for labels.



              You are trying to pass a numpy array instead of a Tensor. Sometimes Tensorflow
              performs implicit conversion from ndarray to Tensor under the hood
              (that's why your code works outside of Estimator), but in this case it
              doesn't.




              .




              No, the official implementation feeds data from a placeholder. A placeholder is
              always a Tensor, so it doesn't depend on implicit conversion.



              But if you directly call loss function with a numpy array as input
              (Notice: call during graph construction phase, so argument content
              gets embedded into graph), it MAY work (however, I did not check it).



              This code:



              nce_loss(labels=[1,2,3]) will be called only ONCE during graph
              construction. Labels will be statically embedded into graph as a
              constant and potentially can be of any Tensor-compatible type (list,
              ndarray, etc)



              This code:

              ```Python
              def model(label_input):
                  nce_loss(labels=label_input)

              estimator(model_fun=model).train()
              ```

              can't embed the labels variable statically, because its content is
              not defined during graph construction. So if you feed anything
              except a Tensor, it will throw an error.




              From



              https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/



              So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked






              share|improve this answer




























                0














                Found the answer




                Error clearly says you have invalid type for labels.



                You are trying to pass a numpy array instead of a Tensor. Sometimes Tensorflow
                performs implicit conversion from ndarray to Tensor under the hood
                (that's why your code works outside of Estimator), but in this case it
                doesn't.




                .




                No, the official implementation feeds data from a placeholder. A placeholder is
                always a Tensor, so it doesn't depend on implicit conversion.



                But if you directly call loss function with a numpy array as input
                (Notice: call during graph construction phase, so argument content
                gets embedded into graph), it MAY work (however, I did not check it).



                This code:



                nce_loss(labels=[1,2,3]) will be called only ONCE during graph
                construction. Labels will be statically embedded into graph as a
                constant and potentially can be of any Tensor-compatible type (list,
                ndarray, etc)



                This code:

                ```Python
                def model(label_input):
                    nce_loss(labels=label_input)

                estimator(model_fun=model).train()
                ```

                can't embed the labels variable statically, because its content is
                not defined during graph construction. So if you feed anything
                except a Tensor, it will throw an error.




                From



                https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/



                So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked






                share|improve this answer


























                  0












                  0








                  0







                  Found the answer




                  Error clearly says you have invalid type for labels.



                  You are trying to pass a numpy array instead of a Tensor. Sometimes Tensorflow
                  performs implicit conversion from ndarray to Tensor under the hood
                  (that's why your code works outside of Estimator), but in this case it
                  doesn't.




                  .




                  No, the official implementation feeds data from a placeholder. A placeholder is
                  always a Tensor, so it doesn't depend on implicit conversion.



                  But if you directly call loss function with a numpy array as input
                  (Notice: call during graph construction phase, so argument content
                  gets embedded into graph), it MAY work (however, I did not check it).



                  This code:



                  nce_loss(labels=[1,2,3]) will be called only ONCE during graph
                  construction. Labels will be statically embedded into graph as a
                  constant and potentially can be of any Tensor-compatible type (list,
                  ndarray, etc)



                  This code:

                  ```Python
                  def model(label_input):
                      nce_loss(labels=label_input)

                  estimator(model_fun=model).train()
                  ```

                  can't embed the labels variable statically, because its content is
                  not defined during graph construction. So if you feed anything
                  except a Tensor, it will throw an error.




                  From



                  https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/



                  So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked






                  share|improve this answer













                  Found the answer




                  Error clearly says you have invalid type for labels.



                  You are trying to pass a numpy array instead of a Tensor. Sometimes Tensorflow
                  performs implicit conversion from ndarray to Tensor under the hood
                  (that's why your code works outside of Estimator), but in this case it
                  doesn't.




                  .




                  No, the official implementation feeds data from a placeholder. A placeholder is
                  always a Tensor, so it doesn't depend on implicit conversion.



                  But if you directly call loss function with a numpy array as input
                  (Notice: call during graph construction phase, so argument content
                  gets embedded into graph), it MAY work (however, I did not check it).



                  This code:



                  nce_loss(labels=[1,2,3]) will be called only ONCE during graph
                  construction. Labels will be statically embedded into graph as a
                  constant and potentially can be of any Tensor-compatible type (list,
                  ndarray, etc)



                  This code:

                  ```Python
                  def model(label_input):
                      nce_loss(labels=label_input)

                  estimator(model_fun=model).train()
                  ```

                  can't embed the labels variable statically, because its content is
                  not defined during graph construction. So if you feed anything
                  except a Tensor, it will throw an error.




                  From



                  https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/



                  So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked







                  share|improve this answer












                  share|improve this answer



                  share|improve this answer










                  answered Dec 6 '18 at 20:09









                  SantoshGupta7SantoshGupta7

                  6811515




                  6811515






























                      draft saved

                      draft discarded




















































                      Thanks for contributing an answer to Stack Overflow!


                      • Please be sure to answer the question. Provide details and share your research!

                      But avoid



                      • Asking for help, clarification, or responding to other answers.

                      • Making statements based on opinion; back them up with references or personal experience.


                      To learn more, see our tips on writing great answers.




                      draft saved


                      draft discarded














                      StackExchange.ready(
                      function () {
                      StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53405657%2fconverting-tensorflow-graph-to-use-estimator-get-typeerror-data-type-not-unde%23new-answer', 'question_page');
                      }
                      );

                      Post as a guest















                      Required, but never shown





















































                      Required, but never shown














                      Required, but never shown












                      Required, but never shown







                      Required, but never shown

































                      Required, but never shown














                      Required, but never shown












                      Required, but never shown







                      Required, but never shown







                      這個網誌中的熱門文章

                      Tangent Lines Diagram Along Smooth Curve

                      Yusuf al-Mu'taman ibn Hud

                      Zucchini