Converting Tensorflow Graph to use Estimator, get 'TypeError: data type not understood' at loss function...

I am trying to convert Tensorflow's official basic word2vec implementation to use tf.Estimator.
The issue is that the loss function (sampled_softmax_loss or nce_loss) gives an error when using TensorFlow Estimators. It works perfectly fine in the original implementation.

Here is TensorFlow's official basic word2vec implementation:

https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/word2vec/word2vec_basic.py

Here is the Google Colab notebook where I implemented this code, which is working.

https://colab.research.google.com/drive/1nTX77dRBHmXx6PEF5pmYpkIVxj_TqT5I

Here is the Google Colab notebook where I altered the code so that it uses Tensorflow Estimator, which is Not working.

https://colab.research.google.com/drive/1IVDqGwMx6BK5-Bgrw190jqHU6tt3ZR3e

For convenience, here is exact code from the Estimator version above where I define model_fn

batch_size = 128

embedding_size = 128  # Dimension of the embedding vector.

skip_window = 1  # How many words to consider left and right.

num_skips = 2  # How many times to reuse an input to generate a label.

num_sampled = 64  # Number of negative examples to sample.



def my_model( features, labels, mode, params):
    """Build the word2vec embedding + NCE-loss graph used as an Estimator model_fn.

    Args:
        features: Used directly as the ids looked up in the embedding matrix.
        labels: Passed unchanged to ``tf.nn.nce_loss`` as ``labels``.
        mode: Compared against the literal string ``"train"``.
        params: Accepted but never read here; ``vocabulary_size``,
            ``embedding_size`` and ``num_sampled`` are taken from
            module-level names instead.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying ``loss`` and ``train_op``
        when ``mode == "train"``. For any other mode the function falls off
        the end and implicitly returns ``None``.
    """



    with tf.name_scope('inputs'):

        # No conversion or casting is applied to the incoming values.
        # NOTE(review): the traceback reported for this code fails on
        # `labels.dtype` inside nce_loss — presumably `labels` is not a
        # Tensor at this point; confirm what input_fn actually returns.
        train_inputs = features

        train_labels = labels



    with tf.name_scope('embeddings'):

        # Embedding matrix of shape [vocabulary_size, embedding_size],
        # initialised uniformly in [-1.0, 1.0).
        embeddings = tf.Variable(

          tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

        embed = tf.nn.embedding_lookup(embeddings, train_inputs)



    with tf.name_scope('weights'):

        # Output-side weights for the NCE loss, same shape as `embeddings`.
        nce_weights = tf.Variable(

          tf.truncated_normal(

              [vocabulary_size, embedding_size],

              stddev=1.0 / math.sqrt(embedding_size)))

    with tf.name_scope('biases'):

        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))



    with tf.name_scope('loss'):

        # Mean of the NCE loss returned by tf.nn.nce_loss.
        loss = tf.reduce_mean(

            tf.nn.nce_loss(

                weights=nce_weights,

                biases=nce_biases,

                labels=train_labels,

                inputs=embed,

                num_sampled=num_sampled,

                num_classes=vocabulary_size))



    # Add the loss value as a scalar to summary.
    tf.summary.scalar('loss', loss)



    if mode == "train":

        with tf.name_scope('optimizer'):

            # Despite its name, `optimizer` holds the result of minimize(loss),
            # which is what gets passed as train_op below.
            # NOTE(review): minimize() is called without a global_step
            # argument — confirm the Estimator's step counter advances.
            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)



        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=optimizer)

And here is where I call the estimator and training

word2vecEstimator = tf.estimator.Estimator(

        model_fn=my_model,

        params={

            'batch_size': 16,

            'embedding_size': 10,

            'num_inputs': 3,

            'num_sampled': 128,

            'batch_size': 16

        })



word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

And this is the error message I get when I call the Estimator training:

INFO:tensorflow:Calling model_fn.

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-22-955f44867ee5> in <module>()

      1 word2vecEstimator.train(

      2     input_fn=generate_batch,

----> 3     steps=10)



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)

    352 

    353       saving_listeners = _check_listeners_type(saving_listeners)

--> 354       loss = self._train_model(input_fn, hooks, saving_listeners)

    355       logging.info('Loss for final step: %s.', loss)

    356       return self



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)

   1205       return self._train_model_distributed(input_fn, hooks, saving_listeners)

   1206     else:

-> 1207       return self._train_model_default(input_fn, hooks, saving_listeners)

   1208 

   1209   def _train_model_default(self, input_fn, hooks, saving_listeners):



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)

   1235       worker_hooks.extend(input_hooks)

   1236       estimator_spec = self._call_model_fn(

-> 1237           features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)

   1238       global_step_tensor = training_util.get_global_step(g)

   1239       return self._train_with_estimator_spec(estimator_spec, worker_hooks,



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)

   1193 

   1194     logging.info('Calling model_fn.')

-> 1195     model_fn_results = self._model_fn(features=features, **kwargs)

   1196     logging.info('Done calling model_fn.')

   1197 



<ipython-input-20-9d389437162a> in my_model(features, labels, mode, params)

     33                 inputs=embed,

     34                 num_sampled=num_sampled,

---> 35                 num_classes=vocabulary_size))

     36 

     37     # Add the loss value as a scalar to summary.



/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name)

   1246       remove_accidental_hits=remove_accidental_hits,

   1247       partition_strategy=partition_strategy,

-> 1248       name=name)

   1249   sampled_losses = sigmoid_cross_entropy_with_logits(

   1250       labels=labels, logits=logits, name="sampled_losses")



/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)

   1029   with ops.name_scope(name, "compute_sampled_logits",

   1030                       weights + [biases, inputs, labels]):

-> 1031     if labels.dtype != dtypes.int64:

   1032       labels = math_ops.cast(labels, dtypes.int64)

   1033     labels_flat = array_ops.reshape(labels, [-1])



TypeError: data type not understood

Edit: Upon request, here's what a typical output for input_fn looks like

print(generate_batch(batch_size=8, num_skips=2, skip_window=1))

(array([3081, 3081,   12,   12,    6,    6,  195,  195], dtype=int32), array([[5234],

       [  12],

       [   6],

       [3081],

       [  12],

       [ 195],

       [   6],

       [   2]], dtype=int32))

edited Dec 6 '18 at 6:45

asked Nov 21 '18 at 5:17

SantoshGupta7

6811515

What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

– jdehesa
Nov 27 '18 at 14:27

For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

– SantoshGupta7
Nov 29 '18 at 20:42

add a comment |

Here is TensorFlow's official basic word2vec implementation:

https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/word2vec/word2vec_basic.py

Here is the Google Colab notebook where I implemented this code, which is working.

https://colab.research.google.com/drive/1nTX77dRBHmXx6PEF5pmYpkIVxj_TqT5I

Here is the Google Colab notebook where I altered the code so that it uses Tensorflow Estimator, which is Not working.

https://colab.research.google.com/drive/1IVDqGwMx6BK5-Bgrw190jqHU6tt3ZR3e

For convenience, here is exact code from the Estimator version above where I define model_fn

batch_size = 128

embedding_size = 128  # Dimension of the embedding vector.

skip_window = 1  # How many words to consider left and right.

num_skips = 2  # How many times to reuse an input to generate a label.

num_sampled = 64  # Number of negative examples to sample.



def my_model( features, labels, mode, params):
    """Build the word2vec embedding + NCE-loss graph used as an Estimator model_fn.

    Args:
        features: Used directly as the ids looked up in the embedding matrix.
        labels: Passed unchanged to ``tf.nn.nce_loss`` as ``labels``.
        mode: Compared against the literal string ``"train"``.
        params: Accepted but never read here; ``vocabulary_size``,
            ``embedding_size`` and ``num_sampled`` are taken from
            module-level names instead.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying ``loss`` and ``train_op``
        when ``mode == "train"``. For any other mode the function falls off
        the end and implicitly returns ``None``.
    """



    with tf.name_scope('inputs'):

        # No conversion or casting is applied to the incoming values.
        # NOTE(review): the traceback reported for this code fails on
        # `labels.dtype` inside nce_loss — presumably `labels` is not a
        # Tensor at this point; confirm what input_fn actually returns.
        train_inputs = features

        train_labels = labels



    with tf.name_scope('embeddings'):

        # Embedding matrix of shape [vocabulary_size, embedding_size],
        # initialised uniformly in [-1.0, 1.0).
        embeddings = tf.Variable(

          tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

        embed = tf.nn.embedding_lookup(embeddings, train_inputs)



    with tf.name_scope('weights'):

        # Output-side weights for the NCE loss, same shape as `embeddings`.
        nce_weights = tf.Variable(

          tf.truncated_normal(

              [vocabulary_size, embedding_size],

              stddev=1.0 / math.sqrt(embedding_size)))

    with tf.name_scope('biases'):

        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))



    with tf.name_scope('loss'):

        # Mean of the NCE loss returned by tf.nn.nce_loss.
        loss = tf.reduce_mean(

            tf.nn.nce_loss(

                weights=nce_weights,

                biases=nce_biases,

                labels=train_labels,

                inputs=embed,

                num_sampled=num_sampled,

                num_classes=vocabulary_size))



    # Add the loss value as a scalar to summary.
    tf.summary.scalar('loss', loss)



    if mode == "train":

        with tf.name_scope('optimizer'):

            # Despite its name, `optimizer` holds the result of minimize(loss),
            # which is what gets passed as train_op below.
            # NOTE(review): minimize() is called without a global_step
            # argument — confirm the Estimator's step counter advances.
            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)



        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=optimizer)

And here is where I call the estimator and training

word2vecEstimator = tf.estimator.Estimator(

        model_fn=my_model,

        params={

            'batch_size': 16,

            'embedding_size': 10,

            'num_inputs': 3,

            'num_sampled': 128,

            'batch_size': 16

        })



word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

And this is the error message I get when I call the Estimator training:

INFO:tensorflow:Calling model_fn.

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-22-955f44867ee5> in <module>()

      1 word2vecEstimator.train(

      2     input_fn=generate_batch,

----> 3     steps=10)



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)

    352 

    353       saving_listeners = _check_listeners_type(saving_listeners)

--> 354       loss = self._train_model(input_fn, hooks, saving_listeners)

    355       logging.info('Loss for final step: %s.', loss)

    356       return self



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)

   1205       return self._train_model_distributed(input_fn, hooks, saving_listeners)

   1206     else:

-> 1207       return self._train_model_default(input_fn, hooks, saving_listeners)

   1208 

   1209   def _train_model_default(self, input_fn, hooks, saving_listeners):



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)

   1235       worker_hooks.extend(input_hooks)

   1236       estimator_spec = self._call_model_fn(

-> 1237           features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)

   1238       global_step_tensor = training_util.get_global_step(g)

   1239       return self._train_with_estimator_spec(estimator_spec, worker_hooks,



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)

   1193 

   1194     logging.info('Calling model_fn.')

-> 1195     model_fn_results = self._model_fn(features=features, **kwargs)

   1196     logging.info('Done calling model_fn.')

   1197 



<ipython-input-20-9d389437162a> in my_model(features, labels, mode, params)

     33                 inputs=embed,

     34                 num_sampled=num_sampled,

---> 35                 num_classes=vocabulary_size))

     36 

     37     # Add the loss value as a scalar to summary.



/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name)

   1246       remove_accidental_hits=remove_accidental_hits,

   1247       partition_strategy=partition_strategy,

-> 1248       name=name)

   1249   sampled_losses = sigmoid_cross_entropy_with_logits(

   1250       labels=labels, logits=logits, name="sampled_losses")



/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)

   1029   with ops.name_scope(name, "compute_sampled_logits",

   1030                       weights + [biases, inputs, labels]):

-> 1031     if labels.dtype != dtypes.int64:

   1032       labels = math_ops.cast(labels, dtypes.int64)

   1033     labels_flat = array_ops.reshape(labels, [-1])



TypeError: data type not understood

Edit: Upon request, here's what a typical output for input_fn looks like

print(generate_batch(batch_size=8, num_skips=2, skip_window=1))

(array([3081, 3081,   12,   12,    6,    6,  195,  195], dtype=int32), array([[5234],

       [  12],

       [   6],

       [3081],

       [  12],

       [ 195],

       [   6],

       [   2]], dtype=int32))

edited Dec 6 '18 at 6:45

asked Nov 21 '18 at 5:17

SantoshGupta7

6811515

What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

– jdehesa
Nov 27 '18 at 14:27

For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

– SantoshGupta7
Nov 29 '18 at 20:42

add a comment |

Here is TensorFlow's official basic word2vec implementation:

https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/word2vec/word2vec_basic.py

Here is the Google Colab notebook where I implemented this code, which is working.

https://colab.research.google.com/drive/1nTX77dRBHmXx6PEF5pmYpkIVxj_TqT5I

Here is the Google Colab notebook where I altered the code so that it uses Tensorflow Estimator, which is Not working.

https://colab.research.google.com/drive/1IVDqGwMx6BK5-Bgrw190jqHU6tt3ZR3e

For convenience, here is exact code from the Estimator version above where I define model_fn

batch_size = 128

embedding_size = 128  # Dimension of the embedding vector.

skip_window = 1  # How many words to consider left and right.

num_skips = 2  # How many times to reuse an input to generate a label.

num_sampled = 64  # Number of negative examples to sample.



def my_model( features, labels, mode, params):
    """Build the word2vec embedding + NCE-loss graph used as an Estimator model_fn.

    Args:
        features: Used directly as the ids looked up in the embedding matrix.
        labels: Passed unchanged to ``tf.nn.nce_loss`` as ``labels``.
        mode: Compared against the literal string ``"train"``.
        params: Accepted but never read here; ``vocabulary_size``,
            ``embedding_size`` and ``num_sampled`` are taken from
            module-level names instead.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying ``loss`` and ``train_op``
        when ``mode == "train"``. For any other mode the function falls off
        the end and implicitly returns ``None``.
    """



    with tf.name_scope('inputs'):

        # No conversion or casting is applied to the incoming values.
        # NOTE(review): the traceback reported for this code fails on
        # `labels.dtype` inside nce_loss — presumably `labels` is not a
        # Tensor at this point; confirm what input_fn actually returns.
        train_inputs = features

        train_labels = labels



    with tf.name_scope('embeddings'):

        # Embedding matrix of shape [vocabulary_size, embedding_size],
        # initialised uniformly in [-1.0, 1.0).
        embeddings = tf.Variable(

          tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

        embed = tf.nn.embedding_lookup(embeddings, train_inputs)



    with tf.name_scope('weights'):

        # Output-side weights for the NCE loss, same shape as `embeddings`.
        nce_weights = tf.Variable(

          tf.truncated_normal(

              [vocabulary_size, embedding_size],

              stddev=1.0 / math.sqrt(embedding_size)))

    with tf.name_scope('biases'):

        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))



    with tf.name_scope('loss'):

        # Mean of the NCE loss returned by tf.nn.nce_loss.
        loss = tf.reduce_mean(

            tf.nn.nce_loss(

                weights=nce_weights,

                biases=nce_biases,

                labels=train_labels,

                inputs=embed,

                num_sampled=num_sampled,

                num_classes=vocabulary_size))



    # Add the loss value as a scalar to summary.
    tf.summary.scalar('loss', loss)



    if mode == "train":

        with tf.name_scope('optimizer'):

            # Despite its name, `optimizer` holds the result of minimize(loss),
            # which is what gets passed as train_op below.
            # NOTE(review): minimize() is called without a global_step
            # argument — confirm the Estimator's step counter advances.
            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)



        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=optimizer)

And here is where I call the estimator and training

word2vecEstimator = tf.estimator.Estimator(

        model_fn=my_model,

        params={

            'batch_size': 16,

            'embedding_size': 10,

            'num_inputs': 3,

            'num_sampled': 128,

            'batch_size': 16

        })



word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

And this is the error message I get when I call the Estimator training:

INFO:tensorflow:Calling model_fn.

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-22-955f44867ee5> in <module>()

      1 word2vecEstimator.train(

      2     input_fn=generate_batch,

----> 3     steps=10)



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)

    352 

    353       saving_listeners = _check_listeners_type(saving_listeners)

--> 354       loss = self._train_model(input_fn, hooks, saving_listeners)

    355       logging.info('Loss for final step: %s.', loss)

    356       return self



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)

   1205       return self._train_model_distributed(input_fn, hooks, saving_listeners)

   1206     else:

-> 1207       return self._train_model_default(input_fn, hooks, saving_listeners)

   1208 

   1209   def _train_model_default(self, input_fn, hooks, saving_listeners):



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)

   1235       worker_hooks.extend(input_hooks)

   1236       estimator_spec = self._call_model_fn(

-> 1237           features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)

   1238       global_step_tensor = training_util.get_global_step(g)

   1239       return self._train_with_estimator_spec(estimator_spec, worker_hooks,



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)

   1193 

   1194     logging.info('Calling model_fn.')

-> 1195     model_fn_results = self._model_fn(features=features, **kwargs)

   1196     logging.info('Done calling model_fn.')

   1197 



<ipython-input-20-9d389437162a> in my_model(features, labels, mode, params)

     33                 inputs=embed,

     34                 num_sampled=num_sampled,

---> 35                 num_classes=vocabulary_size))

     36 

     37     # Add the loss value as a scalar to summary.



/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name)

   1246       remove_accidental_hits=remove_accidental_hits,

   1247       partition_strategy=partition_strategy,

-> 1248       name=name)

   1249   sampled_losses = sigmoid_cross_entropy_with_logits(

   1250       labels=labels, logits=logits, name="sampled_losses")



/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)

   1029   with ops.name_scope(name, "compute_sampled_logits",

   1030                       weights + [biases, inputs, labels]):

-> 1031     if labels.dtype != dtypes.int64:

   1032       labels = math_ops.cast(labels, dtypes.int64)

   1033     labels_flat = array_ops.reshape(labels, [-1])



TypeError: data type not understood

Edit: Upon request, here's what a typical output for input_fn looks like

print(generate_batch(batch_size=8, num_skips=2, skip_window=1))

(array([3081, 3081,   12,   12,    6,    6,  195,  195], dtype=int32), array([[5234],

       [  12],

       [   6],

       [3081],

       [  12],

       [ 195],

       [   6],

       [   2]], dtype=int32))

edited Dec 6 '18 at 6:45

asked Nov 21 '18 at 5:17

SantoshGupta7

6811515

Here is TensorFlow's official basic word2vec implementation:

https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/word2vec/word2vec_basic.py

Here is the Google Colab notebook where I implemented this code, which is working.

https://colab.research.google.com/drive/1nTX77dRBHmXx6PEF5pmYpkIVxj_TqT5I

Here is the Google Colab notebook where I altered the code so that it uses Tensorflow Estimator, which is Not working.

https://colab.research.google.com/drive/1IVDqGwMx6BK5-Bgrw190jqHU6tt3ZR3e

For convenience, here is exact code from the Estimator version above where I define model_fn

batch_size = 128

embedding_size = 128  # Dimension of the embedding vector.

skip_window = 1  # How many words to consider left and right.

num_skips = 2  # How many times to reuse an input to generate a label.

num_sampled = 64  # Number of negative examples to sample.



def my_model( features, labels, mode, params):
    """Build the word2vec embedding + NCE-loss graph used as an Estimator model_fn.

    Args:
        features: Used directly as the ids looked up in the embedding matrix.
        labels: Passed unchanged to ``tf.nn.nce_loss`` as ``labels``.
        mode: Compared against the literal string ``"train"``.
        params: Accepted but never read here; ``vocabulary_size``,
            ``embedding_size`` and ``num_sampled`` are taken from
            module-level names instead.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying ``loss`` and ``train_op``
        when ``mode == "train"``. For any other mode the function falls off
        the end and implicitly returns ``None``.
    """



    with tf.name_scope('inputs'):

        # No conversion or casting is applied to the incoming values.
        # NOTE(review): the traceback reported for this code fails on
        # `labels.dtype` inside nce_loss — presumably `labels` is not a
        # Tensor at this point; confirm what input_fn actually returns.
        train_inputs = features

        train_labels = labels



    with tf.name_scope('embeddings'):

        # Embedding matrix of shape [vocabulary_size, embedding_size],
        # initialised uniformly in [-1.0, 1.0).
        embeddings = tf.Variable(

          tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

        embed = tf.nn.embedding_lookup(embeddings, train_inputs)



    with tf.name_scope('weights'):

        # Output-side weights for the NCE loss, same shape as `embeddings`.
        nce_weights = tf.Variable(

          tf.truncated_normal(

              [vocabulary_size, embedding_size],

              stddev=1.0 / math.sqrt(embedding_size)))

    with tf.name_scope('biases'):

        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))



    with tf.name_scope('loss'):

        # Mean of the NCE loss returned by tf.nn.nce_loss.
        loss = tf.reduce_mean(

            tf.nn.nce_loss(

                weights=nce_weights,

                biases=nce_biases,

                labels=train_labels,

                inputs=embed,

                num_sampled=num_sampled,

                num_classes=vocabulary_size))



    # Add the loss value as a scalar to summary.
    tf.summary.scalar('loss', loss)



    if mode == "train":

        with tf.name_scope('optimizer'):

            # Despite its name, `optimizer` holds the result of minimize(loss),
            # which is what gets passed as train_op below.
            # NOTE(review): minimize() is called without a global_step
            # argument — confirm the Estimator's step counter advances.
            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)



        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=optimizer)

And here is where I call the estimator and training

word2vecEstimator = tf.estimator.Estimator(

        model_fn=my_model,

        params={

            'batch_size': 16,

            'embedding_size': 10,

            'num_inputs': 3,

            'num_sampled': 128,

            'batch_size': 16

        })



word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

And this is the error message I get when I call the Estimator training:

INFO:tensorflow:Calling model_fn.

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-22-955f44867ee5> in <module>()

      1 word2vecEstimator.train(

      2     input_fn=generate_batch,

----> 3     steps=10)



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)

    352 

    353       saving_listeners = _check_listeners_type(saving_listeners)

--> 354       loss = self._train_model(input_fn, hooks, saving_listeners)

    355       logging.info('Loss for final step: %s.', loss)

    356       return self



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)

   1205       return self._train_model_distributed(input_fn, hooks, saving_listeners)

   1206     else:

-> 1207       return self._train_model_default(input_fn, hooks, saving_listeners)

   1208 

   1209   def _train_model_default(self, input_fn, hooks, saving_listeners):



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)

   1235       worker_hooks.extend(input_hooks)

   1236       estimator_spec = self._call_model_fn(

-> 1237           features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)

   1238       global_step_tensor = training_util.get_global_step(g)

   1239       return self._train_with_estimator_spec(estimator_spec, worker_hooks,



/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)

   1193 

   1194     logging.info('Calling model_fn.')

-> 1195     model_fn_results = self._model_fn(features=features, **kwargs)

   1196     logging.info('Done calling model_fn.')

   1197 



<ipython-input-20-9d389437162a> in my_model(features, labels, mode, params)

     33                 inputs=embed,

     34                 num_sampled=num_sampled,

---> 35                 num_classes=vocabulary_size))

     36 

     37     # Add the loss value as a scalar to summary.



/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name)

   1246       remove_accidental_hits=remove_accidental_hits,

   1247       partition_strategy=partition_strategy,

-> 1248       name=name)

   1249   sampled_losses = sigmoid_cross_entropy_with_logits(

   1250       labels=labels, logits=logits, name="sampled_losses")



/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)

   1029   with ops.name_scope(name, "compute_sampled_logits",

   1030                       weights + [biases, inputs, labels]):

-> 1031     if labels.dtype != dtypes.int64:

   1032       labels = math_ops.cast(labels, dtypes.int64)

   1033     labels_flat = array_ops.reshape(labels, [-1])



TypeError: data type not understood

Edit: Upon request, here's what a typical output for input_fn looks like

print(generate_batch(batch_size=8, num_skips=2, skip_window=1))

(array([3081, 3081,   12,   12,    6,    6,  195,  195], dtype=int32), array([[5234],

       [  12],

       [   6],

       [3081],

       [  12],

       [ 195],

       [   6],

       [   2]], dtype=int32))

python tensorflow tensorflow-estimator

edited Dec 6 '18 at 6:45

asked Nov 21 '18 at 5:17

SantoshGupta7

6811515

edited Dec 6 '18 at 6:45

asked Nov 21 '18 at 5:17

SantoshGupta7

6811515

edited Dec 6 '18 at 6:45

asked Nov 21 '18 at 5:17

SantoshGupta7

6811515

asked Nov 21 '18 at 5:17

SantoshGupta7

6811515

asked Nov 21 '18 at 5:17

SantoshGupta7

6811515

What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

– jdehesa
Nov 27 '18 at 14:27

For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

– SantoshGupta7
Nov 29 '18 at 20:42

add a comment |

What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

– jdehesa
Nov 27 '18 at 14:27

For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

– SantoshGupta7
Nov 29 '18 at 20:42

What Python, TensorFlow and NumPy versions are you using? If they are not up to date (TensorFlow 1.12, NumPy 1.15), have you tried upgrading?

– jdehesa
Nov 27 '18 at 14:27

For Tensorflow , version '1.12.0' ; For Numpy , version '1.14.6'

– SantoshGupta7
Nov 29 '18 at 20:42

add a comment |

3 Answers
3

active

oldest

votes

+25

You use generate_batch like a variable here:

word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

Call the function with generate_batch().
But I think you must pass some values to the function.

edited Nov 29 '18 at 11:01

answered Nov 29 '18 at 10:31

tifi90

938

I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

– SantoshGupta7
Nov 29 '18 at 20:53

Can you show us the output of the generate_batch() call?

– tifi90
Nov 29 '18 at 21:18

yeah, just updated the original post, the output of generate_batch() call is at the bottom.

– SantoshGupta7
Nov 29 '18 at 22:27

The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

– tifi90
Nov 30 '18 at 9:58

They're both 16. I think it's a little confusing since the labels start on the same line where the features end: ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],` — 1635226 is part of the 2nd array.

– SantoshGupta7
Dec 1 '18 at 3:19

add a comment |

It might be that tensors and ops must be in the input_fn, not in the 'model_fn'

I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.

You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().

https://github.com/tensorflow/tensorflow/issues/8042

Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved

https://github.com/tensorflow/tensorflow/issues/4026

However, there still is not enough info on what's causing the issue. This is just a lead.

answered Dec 3 '18 at 18:36

SantoshGupta7

6811515

add a comment |

Found the answer

The error clearly says you have an invalid type for labels.

You are trying to pass a NumPy array instead of a Tensor. Sometimes TensorFlow
performs implicit conversion from ndarray to Tensor under the hood
(that's why your code works outside of Estimator), but in this case it
doesn't.

No, the official implementation feeds data from a placeholder. A placeholder is
always a Tensor, so it doesn't depend on implicit conversion.

But if you directly call the loss function with a NumPy array as input
(note: the call happens during the graph construction phase, so the argument's
content gets embedded into the graph), it MAY work (however, I did not check it).

This code:

nce_loss(labels=[1,2,3]) will be called only ONCE, during graph
construction. The labels will be statically embedded into the graph as a
constant and can potentially be of any Tensor-compatible type (list,
ndarray, etc.).

This code:

```python
def model(label_input):
    nce_loss(labels=label_input)

estimator(model_fn=model).train()
```

can't embed the labels variable statically, because its content is not
defined during graph construction. So if you feed it anything except a
Tensor, it will throw an error.

From

https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/

So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked

answered Dec 6 '18 at 20:09

SantoshGupta7

6811515

add a comment |

Your Answer

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53405657%2fconverting-tensorflow-graph-to-use-estimator-get-typeerror-data-type-not-unde%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

3 Answers
3

active

oldest

votes

3 Answers
3

active

oldest

votes

+25

You use generate_batch like a variable here:

word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

Call the function with generate_batch().
But I think you must pass some values to the function.

edited Nov 29 '18 at 11:01

answered Nov 29 '18 at 10:31

tifi90

938

I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

– SantoshGupta7
Nov 29 '18 at 20:53

Can you show us the output of the generate_batch() call?

– tifi90
Nov 29 '18 at 21:18

yeah, just updated the original post, the output of generate_batch() call is at the bottom.

– SantoshGupta7
Nov 29 '18 at 22:27

The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

– tifi90
Nov 30 '18 at 9:58

They're both 16. I think it's a little confusing since the labels start on the same line as the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

– SantoshGupta7
Dec 1 '18 at 3:19

add a comment |

+25

You use generate_batch like a variable here:

word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

Call the function with generate_batch().
But I think you must pass some values to the function.

edited Nov 29 '18 at 11:01

answered Nov 29 '18 at 10:31

tifi90

938

I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

– SantoshGupta7
Nov 29 '18 at 20:53

Can you show us the output of the generate_batch() call?

– tifi90
Nov 29 '18 at 21:18

yeah, just updated the original post, the output of generate_batch() call is at the bottom.

– SantoshGupta7
Nov 29 '18 at 22:27

The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

– tifi90
Nov 30 '18 at 9:58

They're both 16. I think it's a little confusing since the labels start on the same line as the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

– SantoshGupta7
Dec 1 '18 at 3:19

add a comment |

+25

You use generate_batch like a variable here:

word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

Call the function with generate_batch().
But I think you must pass some values to the function.

edited Nov 29 '18 at 11:01

answered Nov 29 '18 at 10:31

tifi90

938

You use generate_batch like a variable here:

word2vecEstimator.train(

    input_fn=generate_batch,

    steps=10)

Call the function with generate_batch().
But I think you must pass some values to the function.

edited Nov 29 '18 at 11:01

answered Nov 29 '18 at 10:31

tifi90

938

edited Nov 29 '18 at 11:01

answered Nov 29 '18 at 10:31

tifi90

938

answered Nov 29 '18 at 10:31

tifi90

938

answered Nov 29 '18 at 10:31

tifi90

938

I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

– SantoshGupta7
Nov 29 '18 at 20:53

Can you show us the output of the generate_batch() call?

– tifi90
Nov 29 '18 at 21:18

yeah, just updated the original post, the output of generate_batch() call is at the bottom.

– SantoshGupta7
Nov 29 '18 at 22:27

The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

– tifi90
Nov 30 '18 at 9:58

They're both 16. I think it's a little confusing since the labels start on the same line as the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

– SantoshGupta7
Dec 1 '18 at 3:19

add a comment |

I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

– SantoshGupta7
Nov 29 '18 at 20:53

Can you show us the output of the generate_batch() call?

– tifi90
Nov 29 '18 at 21:18

yeah, just updated the original post, the output of generate_batch() call is at the bottom.

– SantoshGupta7
Nov 29 '18 at 22:27

The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

– tifi90
Nov 30 '18 at 9:58

They're both 16. I think it's a little confusing since the labels start on the same line as the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

– SantoshGupta7
Dec 1 '18 at 3:19

I set it up so that no values are passed to the function. I used generate_batch() but now I'm getting a TypeError: unsupported callable error. The Official documentation says to treat it like a function, so it should be called like generate_batch. tensorflow.org/guide/estimators . This is elaborated in this post stackoverflow.com/questions/47120637/…

– SantoshGupta7
Nov 29 '18 at 20:53

Can you show us the output of the generate_batch() call?

– tifi90
Nov 29 '18 at 21:18

yeah, just updated the original post, the output of generate_batch() call is at the bottom.

– SantoshGupta7
Nov 29 '18 at 22:27

The size of the features array and the labels array is different. Length 16 and length 15. Shouldn't they have the same size?

– tifi90
Nov 30 '18 at 9:58

They're both 16. I think it's a little confusing since the labels start on the same line as the features end. In ` [1892528, 1352240, 1552349]], dtype=int32), array([[1635226],`, 1635226 is part of the 2nd array.

– SantoshGupta7
Dec 1 '18 at 3:19

add a comment |

It might be that tensors and ops must be in the input_fn, not in the 'model_fn'

I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.

You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().

https://github.com/tensorflow/tensorflow/issues/8042

Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved

https://github.com/tensorflow/tensorflow/issues/4026

However, there still is not enough info on what's causing the issue. This is just a lead.

answered Dec 3 '18 at 18:36

SantoshGupta7

6811515

add a comment |

It might be that tensors and ops must be in the input_fn, not in the 'model_fn'

I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.

You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().

https://github.com/tensorflow/tensorflow/issues/8042

Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved

https://github.com/tensorflow/tensorflow/issues/4026

However, there still is not enough info on what's causing the issue. This is just a lead.

answered Dec 3 '18 at 18:36

SantoshGupta7

6811515

add a comment |

It might be that tensors and ops must be in the input_fn, not in the 'model_fn'

I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.

You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().

https://github.com/tensorflow/tensorflow/issues/8042

Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved

https://github.com/tensorflow/tensorflow/issues/4026

However, there still is not enough info on what's causing the issue. This is just a lead.

answered Dec 3 '18 at 18:36

SantoshGupta7

6811515

It might be that tensors and ops must be in the input_fn, not in the 'model_fn'

I found this issue #4026 which solved my problem ... Maybe it is just me being stupid, but it would be great if you mention that the tensors and ops all have to be inside the input_fn somewhere in the documentation.

You have to call read_batch_examples from somewhere inside input_fn so that the tensors it creates are in the graph that Estimator creates in fit().

https://github.com/tensorflow/tensorflow/issues/8042

Oh I feel like an idiot! I've been creating the op outside of the graph scope. It works now, can't believe I didn't think to try that. Thanks a lot! This is a non-issue and has been resolved

https://github.com/tensorflow/tensorflow/issues/4026

However, there still is not enough info on what's causing the issue. This is just a lead.

answered Dec 3 '18 at 18:36

SantoshGupta7

6811515

answered Dec 3 '18 at 18:36

SantoshGupta7

6811515

answered Dec 3 '18 at 18:36

SantoshGupta7

6811515

answered Dec 3 '18 at 18:36

SantoshGupta7

6811515

add a comment |

Found the answer

Error clearly says you have invalid type for labels.

You are trying to pass a numpy array instead of a Tensor. Sometimes
TensorFlow performs an implicit conversion from ndarray to Tensor under
the hood (that's why your code works outside of Estimator), but in this
case it doesn't.

No, the official implementation feeds data from a placeholder. A
placeholder is always a Tensor, so it doesn't depend on implicit
conversion.

But if you directly call loss function with a numpy array as input
(Notice: call during graph construction phase, so argument content
gets embedded into graph), it MAY work (however, I did not check it).

This code:

nce_loss(labels=[1,2,3]) will be called only ONCE during graph
construction. Labels will be statically embedded into graph as a
constant and potentially can be of any Tensor-compatible type (list,
ndarray, etc)

This code:

```python
def model(label_input):
    nce_loss(labels=label_input)

estimator(model_fun=model).train()
```

can't embed the labels variable statically, because its content is not
defined during graph construction. So if you feed anything except a
Tensor, it will throw an error.

From

https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/

So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked

answered Dec 6 '18 at 20:09

SantoshGupta7

6811515

add a comment |

Found the answer

Error clearly says you have invalid type for labels.

You are trying to pass a numpy array instead of a Tensor. Sometimes
TensorFlow performs an implicit conversion from ndarray to Tensor under
the hood (that's why your code works outside of Estimator), but in this
case it doesn't.

No, the official implementation feeds data from a placeholder. A
placeholder is always a Tensor, so it doesn't depend on implicit
conversion.

But if you directly call loss function with a numpy array as input
(Notice: call during graph construction phase, so argument content
gets embedded into graph), it MAY work (however, I did not check it).

This code:

nce_loss(labels=[1,2,3]) will be called only ONCE during graph
construction. Labels will be statically embedded into graph as a
constant and potentially can be of any Tensor-compatible type (list,
ndarray, etc)

This code:

```python
def model(label_input):
    nce_loss(labels=label_input)

estimator(model_fun=model).train()
```

can't embed the labels variable statically, because its content is not
defined during graph construction. So if you feed anything except a
Tensor, it will throw an error.

From

https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/

So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked

answered Dec 6 '18 at 20:09

SantoshGupta7

6811515

add a comment |

Found the answer

Error clearly says you have invalid type for labels.

You are trying to pass a numpy array instead of a Tensor. Sometimes
TensorFlow performs an implicit conversion from ndarray to Tensor under
the hood (that's why your code works outside of Estimator), but in this
case it doesn't.

No, the official implementation feeds data from a placeholder. A
placeholder is always a Tensor, so it doesn't depend on implicit
conversion.

But if you directly call loss function with a numpy array as input
(Notice: call during graph construction phase, so argument content
gets embedded into graph), it MAY work (however, I did not check it).

This code:

nce_loss(labels=[1,2,3]) will be called only ONCE during graph
construction. Labels will be statically embedded into graph as a
constant and potentially can be of any Tensor-compatible type (list,
ndarray, etc)

This code:

```python
def model(label_input):
    nce_loss(labels=label_input)

estimator(model_fun=model).train()
```

can't embed the labels variable statically, because its content is not
defined during graph construction. So if you feed anything except a
Tensor, it will throw an error.

From

https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/

So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked

answered Dec 6 '18 at 20:09

SantoshGupta7

6811515

Found the answer

Error clearly says you have invalid type for labels.

You are trying to pass a numpy array instead of a Tensor. Sometimes
TensorFlow performs an implicit conversion from ndarray to Tensor under
the hood (that's why your code works outside of Estimator), but in this
case it doesn't.

No, the official implementation feeds data from a placeholder. A
placeholder is always a Tensor, so it doesn't depend on implicit
conversion.

But if you directly call loss function with a numpy array as input
(Notice: call during graph construction phase, so argument content
gets embedded into graph), it MAY work (however, I did not check it).

This code:

nce_loss(labels=[1,2,3]) will be called only ONCE during graph
construction. Labels will be statically embedded into graph as a
constant and potentially can be of any Tensor-compatible type (list,
ndarray, etc)

This code:

```python
def model(label_input):
    nce_loss(labels=label_input)

estimator(model_fun=model).train()
```

can't embed the labels variable statically, because its content is not
defined during graph construction. So if you feed anything except a
Tensor, it will throw an error.

From

https://www.reddit.com/r/MachineLearning/comments/a39pef/r_tensorflow_estimators_managing_simplicity_vs/

So I used labels=tf.dtypes.cast( train_labels, tf.int64) and it worked

answered Dec 6 '18 at 20:09

SantoshGupta7

6811515

answered Dec 6 '18 at 20:09

SantoshGupta7

6811515

answered Dec 6 '18 at 20:09

SantoshGupta7

6811515

answered Dec 6 '18 at 20:09

SantoshGupta7

6811515

add a comment |

draft saved

draft discarded

Thanks for contributing an answer to Stack Overflow!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference. If you need detailed information, please check here

S5 rIV7lrv9,Gm NtmKgYBiB,cZ,X97sfnyzvP AOTZhLXCOytY98QkD5DWvvq

搜尋此網誌

Wsrtjtyk