slate file incorrectly initializes the arguments











up vote
0
down vote

favorite












My slate file for parsing pdf is not working correctly. I get the error attached:



Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "/home/ryan/.local/lib/python2.7/site-packages/slate/slate.py", line 37, in __init__
self.doc = PDFDocument(file)
TypeError: __init__() takes at least 2 arguments (1 given)


for this slate file. To fix a previous issue I had to change the pdfminer import to from pdfminer.pdfdocument import PDFDocument, but I can't figure out how to correct this one from analyzing the traceback... I tried finding what is missing... Any help with how to correct this problem would be much appreciated, thanks!



from StringIO import StringIO

from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter as PI
from pdfminer.pdfdevice import PDFDevice
from pdfminer.converter import TextConverter

import utils

__all__ = ['PDF']

class PDFPageInterpreter(PI):
    """
    Page interpreter whose ``process_page`` returns the device output
    produced for that page instead of returning nothing.

    The attached device must expose a rewindable ``outfp`` buffer; it
    is reset before every page.
    """

    def process_page(self, page):
        """
        Renders *page* through the device and returns what was written
        to ``self.device.outfp`` for it.

        :page:
            Page object providing ``mediabox``, ``rotate``,
            ``resources`` and ``contents``.
        """
        if 1 <= self.debug:
            # ``stderr`` is not imported at module level, so go through
            # ``sys`` explicitly to avoid a NameError in debug mode.
            import sys
            sys.stderr.write('Processing page: %r\n' % page)
        (x0, y0, x1, y1) = page.mediabox
        # Choose the transformation matrix that matches the rotation.
        if page.rotate == 90:
            ctm = (0, -1, 1, 0, -y0, x1)
        elif page.rotate == 180:
            ctm = (-1, 0, 0, -1, x1, y1)
        elif page.rotate == 270:
            ctm = (0, 1, -1, 0, y1, -x0)
        else:
            ctm = (1, 0, 0, 1, -x0, -y0)
        # Rewind and empty the shared buffer so that only the output of
        # this page is returned.
        self.device.outfp.seek(0)
        self.device.outfp.buf = ''
        self.device.begin_page(page, ctm)
        self.render_contents(page.resources, page.contents, ctm=ctm)
        self.device.end_page(page)
        return self.device.outfp.getvalue()

class PDF(list):
    """
    List of the text of each page of a PDF, one string per page.
    """

    def __init__(self, file, password='', just_text=1):
        """
        Parses *file* and appends the text of every page to this list.

        :file:
            Open file object handed to ``PDFParser``.
        :password:
            Password handed to ``PDFDocument``.
        :just_text:
            When true, the parsing objects are dropped once the text
            has been collected.

        ``metadata`` is set only when the document allows extraction.
        """
        # In the pdfminer releases that ship ``pdfminer.pdfdocument``,
        # PDFDocument takes the parser (and password) in its constructor
        # and pages are enumerated through PDFPage rather than through
        # doc.get_pages(); set_parser()/initialize() no longer exist.
        from pdfminer.pdfpage import PDFPage

        self.parser = PDFParser(file)
        self.doc = PDFDocument(self.parser, password)
        if self.doc.is_extractable:
            self.resmgr = PDFResourceManager()
            self.device = TextConverter(self.resmgr, outfp=StringIO())
            self.interpreter = PDFPageInterpreter(
                self.resmgr, self.device)
            for page in PDFPage.create_pages(self.doc):
                self.append(self.interpreter.process_page(page))
            self.metadata = self.doc.info
        if just_text:
            self._cleanup()

    def _cleanup(self):
        """
        Frees lots of non-textual information, such as the fonts
        and images and the objects that were needed to parse the
        PDF.
        """
        # resmgr, device and interpreter only exist for extractable
        # documents, so remove each attribute only when it is present.
        for name in ('device', 'doc', 'parser', 'resmgr', 'interpreter'):
            if hasattr(self, name):
                delattr(self, name)

    def text(self, clean=True):
        """
        Returns the text of the PDF as a single string.
        Options:

        :clean:
            Removes misc cruft, like lots of whitespace.
        """
        if clean:
            return ''.join(utils.trim_whitespace(page) for page in self)
        return ''.join(self)









share|improve this question


























    up vote
    0
    down vote

    favorite












    My slate file for parsing pdf is not working correctly. I get the error attached:



    Traceback (most recent call last):
    File "<stdin>", line 2, in <module>
    File "/home/ryan/.local/lib/python2.7/site-packages/slate/slate.py", line 37, in __init__
    self.doc = PDFDocument(file)
    TypeError: __init__() takes at least 2 arguments (1 given)


    for this slate file. To fix a previous issue I had to change the pdfminer import to from pdfminer.pdfdocument import PDFDocument, but I can't figure out how to correct this one from analyzing the traceback... I tried finding what is missing... Any help with how to correct this problem would be much appreciated, thanks!



    from StringIO import StringIO

    from pdfminer.pdfparser import PDFParser
    from pdfminer.pdfdocument import PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager
    from pdfminer.pdfinterp import PDFPageInterpreter as PI
    from pdfminer.pdfdevice import PDFDevice
    from pdfminer.converter import TextConverter

    import utils

    __all__ = ['PDF']

    class PDFPageInterpreter(PI):
        """
        Page interpreter whose ``process_page`` returns the device output
        produced for that page instead of returning nothing.

        The attached device must expose a rewindable ``outfp`` buffer; it
        is reset before every page.
        """

        def process_page(self, page):
            """
            Renders *page* through the device and returns what was written
            to ``self.device.outfp`` for it.

            :page:
                Page object providing ``mediabox``, ``rotate``,
                ``resources`` and ``contents``.
            """
            if 1 <= self.debug:
                # ``stderr`` is not imported at module level, so go through
                # ``sys`` explicitly to avoid a NameError in debug mode.
                import sys
                sys.stderr.write('Processing page: %r\n' % page)
            (x0, y0, x1, y1) = page.mediabox
            # Choose the transformation matrix that matches the rotation.
            if page.rotate == 90:
                ctm = (0, -1, 1, 0, -y0, x1)
            elif page.rotate == 180:
                ctm = (-1, 0, 0, -1, x1, y1)
            elif page.rotate == 270:
                ctm = (0, 1, -1, 0, y1, -x0)
            else:
                ctm = (1, 0, 0, 1, -x0, -y0)
            # Rewind and empty the shared buffer so that only the output of
            # this page is returned.
            self.device.outfp.seek(0)
            self.device.outfp.buf = ''
            self.device.begin_page(page, ctm)
            self.render_contents(page.resources, page.contents, ctm=ctm)
            self.device.end_page(page)
            return self.device.outfp.getvalue()

    class PDF(list):
        """
        List of the text of each page of a PDF, one string per page.
        """

        def __init__(self, file, password='', just_text=1):
            """
            Parses *file* and appends the text of every page to this list.

            :file:
                Open file object handed to ``PDFParser``.
            :password:
                Password handed to ``PDFDocument``.
            :just_text:
                When true, the parsing objects are dropped once the text
                has been collected.

            ``metadata`` is set only when the document allows extraction.
            """
            # In the pdfminer releases that ship ``pdfminer.pdfdocument``,
            # PDFDocument takes the parser (and password) in its constructor
            # and pages are enumerated through PDFPage rather than through
            # doc.get_pages(); set_parser()/initialize() no longer exist.
            from pdfminer.pdfpage import PDFPage

            self.parser = PDFParser(file)
            self.doc = PDFDocument(self.parser, password)
            if self.doc.is_extractable:
                self.resmgr = PDFResourceManager()
                self.device = TextConverter(self.resmgr, outfp=StringIO())
                self.interpreter = PDFPageInterpreter(
                    self.resmgr, self.device)
                for page in PDFPage.create_pages(self.doc):
                    self.append(self.interpreter.process_page(page))
                self.metadata = self.doc.info
            if just_text:
                self._cleanup()

        def _cleanup(self):
            """
            Frees lots of non-textual information, such as the fonts
            and images and the objects that were needed to parse the
            PDF.
            """
            # resmgr, device and interpreter only exist for extractable
            # documents, so remove each attribute only when it is present.
            for name in ('device', 'doc', 'parser', 'resmgr', 'interpreter'):
                if hasattr(self, name):
                    delattr(self, name)

        def text(self, clean=True):
            """
            Returns the text of the PDF as a single string.
            Options:

            :clean:
                Removes misc cruft, like lots of whitespace.
            """
            if clean:
                return ''.join(utils.trim_whitespace(page) for page in self)
            return ''.join(self)









    share|improve this question
























      up vote
      0
      down vote

      favorite









      up vote
      0
      down vote

      favorite











      My slate file for parsing pdf is not working correctly. I get the error attached:



      Traceback (most recent call last):
      File "<stdin>", line 2, in <module>
      File "/home/ryan/.local/lib/python2.7/site-packages/slate/slate.py", line 37, in __init__
      self.doc = PDFDocument(file)
      TypeError: __init__() takes at least 2 arguments (1 given)


      for this slate file. To fix a previous issue I had to change the pdfminer import to from pdfminer.pdfdocument import PDFDocument, but I can't figure out how to correct this one from analyzing the traceback... I tried finding what is missing... Any help with how to correct this problem would be much appreciated, thanks!



      from StringIO import StringIO

      from pdfminer.pdfparser import PDFParser
      from pdfminer.pdfdocument import PDFDocument
      from pdfminer.pdfinterp import PDFResourceManager
      from pdfminer.pdfinterp import PDFPageInterpreter as PI
      from pdfminer.pdfdevice import PDFDevice
      from pdfminer.converter import TextConverter

      import utils

      __all__ = ['PDF']

      class PDFPageInterpreter(PI):
          """
          Page interpreter whose ``process_page`` returns the device output
          produced for that page instead of returning nothing.

          The attached device must expose a rewindable ``outfp`` buffer; it
          is reset before every page.
          """

          def process_page(self, page):
              """
              Renders *page* through the device and returns what was written
              to ``self.device.outfp`` for it.

              :page:
                  Page object providing ``mediabox``, ``rotate``,
                  ``resources`` and ``contents``.
              """
              if 1 <= self.debug:
                  # ``stderr`` is not imported at module level, so go through
                  # ``sys`` explicitly to avoid a NameError in debug mode.
                  import sys
                  sys.stderr.write('Processing page: %r\n' % page)
              (x0, y0, x1, y1) = page.mediabox
              # Choose the transformation matrix that matches the rotation.
              if page.rotate == 90:
                  ctm = (0, -1, 1, 0, -y0, x1)
              elif page.rotate == 180:
                  ctm = (-1, 0, 0, -1, x1, y1)
              elif page.rotate == 270:
                  ctm = (0, 1, -1, 0, y1, -x0)
              else:
                  ctm = (1, 0, 0, 1, -x0, -y0)
              # Rewind and empty the shared buffer so that only the output of
              # this page is returned.
              self.device.outfp.seek(0)
              self.device.outfp.buf = ''
              self.device.begin_page(page, ctm)
              self.render_contents(page.resources, page.contents, ctm=ctm)
              self.device.end_page(page)
              return self.device.outfp.getvalue()

      class PDF(list):
          """
          List of the text of each page of a PDF, one string per page.
          """

          def __init__(self, file, password='', just_text=1):
              """
              Parses *file* and appends the text of every page to this list.

              :file:
                  Open file object handed to ``PDFParser``.
              :password:
                  Password handed to ``PDFDocument``.
              :just_text:
                  When true, the parsing objects are dropped once the text
                  has been collected.

              ``metadata`` is set only when the document allows extraction.
              """
              # In the pdfminer releases that ship ``pdfminer.pdfdocument``,
              # PDFDocument takes the parser (and password) in its constructor
              # and pages are enumerated through PDFPage rather than through
              # doc.get_pages(); set_parser()/initialize() no longer exist.
              from pdfminer.pdfpage import PDFPage

              self.parser = PDFParser(file)
              self.doc = PDFDocument(self.parser, password)
              if self.doc.is_extractable:
                  self.resmgr = PDFResourceManager()
                  self.device = TextConverter(self.resmgr, outfp=StringIO())
                  self.interpreter = PDFPageInterpreter(
                      self.resmgr, self.device)
                  for page in PDFPage.create_pages(self.doc):
                      self.append(self.interpreter.process_page(page))
                  self.metadata = self.doc.info
              if just_text:
                  self._cleanup()

          def _cleanup(self):
              """
              Frees lots of non-textual information, such as the fonts
              and images and the objects that were needed to parse the
              PDF.
              """
              # resmgr, device and interpreter only exist for extractable
              # documents, so remove each attribute only when it is present.
              for name in ('device', 'doc', 'parser', 'resmgr', 'interpreter'):
                  if hasattr(self, name):
                      delattr(self, name)

          def text(self, clean=True):
              """
              Returns the text of the PDF as a single string.
              Options:

              :clean:
                  Removes misc cruft, like lots of whitespace.
              """
              if clean:
                  return ''.join(utils.trim_whitespace(page) for page in self)
              return ''.join(self)









      share|improve this question













      My slate file for parsing pdf is not working correctly. I get the error attached:



      Traceback (most recent call last):
      File "<stdin>", line 2, in <module>
      File "/home/ryan/.local/lib/python2.7/site-packages/slate/slate.py", line 37, in __init__
      self.doc = PDFDocument(file)
      TypeError: __init__() takes at least 2 arguments (1 given)


      for this slate file. To fix a previous issue I had to change the pdfminer import to from pdfminer.pdfdocument import PDFDocument, but I can't figure out how to correct this one from analyzing the traceback... I tried finding what is missing... Any help with how to correct this problem would be much appreciated, thanks!



      from StringIO import StringIO

      from pdfminer.pdfparser import PDFParser
      from pdfminer.pdfdocument import PDFDocument
      from pdfminer.pdfinterp import PDFResourceManager
      from pdfminer.pdfinterp import PDFPageInterpreter as PI
      from pdfminer.pdfdevice import PDFDevice
      from pdfminer.converter import TextConverter

      import utils

      __all__ = ['PDF']

      class PDFPageInterpreter(PI):
          """
          Page interpreter whose ``process_page`` returns the device output
          produced for that page instead of returning nothing.

          The attached device must expose a rewindable ``outfp`` buffer; it
          is reset before every page.
          """

          def process_page(self, page):
              """
              Renders *page* through the device and returns what was written
              to ``self.device.outfp`` for it.

              :page:
                  Page object providing ``mediabox``, ``rotate``,
                  ``resources`` and ``contents``.
              """
              if 1 <= self.debug:
                  # ``stderr`` is not imported at module level, so go through
                  # ``sys`` explicitly to avoid a NameError in debug mode.
                  import sys
                  sys.stderr.write('Processing page: %r\n' % page)
              (x0, y0, x1, y1) = page.mediabox
              # Choose the transformation matrix that matches the rotation.
              if page.rotate == 90:
                  ctm = (0, -1, 1, 0, -y0, x1)
              elif page.rotate == 180:
                  ctm = (-1, 0, 0, -1, x1, y1)
              elif page.rotate == 270:
                  ctm = (0, 1, -1, 0, y1, -x0)
              else:
                  ctm = (1, 0, 0, 1, -x0, -y0)
              # Rewind and empty the shared buffer so that only the output of
              # this page is returned.
              self.device.outfp.seek(0)
              self.device.outfp.buf = ''
              self.device.begin_page(page, ctm)
              self.render_contents(page.resources, page.contents, ctm=ctm)
              self.device.end_page(page)
              return self.device.outfp.getvalue()

      class PDF(list):
          """
          List of the text of each page of a PDF, one string per page.
          """

          def __init__(self, file, password='', just_text=1):
              """
              Parses *file* and appends the text of every page to this list.

              :file:
                  Open file object handed to ``PDFParser``.
              :password:
                  Password handed to ``PDFDocument``.
              :just_text:
                  When true, the parsing objects are dropped once the text
                  has been collected.

              ``metadata`` is set only when the document allows extraction.
              """
              # In the pdfminer releases that ship ``pdfminer.pdfdocument``,
              # PDFDocument takes the parser (and password) in its constructor
              # and pages are enumerated through PDFPage rather than through
              # doc.get_pages(); set_parser()/initialize() no longer exist.
              from pdfminer.pdfpage import PDFPage

              self.parser = PDFParser(file)
              self.doc = PDFDocument(self.parser, password)
              if self.doc.is_extractable:
                  self.resmgr = PDFResourceManager()
                  self.device = TextConverter(self.resmgr, outfp=StringIO())
                  self.interpreter = PDFPageInterpreter(
                      self.resmgr, self.device)
                  for page in PDFPage.create_pages(self.doc):
                      self.append(self.interpreter.process_page(page))
                  self.metadata = self.doc.info
              if just_text:
                  self._cleanup()

          def _cleanup(self):
              """
              Frees lots of non-textual information, such as the fonts
              and images and the objects that were needed to parse the
              PDF.
              """
              # resmgr, device and interpreter only exist for extractable
              # documents, so remove each attribute only when it is present.
              for name in ('device', 'doc', 'parser', 'resmgr', 'interpreter'):
                  if hasattr(self, name):
                      delattr(self, name)

          def text(self, clean=True):
              """
              Returns the text of the PDF as a single string.
              Options:

              :clean:
                  Removes misc cruft, like lots of whitespace.
              """
              if clean:
                  return ''.join(utils.trim_whitespace(page) for page in self)
              return ''.join(self)






      python ubuntu pdf slate






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Nov 8 at 2:19









      RyanWolfe9013

      264




      264





























          active

          oldest

          votes











          Your Answer






          StackExchange.ifUsing("editor", function () {
          StackExchange.using("externalEditor", function () {
          StackExchange.using("snippets", function () {
          StackExchange.snippets.init();
          });
          });
          }, "code-snippets");

          StackExchange.ready(function() {
          var channelOptions = {
          tags: "".split(" "),
          id: "1"
          };
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function() {
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled) {
          StackExchange.using("snippets", function() {
          createEditor();
          });
          }
          else {
          createEditor();
          }
          });

          function createEditor() {
          StackExchange.prepareEditor({
          heartbeatType: 'answer',
          convertImagesToLinks: true,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: 10,
          bindNavPrevention: true,
          postfix: "",
          imageUploader: {
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          },
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          });


          }
          });














          draft saved

          draft discarded


















          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53200632%2fslate-file-incorrectly-initializes-the-arguments%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown






























          active

          oldest

          votes













          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes
















          draft saved

          draft discarded




















































          Thanks for contributing an answer to Stack Overflow!


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          To learn more, see our tips on writing great answers.





          Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


          Please pay close attention to the following guidance:


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          To learn more, see our tips on writing great answers.




          draft saved


          draft discarded














          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53200632%2fslate-file-incorrectly-initializes-the-arguments%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          這個網誌中的熱門文章

          Tangent Lines Diagram Along Smooth Curve

          Yusuf al-Mu'taman ibn Hud

          Zucchini