Python login and web scraper not working, how do i fix it?











up vote
-1
down vote

favorite












I'm trying to login to a website and scrape all the tr elements of the table. However, I keep getting this error:

result = session_requests.geturl,
AttributeError: 'Session' object has no attribute 'geturl'
The code is below I have python 3.6 installed. All the relevant packages are installed as well. Im running the code in Pycharm. The chromedriver is running on my pc.



from bs4 import BeautifulSoup
import requests
from lxml import HTML
import lxml.html as LH
import pandas as PD

payload = {'email': " ",'password': '', '_token': '2Mz68c6QRz1QzKValc9wm42mVXwzR5La3nbXBXwy'}

session_requests = requests.session()

#Creates session object to persist login session accross all requests
login_url= "https://airmeetsea.com/login"
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]


result = session_requests.post(
login_url,
data = payload,
headers = dict(referer=login_url)
)

url = 'https://airmeetsea/unknown'(
result = session_requests.geturl,
headers = dict(referer = url)
)



url='https://airmeetsea.com/unknown'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)

response = requests.get(url)

html_string = '''
<table>
<tr>
<td> Hello! </td>
<td> Table </td>
</tr>
</table>
'''

soup = BeautifulSoup(html_string, 'lxml') # Parse the HTML as a string

table = soup.find_all('table')[0] # Grab the first table

new_table = pd.DataFrame(columns=range(0,2), index = [0]) # I know the size

row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
new_table.iat[row_marker,column_marker] = column.get_text()
column_marker += 1

new_table


class HTMLTableParser:

def parse_url(self, url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
return [(table['id'],self.parse_html_table(table))
for table in soup.find_all('table')]

def parse_html_table(self, table):
n_columns = 0
n_rows=0
column_names =

# Find number of rows and columns
# we also find the column titles if we can
for row in table.find_all('tr'):

# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows+=1
if n_columns == 0:
# Set the number of columns for our table
n_columns = len(td_tags)

# Handle column names if we find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())

# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")

columns = column_names if len(column_names) > 0 else range(0,n_columns)
df = pd.DataFrame(columns = columns,
index= range(0,n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1

# Convert to float if possible
for col in df:
try:
df[col] = df[col].astype(float)
except ValueError:
pass

return df









share|improve this question






















  • Straight from the error message, should session_requests.geturl be session_requests.get(url)?
    – G. Anderson
    Nov 7 at 22:04












  • Session really has no geturl attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
    – Andersson
    Nov 7 at 22:05










  • that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
    – Shelbert Miller
    Nov 8 at 4:26















up vote
-1
down vote

favorite












I'm trying to login to a website and scrape all the tr elements of the table. However, I keep getting this error:

result = session_requests.geturl,
AttributeError: 'Session' object has no attribute 'geturl'
The code is below I have python 3.6 installed. All the relevant packages are installed as well. Im running the code in Pycharm. The chromedriver is running on my pc.



from bs4 import BeautifulSoup
import requests
from lxml import HTML
import lxml.html as LH
import pandas as PD

payload = {'email': " ",'password': '', '_token': '2Mz68c6QRz1QzKValc9wm42mVXwzR5La3nbXBXwy'}

session_requests = requests.session()

#Creates session object to persist login session accross all requests
login_url= "https://airmeetsea.com/login"
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]


result = session_requests.post(
login_url,
data = payload,
headers = dict(referer=login_url)
)

url = 'https://airmeetsea/unknown'(
result = session_requests.geturl,
headers = dict(referer = url)
)



url='https://airmeetsea.com/unknown'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)

response = requests.get(url)

html_string = '''
<table>
<tr>
<td> Hello! </td>
<td> Table </td>
</tr>
</table>
'''

soup = BeautifulSoup(html_string, 'lxml') # Parse the HTML as a string

table = soup.find_all('table')[0] # Grab the first table

new_table = pd.DataFrame(columns=range(0,2), index = [0]) # I know the size

row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
new_table.iat[row_marker,column_marker] = column.get_text()
column_marker += 1

new_table


class HTMLTableParser:

def parse_url(self, url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
return [(table['id'],self.parse_html_table(table))
for table in soup.find_all('table')]

def parse_html_table(self, table):
n_columns = 0
n_rows=0
column_names =

# Find number of rows and columns
# we also find the column titles if we can
for row in table.find_all('tr'):

# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows+=1
if n_columns == 0:
# Set the number of columns for our table
n_columns = len(td_tags)

# Handle column names if we find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())

# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")

columns = column_names if len(column_names) > 0 else range(0,n_columns)
df = pd.DataFrame(columns = columns,
index= range(0,n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1

# Convert to float if possible
for col in df:
try:
df[col] = df[col].astype(float)
except ValueError:
pass

return df









share|improve this question






















  • Straight from the error message, should session_requests.geturl be session_requests.get(url)?
    – G. Anderson
    Nov 7 at 22:04












  • Session really has no geturl attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
    – Andersson
    Nov 7 at 22:05










  • that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
    – Shelbert Miller
    Nov 8 at 4:26













up vote
-1
down vote

favorite









up vote
-1
down vote

favorite











I'm trying to login to a website and scrape all the tr elements of the table. However, I keep getting this error:

result = session_requests.geturl,
AttributeError: 'Session' object has no attribute 'geturl'
The code is below I have python 3.6 installed. All the relevant packages are installed as well. Im running the code in Pycharm. The chromedriver is running on my pc.



from bs4 import BeautifulSoup
import requests
from lxml import HTML
import lxml.html as LH
import pandas as PD

payload = {'email': " ",'password': '', '_token': '2Mz68c6QRz1QzKValc9wm42mVXwzR5La3nbXBXwy'}

session_requests = requests.session()

#Creates session object to persist login session accross all requests
login_url= "https://airmeetsea.com/login"
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]


result = session_requests.post(
login_url,
data = payload,
headers = dict(referer=login_url)
)

url = 'https://airmeetsea/unknown'(
result = session_requests.geturl,
headers = dict(referer = url)
)



url='https://airmeetsea.com/unknown'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)

response = requests.get(url)

html_string = '''
<table>
<tr>
<td> Hello! </td>
<td> Table </td>
</tr>
</table>
'''

soup = BeautifulSoup(html_string, 'lxml') # Parse the HTML as a string

table = soup.find_all('table')[0] # Grab the first table

new_table = pd.DataFrame(columns=range(0,2), index = [0]) # I know the size

row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
new_table.iat[row_marker,column_marker] = column.get_text()
column_marker += 1

new_table


class HTMLTableParser:

def parse_url(self, url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
return [(table['id'],self.parse_html_table(table))
for table in soup.find_all('table')]

def parse_html_table(self, table):
n_columns = 0
n_rows=0
column_names =

# Find number of rows and columns
# we also find the column titles if we can
for row in table.find_all('tr'):

# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows+=1
if n_columns == 0:
# Set the number of columns for our table
n_columns = len(td_tags)

# Handle column names if we find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())

# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")

columns = column_names if len(column_names) > 0 else range(0,n_columns)
df = pd.DataFrame(columns = columns,
index= range(0,n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1

# Convert to float if possible
for col in df:
try:
df[col] = df[col].astype(float)
except ValueError:
pass

return df









share|improve this question













I'm trying to login to a website and scrape all the tr elements of the table. However, I keep getting this error:

result = session_requests.geturl,
AttributeError: 'Session' object has no attribute 'geturl'
The code is below I have python 3.6 installed. All the relevant packages are installed as well. Im running the code in Pycharm. The chromedriver is running on my pc.



from bs4 import BeautifulSoup
import requests
from lxml import HTML
import lxml.html as LH
import pandas as PD

payload = {'email': " ",'password': '', '_token': '2Mz68c6QRz1QzKValc9wm42mVXwzR5La3nbXBXwy'}

session_requests = requests.session()

#Creates session object to persist login session accross all requests
login_url= "https://airmeetsea.com/login"
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]


result = session_requests.post(
login_url,
data = payload,
headers = dict(referer=login_url)
)

url = 'https://airmeetsea/unknown'(
result = session_requests.geturl,
headers = dict(referer = url)
)



url='https://airmeetsea.com/unknown'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)

response = requests.get(url)

html_string = '''
<table>
<tr>
<td> Hello! </td>
<td> Table </td>
</tr>
</table>
'''

soup = BeautifulSoup(html_string, 'lxml') # Parse the HTML as a string

table = soup.find_all('table')[0] # Grab the first table

new_table = pd.DataFrame(columns=range(0,2), index = [0]) # I know the size

row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
new_table.iat[row_marker,column_marker] = column.get_text()
column_marker += 1

new_table


class HTMLTableParser:

def parse_url(self, url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
return [(table['id'],self.parse_html_table(table))
for table in soup.find_all('table')]

def parse_html_table(self, table):
n_columns = 0
n_rows=0
column_names =

# Find number of rows and columns
# we also find the column titles if we can
for row in table.find_all('tr'):

# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows+=1
if n_columns == 0:
# Set the number of columns for our table
n_columns = len(td_tags)

# Handle column names if we find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())

# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")

columns = column_names if len(column_names) > 0 else range(0,n_columns)
df = pd.DataFrame(columns = columns,
index= range(0,n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1

# Convert to float if possible
for col in df:
try:
df[col] = df[col].astype(float)
except ValueError:
pass

return df






python web-scraping






share|improve this question













share|improve this question











share|improve this question




share|improve this question










asked Nov 7 at 21:59









Shelbert Miller

41




41












  • Straight from the error message, should session_requests.geturl be session_requests.get(url)?
    – G. Anderson
    Nov 7 at 22:04












  • Session really has no geturl attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
    – Andersson
    Nov 7 at 22:05










  • that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
    – Shelbert Miller
    Nov 8 at 4:26


















  • Straight from the error message, should session_requests.geturl be session_requests.get(url)?
    – G. Anderson
    Nov 7 at 22:04












  • Session really has no geturl attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
    – Andersson
    Nov 7 at 22:05










  • that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
    – Shelbert Miller
    Nov 8 at 4:26
















Straight from the error message, should session_requests.geturl be session_requests.get(url)?
– G. Anderson
Nov 7 at 22:04






Straight from the error message, should session_requests.geturl be session_requests.get(url)?
– G. Anderson
Nov 7 at 22:04














Session really has no geturl attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
– Andersson
Nov 7 at 22:05




Session really has no geturl attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
– Andersson
Nov 7 at 22:05












that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
– Shelbert Miller
Nov 8 at 4:26




that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
– Shelbert Miller
Nov 8 at 4:26

















active

oldest

votes











Your Answer






StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});


}
});














draft saved

draft discarded


















StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53198481%2fpython-login-and-web-scraper-not-working-how-do-i-fix-it%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown






























active

oldest

votes













active

oldest

votes









active

oldest

votes






active

oldest

votes
















draft saved

draft discarded




















































Thanks for contributing an answer to Stack Overflow!


  • Please be sure to answer the question. Provide details and share your research!

But avoid



  • Asking for help, clarification, or responding to other answers.

  • Making statements based on opinion; back them up with references or personal experience.


To learn more, see our tips on writing great answers.





Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


Please pay close attention to the following guidance:


  • Please be sure to answer the question. Provide details and share your research!

But avoid



  • Asking for help, clarification, or responding to other answers.

  • Making statements based on opinion; back them up with references or personal experience.


To learn more, see our tips on writing great answers.




draft saved


draft discarded














StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53198481%2fpython-login-and-web-scraper-not-working-how-do-i-fix-it%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown





















































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown

































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown







這個網誌中的熱門文章

Tangent Lines Diagram Along Smooth Curve

Yusuf al-Mu'taman ibn Hud

Zucchini