Python login and web scraper not working, how do i fix it?
up vote
-1
down vote
favorite
I'm trying to login to a website and scrape all the tr elements of the table. However, I keep getting this error:
result = session_requests.geturl,
AttributeError: 'Session' object has no attribute 'geturl'
The code is below I have python 3.6 installed. All the relevant packages are installed as well. Im running the code in Pycharm. The chromedriver is running on my pc.
from bs4 import BeautifulSoup
import requests
from lxml import HTML
import lxml.html as LH
import pandas as PD
payload = {'email': " ",'password': '', '_token': '2Mz68c6QRz1QzKValc9wm42mVXwzR5La3nbXBXwy'}
session_requests = requests.session()
#Creates session object to persist login session accross all requests
login_url= "https://airmeetsea.com/login"
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
result = session_requests.post(
login_url,
data = payload,
headers = dict(referer=login_url)
)
url = 'https://airmeetsea/unknown'(
result = session_requests.geturl,
headers = dict(referer = url)
)
url='https://airmeetsea.com/unknown'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)
response = requests.get(url)
html_string = '''
<table>
<tr>
<td> Hello! </td>
<td> Table </td>
</tr>
</table>
'''
soup = BeautifulSoup(html_string, 'lxml') # Parse the HTML as a string
table = soup.find_all('table')[0] # Grab the first table
new_table = pd.DataFrame(columns=range(0,2), index = [0]) # I know the size
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
new_table.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
new_table
class HTMLTableParser:
def parse_url(self, url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
return [(table['id'],self.parse_html_table(table))
for table in soup.find_all('table')]
def parse_html_table(self, table):
n_columns = 0
n_rows=0
column_names =
# Find number of rows and columns
# we also find the column titles if we can
for row in table.find_all('tr'):
# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows+=1
if n_columns == 0:
# Set the number of columns for our table
n_columns = len(td_tags)
# Handle column names if we find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())
# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")
columns = column_names if len(column_names) > 0 else range(0,n_columns)
df = pd.DataFrame(columns = columns,
index= range(0,n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1
# Convert to float if possible
for col in df:
try:
df[col] = df[col].astype(float)
except ValueError:
pass
return df
python web-scraping
add a comment |
up vote
-1
down vote
favorite
I'm trying to login to a website and scrape all the tr elements of the table. However, I keep getting this error:
result = session_requests.geturl,
AttributeError: 'Session' object has no attribute 'geturl'
The code is below I have python 3.6 installed. All the relevant packages are installed as well. Im running the code in Pycharm. The chromedriver is running on my pc.
from bs4 import BeautifulSoup
import requests
from lxml import HTML
import lxml.html as LH
import pandas as PD
payload = {'email': " ",'password': '', '_token': '2Mz68c6QRz1QzKValc9wm42mVXwzR5La3nbXBXwy'}
session_requests = requests.session()
#Creates session object to persist login session accross all requests
login_url= "https://airmeetsea.com/login"
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
result = session_requests.post(
login_url,
data = payload,
headers = dict(referer=login_url)
)
url = 'https://airmeetsea/unknown'(
result = session_requests.geturl,
headers = dict(referer = url)
)
url='https://airmeetsea.com/unknown'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)
response = requests.get(url)
html_string = '''
<table>
<tr>
<td> Hello! </td>
<td> Table </td>
</tr>
</table>
'''
soup = BeautifulSoup(html_string, 'lxml') # Parse the HTML as a string
table = soup.find_all('table')[0] # Grab the first table
new_table = pd.DataFrame(columns=range(0,2), index = [0]) # I know the size
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
new_table.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
new_table
class HTMLTableParser:
def parse_url(self, url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
return [(table['id'],self.parse_html_table(table))
for table in soup.find_all('table')]
def parse_html_table(self, table):
n_columns = 0
n_rows=0
column_names =
# Find number of rows and columns
# we also find the column titles if we can
for row in table.find_all('tr'):
# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows+=1
if n_columns == 0:
# Set the number of columns for our table
n_columns = len(td_tags)
# Handle column names if we find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())
# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")
columns = column_names if len(column_names) > 0 else range(0,n_columns)
df = pd.DataFrame(columns = columns,
index= range(0,n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1
# Convert to float if possible
for col in df:
try:
df[col] = df[col].astype(float)
except ValueError:
pass
return df
python web-scraping
Straight from the error message, shouldsession_requests.geturl
besession_requests.get(url)
?
– G. Anderson
Nov 7 at 22:04
Session really has nogeturl
attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
– Andersson
Nov 7 at 22:05
that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
– Shelbert Miller
Nov 8 at 4:26
add a comment |
up vote
-1
down vote
favorite
up vote
-1
down vote
favorite
I'm trying to login to a website and scrape all the tr elements of the table. However, I keep getting this error:
result = session_requests.geturl,
AttributeError: 'Session' object has no attribute 'geturl'
The code is below I have python 3.6 installed. All the relevant packages are installed as well. Im running the code in Pycharm. The chromedriver is running on my pc.
from bs4 import BeautifulSoup
import requests
from lxml import HTML
import lxml.html as LH
import pandas as PD
payload = {'email': " ",'password': '', '_token': '2Mz68c6QRz1QzKValc9wm42mVXwzR5La3nbXBXwy'}
session_requests = requests.session()
#Creates session object to persist login session accross all requests
login_url= "https://airmeetsea.com/login"
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
result = session_requests.post(
login_url,
data = payload,
headers = dict(referer=login_url)
)
url = 'https://airmeetsea/unknown'(
result = session_requests.geturl,
headers = dict(referer = url)
)
url='https://airmeetsea.com/unknown'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)
response = requests.get(url)
html_string = '''
<table>
<tr>
<td> Hello! </td>
<td> Table </td>
</tr>
</table>
'''
soup = BeautifulSoup(html_string, 'lxml') # Parse the HTML as a string
table = soup.find_all('table')[0] # Grab the first table
new_table = pd.DataFrame(columns=range(0,2), index = [0]) # I know the size
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
new_table.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
new_table
class HTMLTableParser:
def parse_url(self, url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
return [(table['id'],self.parse_html_table(table))
for table in soup.find_all('table')]
def parse_html_table(self, table):
n_columns = 0
n_rows=0
column_names =
# Find number of rows and columns
# we also find the column titles if we can
for row in table.find_all('tr'):
# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows+=1
if n_columns == 0:
# Set the number of columns for our table
n_columns = len(td_tags)
# Handle column names if we find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())
# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")
columns = column_names if len(column_names) > 0 else range(0,n_columns)
df = pd.DataFrame(columns = columns,
index= range(0,n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1
# Convert to float if possible
for col in df:
try:
df[col] = df[col].astype(float)
except ValueError:
pass
return df
python web-scraping
I'm trying to login to a website and scrape all the tr elements of the table. However, I keep getting this error:
result = session_requests.geturl,
AttributeError: 'Session' object has no attribute 'geturl'
The code is below I have python 3.6 installed. All the relevant packages are installed as well. Im running the code in Pycharm. The chromedriver is running on my pc.
from bs4 import BeautifulSoup
import requests
from lxml import HTML
import lxml.html as LH
import pandas as PD
payload = {'email': " ",'password': '', '_token': '2Mz68c6QRz1QzKValc9wm42mVXwzR5La3nbXBXwy'}
session_requests = requests.session()
#Creates session object to persist login session accross all requests
login_url= "https://airmeetsea.com/login"
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
result = session_requests.post(
login_url,
data = payload,
headers = dict(referer=login_url)
)
url = 'https://airmeetsea/unknown'(
result = session_requests.geturl,
headers = dict(referer = url)
)
url='https://airmeetsea.com/unknown'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)
response = requests.get(url)
html_string = '''
<table>
<tr>
<td> Hello! </td>
<td> Table </td>
</tr>
</table>
'''
soup = BeautifulSoup(html_string, 'lxml') # Parse the HTML as a string
table = soup.find_all('table')[0] # Grab the first table
new_table = pd.DataFrame(columns=range(0,2), index = [0]) # I know the size
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
new_table.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
new_table
class HTMLTableParser:
def parse_url(self, url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
return [(table['id'],self.parse_html_table(table))
for table in soup.find_all('table')]
def parse_html_table(self, table):
n_columns = 0
n_rows=0
column_names =
# Find number of rows and columns
# we also find the column titles if we can
for row in table.find_all('tr'):
# Determine the number of rows in the table
td_tags = row.find_all('td')
if len(td_tags) > 0:
n_rows+=1
if n_columns == 0:
# Set the number of columns for our table
n_columns = len(td_tags)
# Handle column names if we find them
th_tags = row.find_all('th')
if len(th_tags) > 0 and len(column_names) == 0:
for th in th_tags:
column_names.append(th.get_text())
# Safeguard on Column Titles
if len(column_names) > 0 and len(column_names) != n_columns:
raise Exception("Column titles do not match the number of columns")
columns = column_names if len(column_names) > 0 else range(0,n_columns)
df = pd.DataFrame(columns = columns,
index= range(0,n_rows))
row_marker = 0
for row in table.find_all('tr'):
column_marker = 0
columns = row.find_all('td')
for column in columns:
df.iat[row_marker,column_marker] = column.get_text()
column_marker += 1
if len(columns) > 0:
row_marker += 1
# Convert to float if possible
for col in df:
try:
df[col] = df[col].astype(float)
except ValueError:
pass
return df
python web-scraping
python web-scraping
asked Nov 7 at 21:59
Shelbert Miller
41
41
Straight from the error message, shouldsession_requests.geturl
besession_requests.get(url)
?
– G. Anderson
Nov 7 at 22:04
Session really has nogeturl
attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
– Andersson
Nov 7 at 22:05
that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
– Shelbert Miller
Nov 8 at 4:26
add a comment |
Straight from the error message, shouldsession_requests.geturl
besession_requests.get(url)
?
– G. Anderson
Nov 7 at 22:04
Session really has nogeturl
attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!
– Andersson
Nov 7 at 22:05
that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
– Shelbert Miller
Nov 8 at 4:26
Straight from the error message, should
session_requests.geturl
be session_requests.get(url)
?– G. Anderson
Nov 7 at 22:04
Straight from the error message, should
session_requests.geturl
be session_requests.get(url)
?– G. Anderson
Nov 7 at 22:04
Session really has no
geturl
attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!– Andersson
Nov 7 at 22:05
Session really has no
geturl
attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!– Andersson
Nov 7 at 22:05
that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
– Shelbert Miller
Nov 8 at 4:26
that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
– Shelbert Miller
Nov 8 at 4:26
add a comment |
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53198481%2fpython-login-and-web-scraper-not-working-how-do-i-fix-it%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Straight from the error message, should
session_requests.geturl
besession_requests.get(url)
?– G. Anderson
Nov 7 at 22:04
Session really has no
geturl
attribute. What is your question. What do you want to do and why you're trying to call a string with arguments?!– Andersson
Nov 7 at 22:05
that was a mistake i made uploading the code. AND i tried session_requests.get(url)... it didnt work
– Shelbert Miller
Nov 8 at 4:26