DataFrame constructor not properly called when outputing to csv file
up vote
0
down vote
favorite
Hi I am new to the field of machine learning but I am getting this error(DataFrame constructor not properly called
) whenever I tried to output the results of the program to a dataframe csv
MAX_FEATURES = 500
MIN_DF = 3
sw = stopwords.words('english')
df = pd.read_csv('sample.csv', low_memory=False)
df1 = df[['RECIPIENT_ID', 'EVENT_TS']].copy()
#raise SystemExit
df['EVENT_NAME'] = np.where(df['EVENT_NAME'].apply(
lambda s: all([c in string.ascii_uppercase + string.digits + '-,' for c in s])),
'PAINTID', df['EVENT_NAME'])
df['EVENT_NAME'] = df['EVENT_NAME'].replace(r'^paint&spMailing.*', 'paint', regex=True)
rn_extract = df['REFERRER_NAME'].str.extract(r'(google)|(sample|sale)|(persona.email)|(welcome)|(bing)|(yahoo)|(inspiration)|(interior)|(exterior)|(pathtopurchase)|(newsletter)|(memorial day)|(color of the month)|(fathers day)|(excite)|(designer ?story)|(ask jeeves)|(webcrawler)|(color.?clinic)|(personaexp|personalexp)|(rebate)|(rwb)|(pins to palettes)|(trend)|(20 days of color)|(4th of july)|(canada week)|(behr box 2.0)|(labor day)|(colorfullybehr)|(holiday collection)|(mindful moments)|(earthlink)|(dogpile)|(quick dry)|(swipes intro)|(swipes nationwide)|(event day)|(color discovery tool)|(cnet search)|(myway)|(civic day)|(summer savings)', flags=re.I, expand=False)
rn_extract = rn_extract.fillna('|').apply(''.join, axis=1).replace(r'|', '', regex=True).str.strip()
df['REFERRER_NAME'] = rn_extract.values
str_columns = ['EVENT_NAME', 'SITE_NAME'] # 'EVENT_URL', 'REFERRER_URL', 'EVENT_TYPE_NAME',
#'SESSION_LEAD_SOURCE', 'REFERRER_TYPE', 'REFERRER_NAME','REFERRER_KEYWORDS',
drop_columns = ['DATABASE_ID', 'VISITOR_KEY', 'SESSION_KEY', 'SESSION_START_TS',
'REFERRER_MAILING_ID', 'EVENT_ID', 'EVENT_TYPE_CODE',
'EVENT_HYPERLINK_ID', 'PAGE_ID', 'PAGE_PARENT_ID',
'PAGE_PARENT_NAME', 'SITE_DOMAIN_ID', 'SITE_ID',
'SITE_TYPE_CODE', 'SITE_URL', 'Email']
timestamp_columns = ['EVENT_TS']
df = df.drop(drop_columns, axis=1)
df['text'] = df[str_columns].astype(str).apply('|'.join, axis=1).replace(r'(nan|)|(nan$)', '', regex=True)
df = df.drop(str_columns, axis=1)
df = df.groupby('RECIPIENT_ID')['text'].apply('|'.join).str.lower().str.strip().to_frame()
pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_df=1.0, min_df=MIN_DF, token_pattern=r'b[a-z0-9]{2,20}b',
stop_words=sw + ['behr', 'www', 'com', 'http']))])
File=pd.DataFrame(X).to_csv('grouby_Pipeline.csv')
python dataframe machine-learning k-means
add a comment |
up vote
0
down vote
favorite
Hi I am new to the field of machine learning but I am getting this error(DataFrame constructor not properly called
) whenever I tried to output the results of the program to a dataframe csv
MAX_FEATURES = 500
MIN_DF = 3
sw = stopwords.words('english')
df = pd.read_csv('sample.csv', low_memory=False)
df1 = df[['RECIPIENT_ID', 'EVENT_TS']].copy()
#raise SystemExit
df['EVENT_NAME'] = np.where(df['EVENT_NAME'].apply(
lambda s: all([c in string.ascii_uppercase + string.digits + '-,' for c in s])),
'PAINTID', df['EVENT_NAME'])
df['EVENT_NAME'] = df['EVENT_NAME'].replace(r'^paint&spMailing.*', 'paint', regex=True)
rn_extract = df['REFERRER_NAME'].str.extract(r'(google)|(sample|sale)|(persona.email)|(welcome)|(bing)|(yahoo)|(inspiration)|(interior)|(exterior)|(pathtopurchase)|(newsletter)|(memorial day)|(color of the month)|(fathers day)|(excite)|(designer ?story)|(ask jeeves)|(webcrawler)|(color.?clinic)|(personaexp|personalexp)|(rebate)|(rwb)|(pins to palettes)|(trend)|(20 days of color)|(4th of july)|(canada week)|(behr box 2.0)|(labor day)|(colorfullybehr)|(holiday collection)|(mindful moments)|(earthlink)|(dogpile)|(quick dry)|(swipes intro)|(swipes nationwide)|(event day)|(color discovery tool)|(cnet search)|(myway)|(civic day)|(summer savings)', flags=re.I, expand=False)
rn_extract = rn_extract.fillna('|').apply(''.join, axis=1).replace(r'|', '', regex=True).str.strip()
df['REFERRER_NAME'] = rn_extract.values
str_columns = ['EVENT_NAME', 'SITE_NAME'] # 'EVENT_URL', 'REFERRER_URL', 'EVENT_TYPE_NAME',
#'SESSION_LEAD_SOURCE', 'REFERRER_TYPE', 'REFERRER_NAME','REFERRER_KEYWORDS',
drop_columns = ['DATABASE_ID', 'VISITOR_KEY', 'SESSION_KEY', 'SESSION_START_TS',
'REFERRER_MAILING_ID', 'EVENT_ID', 'EVENT_TYPE_CODE',
'EVENT_HYPERLINK_ID', 'PAGE_ID', 'PAGE_PARENT_ID',
'PAGE_PARENT_NAME', 'SITE_DOMAIN_ID', 'SITE_ID',
'SITE_TYPE_CODE', 'SITE_URL', 'Email']
timestamp_columns = ['EVENT_TS']
df = df.drop(drop_columns, axis=1)
df['text'] = df[str_columns].astype(str).apply('|'.join, axis=1).replace(r'(nan|)|(nan$)', '', regex=True)
df = df.drop(str_columns, axis=1)
df = df.groupby('RECIPIENT_ID')['text'].apply('|'.join).str.lower().str.strip().to_frame()
pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_df=1.0, min_df=MIN_DF, token_pattern=r'b[a-z0-9]{2,20}b',
stop_words=sw + ['behr', 'www', 'com', 'http']))])
File=pd.DataFrame(X).to_csv('grouby_Pipeline.csv')
python dataframe machine-learning k-means
1
what are the results of the program? It would help us if you could give a sample input, expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the guidelines here when posting. Thanks!
– d_kennetz
Nov 7 at 22:09
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
Hi I am new to the field of machine learning but I am getting this error(DataFrame constructor not properly called
) whenever I tried to output the results of the program to a dataframe csv
MAX_FEATURES = 500
MIN_DF = 3
sw = stopwords.words('english')
df = pd.read_csv('sample.csv', low_memory=False)
df1 = df[['RECIPIENT_ID', 'EVENT_TS']].copy()
#raise SystemExit
df['EVENT_NAME'] = np.where(df['EVENT_NAME'].apply(
lambda s: all([c in string.ascii_uppercase + string.digits + '-,' for c in s])),
'PAINTID', df['EVENT_NAME'])
df['EVENT_NAME'] = df['EVENT_NAME'].replace(r'^paint&spMailing.*', 'paint', regex=True)
rn_extract = df['REFERRER_NAME'].str.extract(r'(google)|(sample|sale)|(persona.email)|(welcome)|(bing)|(yahoo)|(inspiration)|(interior)|(exterior)|(pathtopurchase)|(newsletter)|(memorial day)|(color of the month)|(fathers day)|(excite)|(designer ?story)|(ask jeeves)|(webcrawler)|(color.?clinic)|(personaexp|personalexp)|(rebate)|(rwb)|(pins to palettes)|(trend)|(20 days of color)|(4th of july)|(canada week)|(behr box 2.0)|(labor day)|(colorfullybehr)|(holiday collection)|(mindful moments)|(earthlink)|(dogpile)|(quick dry)|(swipes intro)|(swipes nationwide)|(event day)|(color discovery tool)|(cnet search)|(myway)|(civic day)|(summer savings)', flags=re.I, expand=False)
rn_extract = rn_extract.fillna('|').apply(''.join, axis=1).replace(r'|', '', regex=True).str.strip()
df['REFERRER_NAME'] = rn_extract.values
str_columns = ['EVENT_NAME', 'SITE_NAME'] # 'EVENT_URL', 'REFERRER_URL', 'EVENT_TYPE_NAME',
#'SESSION_LEAD_SOURCE', 'REFERRER_TYPE', 'REFERRER_NAME','REFERRER_KEYWORDS',
drop_columns = ['DATABASE_ID', 'VISITOR_KEY', 'SESSION_KEY', 'SESSION_START_TS',
'REFERRER_MAILING_ID', 'EVENT_ID', 'EVENT_TYPE_CODE',
'EVENT_HYPERLINK_ID', 'PAGE_ID', 'PAGE_PARENT_ID',
'PAGE_PARENT_NAME', 'SITE_DOMAIN_ID', 'SITE_ID',
'SITE_TYPE_CODE', 'SITE_URL', 'Email']
timestamp_columns = ['EVENT_TS']
df = df.drop(drop_columns, axis=1)
df['text'] = df[str_columns].astype(str).apply('|'.join, axis=1).replace(r'(nan|)|(nan$)', '', regex=True)
df = df.drop(str_columns, axis=1)
df = df.groupby('RECIPIENT_ID')['text'].apply('|'.join).str.lower().str.strip().to_frame()
pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_df=1.0, min_df=MIN_DF, token_pattern=r'b[a-z0-9]{2,20}b',
stop_words=sw + ['behr', 'www', 'com', 'http']))])
File=pd.DataFrame(X).to_csv('grouby_Pipeline.csv')
python dataframe machine-learning k-means
Hi I am new to the field of machine learning but I am getting this error(DataFrame constructor not properly called
) whenever I tried to output the results of the program to a dataframe csv
MAX_FEATURES = 500
MIN_DF = 3
sw = stopwords.words('english')
df = pd.read_csv('sample.csv', low_memory=False)
df1 = df[['RECIPIENT_ID', 'EVENT_TS']].copy()
#raise SystemExit
df['EVENT_NAME'] = np.where(df['EVENT_NAME'].apply(
lambda s: all([c in string.ascii_uppercase + string.digits + '-,' for c in s])),
'PAINTID', df['EVENT_NAME'])
df['EVENT_NAME'] = df['EVENT_NAME'].replace(r'^paint&spMailing.*', 'paint', regex=True)
rn_extract = df['REFERRER_NAME'].str.extract(r'(google)|(sample|sale)|(persona.email)|(welcome)|(bing)|(yahoo)|(inspiration)|(interior)|(exterior)|(pathtopurchase)|(newsletter)|(memorial day)|(color of the month)|(fathers day)|(excite)|(designer ?story)|(ask jeeves)|(webcrawler)|(color.?clinic)|(personaexp|personalexp)|(rebate)|(rwb)|(pins to palettes)|(trend)|(20 days of color)|(4th of july)|(canada week)|(behr box 2.0)|(labor day)|(colorfullybehr)|(holiday collection)|(mindful moments)|(earthlink)|(dogpile)|(quick dry)|(swipes intro)|(swipes nationwide)|(event day)|(color discovery tool)|(cnet search)|(myway)|(civic day)|(summer savings)', flags=re.I, expand=False)
rn_extract = rn_extract.fillna('|').apply(''.join, axis=1).replace(r'|', '', regex=True).str.strip()
df['REFERRER_NAME'] = rn_extract.values
str_columns = ['EVENT_NAME', 'SITE_NAME'] # 'EVENT_URL', 'REFERRER_URL', 'EVENT_TYPE_NAME',
#'SESSION_LEAD_SOURCE', 'REFERRER_TYPE', 'REFERRER_NAME','REFERRER_KEYWORDS',
drop_columns = ['DATABASE_ID', 'VISITOR_KEY', 'SESSION_KEY', 'SESSION_START_TS',
'REFERRER_MAILING_ID', 'EVENT_ID', 'EVENT_TYPE_CODE',
'EVENT_HYPERLINK_ID', 'PAGE_ID', 'PAGE_PARENT_ID',
'PAGE_PARENT_NAME', 'SITE_DOMAIN_ID', 'SITE_ID',
'SITE_TYPE_CODE', 'SITE_URL', 'Email']
timestamp_columns = ['EVENT_TS']
df = df.drop(drop_columns, axis=1)
df['text'] = df[str_columns].astype(str).apply('|'.join, axis=1).replace(r'(nan|)|(nan$)', '', regex=True)
df = df.drop(str_columns, axis=1)
df = df.groupby('RECIPIENT_ID')['text'].apply('|'.join).str.lower().str.strip().to_frame()
pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_df=1.0, min_df=MIN_DF, token_pattern=r'b[a-z0-9]{2,20}b',
stop_words=sw + ['behr', 'www', 'com', 'http']))])
File=pd.DataFrame(X).to_csv('grouby_Pipeline.csv')
python dataframe machine-learning k-means
python dataframe machine-learning k-means
edited Nov 7 at 22:34
martineau
64.8k987175
64.8k987175
asked Nov 7 at 21:53
ExpressCode
1
1
1
what are the results of the program? It would help us if you could give a sample input, expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the guidelines here when posting. Thanks!
– d_kennetz
Nov 7 at 22:09
add a comment |
1
what are the results of the program? It would help us if you could give a sample input, expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the guidelines here when posting. Thanks!
– d_kennetz
Nov 7 at 22:09
1
1
what are the results of the program? It would help us if you could give a sample input, expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the guidelines here when posting. Thanks!
– d_kennetz
Nov 7 at 22:09
what are the results of the program? It would help us if you could give a sample input, expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the guidelines here when posting. Thanks!
– d_kennetz
Nov 7 at 22:09
add a comment |
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53198416%2fdataframe-constructor-not-properly-called-when-outputing-to-csv-file%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
1
what are the results of the program? It would help us if you could give a sample input, expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the guidelines here when posting. Thanks!
– d_kennetz
Nov 7 at 22:09