DataFrame constructor not properly called when outputting to CSV file











up vote
0
down vote

favorite












Hi, I am new to the field of machine learning, and I am getting this error (DataFrame constructor not properly called) whenever I try to write the results of the program to a CSV file via a DataFrame.



MAX_FEATURES = 500
MIN_DF = 3
sw = stopwords.words('english')

df = pd.read_csv('sample.csv', low_memory=False)
df1 = df[['RECIPIENT_ID', 'EVENT_TS']].copy()
#raise SystemExit

df['EVENT_NAME'] = np.where(df['EVENT_NAME'].apply(
lambda s: all([c in string.ascii_uppercase + string.digits + '-,' for c in s])),
'PAINTID', df['EVENT_NAME'])

df['EVENT_NAME'] = df['EVENT_NAME'].replace(r'^paint&spMailing.*', 'paint', regex=True)

rn_extract = df['REFERRER_NAME'].str.extract(r'(google)|(sample|sale)|(persona.email)|(welcome)|(bing)|(yahoo)|(inspiration)|(interior)|(exterior)|(pathtopurchase)|(newsletter)|(memorial day)|(color of the month)|(fathers day)|(excite)|(designer ?story)|(ask jeeves)|(webcrawler)|(color.?clinic)|(personaexp|personalexp)|(rebate)|(rwb)|(pins to palettes)|(trend)|(20 days of color)|(4th of july)|(canada week)|(behr box 2.0)|(labor day)|(colorfullybehr)|(holiday collection)|(mindful moments)|(earthlink)|(dogpile)|(quick dry)|(swipes intro)|(swipes nationwide)|(event day)|(color discovery tool)|(cnet search)|(myway)|(civic day)|(summer savings)', flags=re.I, expand=False)
rn_extract = rn_extract.fillna('|').apply(''.join, axis=1).replace(r'|', '', regex=True).str.strip()
df['REFERRER_NAME'] = rn_extract.values

str_columns = ['EVENT_NAME', 'SITE_NAME'] # 'EVENT_URL', 'REFERRER_URL', 'EVENT_TYPE_NAME',
#'SESSION_LEAD_SOURCE', 'REFERRER_TYPE', 'REFERRER_NAME','REFERRER_KEYWORDS',

drop_columns = ['DATABASE_ID', 'VISITOR_KEY', 'SESSION_KEY', 'SESSION_START_TS',
'REFERRER_MAILING_ID', 'EVENT_ID', 'EVENT_TYPE_CODE',
'EVENT_HYPERLINK_ID', 'PAGE_ID', 'PAGE_PARENT_ID',
'PAGE_PARENT_NAME', 'SITE_DOMAIN_ID', 'SITE_ID',
'SITE_TYPE_CODE', 'SITE_URL', 'Email']

timestamp_columns = ['EVENT_TS']

df = df.drop(drop_columns, axis=1)
df['text'] = df[str_columns].astype(str).apply('|'.join, axis=1).replace(r'(nan|)|(nan$)', '', regex=True)
df = df.drop(str_columns, axis=1)
df = df.groupby('RECIPIENT_ID')['text'].apply('|'.join).str.lower().str.strip().to_frame()

pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_df=1.0, min_df=MIN_DF, token_pattern=r'b[a-z0-9]{2,20}b',
stop_words=sw + ['behr', 'www', 'com', 'http']))])

File=pd.DataFrame(X).to_csv('grouby_Pipeline.csv')









share|improve this question




















  • 1




    What are the results of the program? It would help us if you could give a sample input, the expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the posting guidelines (linked in the original comment) when posting. Thanks!
    – d_kennetz
    Nov 7 at 22:09















up vote
0
down vote

favorite












Hi, I am new to the field of machine learning, and I am getting this error (DataFrame constructor not properly called) whenever I try to write the results of the program to a CSV file via a DataFrame.



MAX_FEATURES = 500
MIN_DF = 3
sw = stopwords.words('english')

df = pd.read_csv('sample.csv', low_memory=False)
df1 = df[['RECIPIENT_ID', 'EVENT_TS']].copy()
#raise SystemExit

df['EVENT_NAME'] = np.where(df['EVENT_NAME'].apply(
lambda s: all([c in string.ascii_uppercase + string.digits + '-,' for c in s])),
'PAINTID', df['EVENT_NAME'])

df['EVENT_NAME'] = df['EVENT_NAME'].replace(r'^paint&spMailing.*', 'paint', regex=True)

rn_extract = df['REFERRER_NAME'].str.extract(r'(google)|(sample|sale)|(persona.email)|(welcome)|(bing)|(yahoo)|(inspiration)|(interior)|(exterior)|(pathtopurchase)|(newsletter)|(memorial day)|(color of the month)|(fathers day)|(excite)|(designer ?story)|(ask jeeves)|(webcrawler)|(color.?clinic)|(personaexp|personalexp)|(rebate)|(rwb)|(pins to palettes)|(trend)|(20 days of color)|(4th of july)|(canada week)|(behr box 2.0)|(labor day)|(colorfullybehr)|(holiday collection)|(mindful moments)|(earthlink)|(dogpile)|(quick dry)|(swipes intro)|(swipes nationwide)|(event day)|(color discovery tool)|(cnet search)|(myway)|(civic day)|(summer savings)', flags=re.I, expand=False)
rn_extract = rn_extract.fillna('|').apply(''.join, axis=1).replace(r'|', '', regex=True).str.strip()
df['REFERRER_NAME'] = rn_extract.values

str_columns = ['EVENT_NAME', 'SITE_NAME'] # 'EVENT_URL', 'REFERRER_URL', 'EVENT_TYPE_NAME',
#'SESSION_LEAD_SOURCE', 'REFERRER_TYPE', 'REFERRER_NAME','REFERRER_KEYWORDS',

drop_columns = ['DATABASE_ID', 'VISITOR_KEY', 'SESSION_KEY', 'SESSION_START_TS',
'REFERRER_MAILING_ID', 'EVENT_ID', 'EVENT_TYPE_CODE',
'EVENT_HYPERLINK_ID', 'PAGE_ID', 'PAGE_PARENT_ID',
'PAGE_PARENT_NAME', 'SITE_DOMAIN_ID', 'SITE_ID',
'SITE_TYPE_CODE', 'SITE_URL', 'Email']

timestamp_columns = ['EVENT_TS']

df = df.drop(drop_columns, axis=1)
df['text'] = df[str_columns].astype(str).apply('|'.join, axis=1).replace(r'(nan|)|(nan$)', '', regex=True)
df = df.drop(str_columns, axis=1)
df = df.groupby('RECIPIENT_ID')['text'].apply('|'.join).str.lower().str.strip().to_frame()

pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_df=1.0, min_df=MIN_DF, token_pattern=r'b[a-z0-9]{2,20}b',
stop_words=sw + ['behr', 'www', 'com', 'http']))])

File=pd.DataFrame(X).to_csv('grouby_Pipeline.csv')









share|improve this question




















  • 1




    What are the results of the program? It would help us if you could give a sample input, the expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the posting guidelines (linked in the original comment) when posting. Thanks!
    – d_kennetz
    Nov 7 at 22:09













up vote
0
down vote

favorite









up vote
0
down vote

favorite











Hi, I am new to the field of machine learning, and I am getting this error (DataFrame constructor not properly called) whenever I try to write the results of the program to a CSV file via a DataFrame.



MAX_FEATURES = 500
MIN_DF = 3
sw = stopwords.words('english')

df = pd.read_csv('sample.csv', low_memory=False)
df1 = df[['RECIPIENT_ID', 'EVENT_TS']].copy()
#raise SystemExit

df['EVENT_NAME'] = np.where(df['EVENT_NAME'].apply(
lambda s: all([c in string.ascii_uppercase + string.digits + '-,' for c in s])),
'PAINTID', df['EVENT_NAME'])

df['EVENT_NAME'] = df['EVENT_NAME'].replace(r'^paint&spMailing.*', 'paint', regex=True)

rn_extract = df['REFERRER_NAME'].str.extract(r'(google)|(sample|sale)|(persona.email)|(welcome)|(bing)|(yahoo)|(inspiration)|(interior)|(exterior)|(pathtopurchase)|(newsletter)|(memorial day)|(color of the month)|(fathers day)|(excite)|(designer ?story)|(ask jeeves)|(webcrawler)|(color.?clinic)|(personaexp|personalexp)|(rebate)|(rwb)|(pins to palettes)|(trend)|(20 days of color)|(4th of july)|(canada week)|(behr box 2.0)|(labor day)|(colorfullybehr)|(holiday collection)|(mindful moments)|(earthlink)|(dogpile)|(quick dry)|(swipes intro)|(swipes nationwide)|(event day)|(color discovery tool)|(cnet search)|(myway)|(civic day)|(summer savings)', flags=re.I, expand=False)
rn_extract = rn_extract.fillna('|').apply(''.join, axis=1).replace(r'|', '', regex=True).str.strip()
df['REFERRER_NAME'] = rn_extract.values

str_columns = ['EVENT_NAME', 'SITE_NAME'] # 'EVENT_URL', 'REFERRER_URL', 'EVENT_TYPE_NAME',
#'SESSION_LEAD_SOURCE', 'REFERRER_TYPE', 'REFERRER_NAME','REFERRER_KEYWORDS',

drop_columns = ['DATABASE_ID', 'VISITOR_KEY', 'SESSION_KEY', 'SESSION_START_TS',
'REFERRER_MAILING_ID', 'EVENT_ID', 'EVENT_TYPE_CODE',
'EVENT_HYPERLINK_ID', 'PAGE_ID', 'PAGE_PARENT_ID',
'PAGE_PARENT_NAME', 'SITE_DOMAIN_ID', 'SITE_ID',
'SITE_TYPE_CODE', 'SITE_URL', 'Email']

timestamp_columns = ['EVENT_TS']

df = df.drop(drop_columns, axis=1)
df['text'] = df[str_columns].astype(str).apply('|'.join, axis=1).replace(r'(nan|)|(nan$)', '', regex=True)
df = df.drop(str_columns, axis=1)
df = df.groupby('RECIPIENT_ID')['text'].apply('|'.join).str.lower().str.strip().to_frame()

pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_df=1.0, min_df=MIN_DF, token_pattern=r'b[a-z0-9]{2,20}b',
stop_words=sw + ['behr', 'www', 'com', 'http']))])

File=pd.DataFrame(X).to_csv('grouby_Pipeline.csv')









share|improve this question















Hi, I am new to the field of machine learning, and I am getting this error (DataFrame constructor not properly called) whenever I try to write the results of the program to a CSV file via a DataFrame.



MAX_FEATURES = 500
MIN_DF = 3
sw = stopwords.words('english')

df = pd.read_csv('sample.csv', low_memory=False)
df1 = df[['RECIPIENT_ID', 'EVENT_TS']].copy()
#raise SystemExit

df['EVENT_NAME'] = np.where(df['EVENT_NAME'].apply(
lambda s: all([c in string.ascii_uppercase + string.digits + '-,' for c in s])),
'PAINTID', df['EVENT_NAME'])

df['EVENT_NAME'] = df['EVENT_NAME'].replace(r'^paint&spMailing.*', 'paint', regex=True)

rn_extract = df['REFERRER_NAME'].str.extract(r'(google)|(sample|sale)|(persona.email)|(welcome)|(bing)|(yahoo)|(inspiration)|(interior)|(exterior)|(pathtopurchase)|(newsletter)|(memorial day)|(color of the month)|(fathers day)|(excite)|(designer ?story)|(ask jeeves)|(webcrawler)|(color.?clinic)|(personaexp|personalexp)|(rebate)|(rwb)|(pins to palettes)|(trend)|(20 days of color)|(4th of july)|(canada week)|(behr box 2.0)|(labor day)|(colorfullybehr)|(holiday collection)|(mindful moments)|(earthlink)|(dogpile)|(quick dry)|(swipes intro)|(swipes nationwide)|(event day)|(color discovery tool)|(cnet search)|(myway)|(civic day)|(summer savings)', flags=re.I, expand=False)
rn_extract = rn_extract.fillna('|').apply(''.join, axis=1).replace(r'|', '', regex=True).str.strip()
df['REFERRER_NAME'] = rn_extract.values

str_columns = ['EVENT_NAME', 'SITE_NAME'] # 'EVENT_URL', 'REFERRER_URL', 'EVENT_TYPE_NAME',
#'SESSION_LEAD_SOURCE', 'REFERRER_TYPE', 'REFERRER_NAME','REFERRER_KEYWORDS',

drop_columns = ['DATABASE_ID', 'VISITOR_KEY', 'SESSION_KEY', 'SESSION_START_TS',
'REFERRER_MAILING_ID', 'EVENT_ID', 'EVENT_TYPE_CODE',
'EVENT_HYPERLINK_ID', 'PAGE_ID', 'PAGE_PARENT_ID',
'PAGE_PARENT_NAME', 'SITE_DOMAIN_ID', 'SITE_ID',
'SITE_TYPE_CODE', 'SITE_URL', 'Email']

timestamp_columns = ['EVENT_TS']

df = df.drop(drop_columns, axis=1)
df['text'] = df[str_columns].astype(str).apply('|'.join, axis=1).replace(r'(nan|)|(nan$)', '', regex=True)
df = df.drop(str_columns, axis=1)
df = df.groupby('RECIPIENT_ID')['text'].apply('|'.join).str.lower().str.strip().to_frame()

pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_df=1.0, min_df=MIN_DF, token_pattern=r'b[a-z0-9]{2,20}b',
stop_words=sw + ['behr', 'www', 'com', 'http']))])

File=pd.DataFrame(X).to_csv('grouby_Pipeline.csv')






python dataframe machine-learning k-means






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 7 at 22:34









martineau

64.8k987175




64.8k987175










asked Nov 7 at 21:53









ExpressCode

1




1








  • 1




    What are the results of the program? It would help us if you could give a sample input, the expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the posting guidelines (linked in the original comment) when posting. Thanks!
    – d_kennetz
    Nov 7 at 22:09














  • 1




    What are the results of the program? It would help us if you could give a sample input, the expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the posting guidelines (linked in the original comment) when posting. Thanks!
    – d_kennetz
    Nov 7 at 22:09








1




1




What are the results of the program? It would help us if you could give a sample input, the expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the posting guidelines (linked in the original comment) when posting. Thanks!
– d_kennetz
Nov 7 at 22:09




What are the results of the program? It would help us if you could give a sample input, the expected output, and the minimal amount of code needed to reproduce your problem. It seems like a lot of this code is okay, and therefore pretty irrelevant when trying to pinpoint your issue. You should try to follow the posting guidelines (linked in the original comment) when posting. Thanks!
– d_kennetz
Nov 7 at 22:09

















active

oldest

votes











Your Answer






StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});


}
});














 

draft saved


draft discarded


















StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53198416%2fdataframe-constructor-not-properly-called-when-outputing-to-csv-file%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown






























active

oldest

votes













active

oldest

votes









active

oldest

votes






active

oldest

votes
















 

draft saved


draft discarded



















































 


draft saved


draft discarded














StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53198416%2fdataframe-constructor-not-properly-called-when-outputing-to-csv-file%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown





















































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown

































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown







這個網誌中的熱門文章

Tangent Lines Diagram Along Smooth Curve

Yusuf al-Mu'taman ibn Hud

Zucchini