From b34fd0655e1c7b8f72453d2bce335efc043e7d6c Mon Sep 17 00:00:00 2001
From: a-ye13 <43657152+a-ye13@users.noreply.github.com>
Date: Wed, 3 Oct 2018 00:44:47 -0400
Subject: [PATCH] Addition of Socially Engaged Customers Revenue

Add explore_utils.social_active_customers_revenue, which reads the
training data, sums the revenue of each customer (fullVisitorId) and
averages those totals within each socialEngagementType, so that the
revenue of socially engaged customers can be compared with that of the
other customers. explore.py prints the result.
---
Reviewer note: the commit id above and the blob ids on the `index` lines
were recorded for the original revision of this patch.

 explore.py       |  5 +++++
 explore_utils.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/explore.py b/explore.py
index 23546a9..64569cc 100644
--- a/explore.py
+++ b/explore.py
@@ -21,6 +21,11 @@ def main(args):
     print("The most visit times for a customer in train set is: ",
           explore_utils.find_most_visit(data))
 
+    # Mean revenue per customer, split by social engagement type
+
+    print("The mean revenue per customer by social engagement type is: ",
+          explore_utils.social_active_customers_revenue(data))
+
     # Customer spending percentiles
 
     percentiles = [95, 97.5, 99, 99.9, 99.99]
diff --git a/explore_utils.py b/explore_utils.py
index cf184a9..8833cc1 100644
--- a/explore_utils.py
+++ b/explore_utils.py
@@ -56,3 +56,35 @@ def find_most_common_traffic_sources(dataset,num=5):
 
     """
     return dataset.train['trafficSource.source'].value_counts().head(num)
+
+
+def social_active_customers_revenue(dataset):
+    """Compute the mean per-customer revenue for each social engagement type.
+
+    Revenue is first summed per customer (fullVisitorId) and then averaged
+    over the customers of each socialEngagementType, so socially engaged
+    customers can be compared with the others.
+
+    args:
+        dataset (Dataset): the google analytics dataset
+
+    returns:
+        A pandas Series indexed by socialEngagementType holding the mean
+        revenue per customer of that type.
+
+    """
+    # NOTE(review): assumes revenue lives in 'totals.transactionRevenue',
+    # following the dotted column naming of 'trafficSource.source' -- confirm.
+    revenue_col = 'totals.transactionRevenue'
+    train_df = dataset.train[
+        ['fullVisitorId', 'socialEngagementType', revenue_col]].copy()
+
+    # Missing revenue is counted as zero.
+    train_df[revenue_col] = train_df[revenue_col].astype(float).fillna(0)
+
+    # Total revenue per customer, kept apart per engagement type.
+    customer_revenue = train_df.groupby(
+        ['socialEngagementType', 'fullVisitorId'])[revenue_col].sum()
+
+    # Average the per-customer totals within each engagement type.
+    return customer_revenue.groupby(level='socialEngagementType').mean()