import numpy as np
import pandas as pd
# CSV file holding the sales records analysed by the rest of this script.
data_file = 'shopee_sales_1500_0305.csv'

# Load the whole file into a DataFrame; later sections read the
# 'category', 'main class', 'price' and 'monthly sales' columns from it.
data = pd.read_csv(filepath_or_buffer=data_file)

# Notebook-style preview of the first ten rows (the bare expression has no
# visible effect when the file is run as a plain script).
data.iloc[:10]
import numpy as np
import pandas as pd
from PIL import Image
# Multiplier applied to every raw price before plotting. The charts label the
# scaled value "price (rmb)", so this is presumably a source-currency-to-RMB
# exchange rate -- TODO confirm against the data source.
currency_rate = 0.0005

# ---- Dataset overview -------------------------------------------------------
print('\n---------------数据集统计分析------------------\n')

# Column names of the loaded dataset, as a plain list.
fields = list(data.columns)
print('数据集来源:{}'.format(data_file))
print('正在统计分析数据集......')
print('数据集字段: {}'.format(fields))

# Distinct top-level categories, in order of first appearance in the data.
category_column = data['category']
category_name = category_column.unique()
print('数据集大分类: {}'.format(category_name))

# Distinct sub-categories ("main class") across the whole dataset.
main_class_column = data['main class']
print('数据集小分类: {}'.format(main_class_column.unique()))

# Only these four columns are needed by the per-category analysis.
data_section = data.loc[:, ['category', 'main class', 'price', 'monthly sales']]

# Names of the statistics listed in the per-category progress messages.
data_name = ['价格', '月销量', '商品数量', '最高销量', '小分类']
# ---- Per-category statistics ------------------------------------------------
# For each of the four top-level categories this section derives:
#   <cat>            rows of data_section belonging to the category
#   <cat>_pricelist  prices scaled by currency_rate
#   <cat>_saleslist  monthly sales
#   <cat>_num        number of items (rows) in the category, as an int
#   <cat>_salesmax   highest monthly sales value in the category
# Watches and Fashion additionally get <cat>_class (distinct sub-categories)
# and <cat>_class_num (item count per sub-category, in the same order).
print('\n---------------类别统计分析-------------------\n')

# Progress-message template. The category label is passed in explicitly so the
# message always names the category that is actually being filtered, rather
# than relying on the order in which categories happen to appear in the data.
_progress_msg = '正在统计 {0} 的数据: {1[0]},{1[1]},{1[2]},{1[3]},{1[4]} ......'

# Statistics for the top-level category 'MenShoes'.
print(_progress_msg.format('MenShoes', data_name))
menshoes = data_section[data_section['category'] == 'MenShoes']
menshoes_pricelist = menshoes['price'] * currency_rate
menshoes_saleslist = menshoes['monthly sales']
# len() gives one row count; DataFrame.count() would give a per-column Series.
menshoes_num = len(menshoes)
menshoes_salesmax = max(menshoes_saleslist)

# Statistics for the top-level category 'Watches'.
print(_progress_msg.format('Watches', data_name))
watches = data_section[data_section['category'] == 'Watches']
watches_pricelist = watches['price'] * currency_rate
watches_saleslist = watches['monthly sales']
watches_num = len(watches)
watches_salesmax = max(watches_saleslist)
watches_class = watches['main class'].unique()
watches_class_num = [
    len(watches[watches['main class'] == class_name])
    for class_name in watches_class
]

# Statistics for the top-level category 'Menbags'.
print(_progress_msg.format('Menbags', data_name))
menbags = data_section[data_section['category'] == 'Menbags']
menbags_pricelist = menbags['price'] * currency_rate
menbags_saleslist = menbags['monthly sales']
menbags_num = len(menbags)
menbags_salesmax = max(menbags_saleslist)

# Statistics for the top-level category 'Fashion'.
print(_progress_msg.format('Fashion', data_name))
fashion = data_section[data_section['category'] == 'Fashion']
fashion_pricelist = fashion['price'] * currency_rate
fashion_saleslist = fashion['monthly sales']
fashion_num = len(fashion)
fashion_salesmax = max(fashion_saleslist)
fashion_class = fashion['main class'].unique()
fashion_class_num = [
    len(fashion[fashion['main class'] == class_name])
    for class_name in fashion_class
]
from pyecharts import Pie
from pyecharts import online

# NOTE(review): online() presumably switches pyecharts to a remotely hosted
# copy of its JavaScript assets -- confirm against the pyecharts 0.x docs.
online()

# Ring chart of how many items fall into each top-level category.
# Labels are spelled out in the same order as the counts below, so a slice can
# never be paired with the wrong name when the data lists categories in a
# different order.
category_labels = ['MenShoes', 'Watches', 'Menbags', 'Fashion']
# Each slice needs a single scalar, so the item count is the number of rows
# in each filtered frame.
category_counts = [len(menshoes), len(watches), len(menbags), len(fashion)]

pie = Pie('Category percentage', title_pos='center')
pie.add(
    'Item Number',
    category_labels,
    category_counts,
    radius=[40, 70],
    label_text_color=None,
    is_label_show=True,
    legend_orient="vertical",
    legend_pos="left",
)
# Bare expression: renders the chart when run as a notebook cell.
pie
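# The dataset covers items with monthly sales above 1500 in four categories:
# men's shoes, watches, men's bags, and fashion accessories.
# The sections below focus mainly on fashion accessories and watches.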
from pyecharts import Pie

# Ring chart of the item count per sub-category within 'Fashion'.
pie = Pie('Fashion accessories', title_pos='center')
pie.add(
    'Item number',
    fashion_class,
    fashion_class_num,
    radius=[40, 70],
    label_text_color=None,
    is_label_show=True,
    legend_orient="vertical",
    # Lowercase keyword, matching the category pie chart; the capitalised
    # "Left" is not one of the documented position keywords.
    legend_pos="left",
)
# Bare expression: renders the chart when run as a notebook cell.
pie
from pyecharts import Pie

# Ring chart of the item count per sub-category within 'Watches'.
pie = Pie('Watches', title_pos='center')
pie.add(
    'Item number',
    watches_class,
    watches_class_num,
    radius=[40, 70],
    label_text_color=None,
    is_label_show=True,
    legend_orient="vertical",
    # Lowercase keyword, matching the category pie chart; the capitalised
    # "Left" is not one of the documented position keywords.
    legend_pos="left",
)
# Bare expression: renders the chart when run as a notebook cell.
pie
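# Watch styles include women's watches, men's watches, and couple watches.
# Of these, women's watches account for the largest share.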
from pyecharts import Scatter

# Scatter plot of scaled price (y) against monthly sales (x), one series per
# top-level category.
scatter = Scatter('Price distribution', 'Menshoes-Watches-Menbags-Fashion')

# Axis settings passed unchanged with every series.
price_axis_opts = dict(
    xaxis_name='sales',
    yaxis_name='price (rmb)',
    yaxis_name_gap=50,
    xaxis_min=1500,
)

# (series name, x values, y values), added in this order.
price_series = (
    ('MenShoes', menshoes_saleslist, menshoes_pricelist),
    ('Watches', watches_saleslist, watches_pricelist),
    ('Menbags', menbags_saleslist, menbags_pricelist),
    ('Fashion', fashion_saleslist, fashion_pricelist),
)
for series_name, sales_values, price_values in price_series:
    scatter.add(series_name, sales_values, price_values, **price_axis_opts)

# Bare expression: renders the chart when run as a notebook cell.
scatter
from pyecharts import Boxplot

# Box plot of scaled prices per top-level category, plus a printed summary of
# the five values computed for each box.
boxplot = Boxplot('Price peak')

# Labels are listed explicitly in the same order as y_axis so each box is
# always paired with its own category, whatever order the data lists them in.
x_axis = ['MenShoes', 'Watches', 'Menbags', 'Fashion']
y_axis = [menshoes_pricelist, watches_pricelist, menbags_pricelist, fashion_pricelist]

# Computed once and reused for both the chart and the printout below.
box_stats = boxplot.prepare_data(y_axis)
boxplot.add("price", x_axis, box_stats)

# The message indexes positions 0-4 of each prepared entry and labels them
# min / Q1 / median / Q3 / max.
for label, box in zip(x_axis, box_stats):
    print(label)
    print("min 最小值:{0[0]} / Q1下四分位数:{0[1]} / median(or Q2)中位数:{0[2]} / Q3上四分位数:{0[3]} / max最大值:{0[4]}".format(box))

# Bare expression: renders the chart when run as a notebook cell.
boxplot
from pyecharts import Scatter

# Scatter plot of monthly sales (y) against scaled price (x), one series per
# top-level category.
scatter = Scatter('Sales distribution', 'Menshoes-Watches-Menbags-Fashion')

# Axis settings passed unchanged with every series.
sales_axis_opts = dict(
    yaxis_name='sales',
    xaxis_name='price (rmb)',
    yaxis_name_gap=50,
    yaxis_min=1500,
)

# (series name, x values, y values), added in this order.
sales_series = (
    ('MenShoes', menshoes_pricelist, menshoes_saleslist),
    ('Watches', watches_pricelist, watches_saleslist),
    ('Menbags', menbags_pricelist, menbags_saleslist),
    ('Fashion', fashion_pricelist, fashion_saleslist),
)
for series_name, price_values, sales_values in sales_series:
    scatter.add(series_name, price_values, sales_values, **sales_axis_opts)

# Bare expression: renders the chart when run as a notebook cell.
scatter
from pyecharts import Boxplot

# Box plot of monthly sales per top-level category, plus a printed summary of
# the five values computed for each box.
boxplot = Boxplot('Sales peak')

# Labels are listed explicitly in the same order as y_axis so each box is
# always paired with its own category, whatever order the data lists them in.
x_axis = ['MenShoes', 'Watches', 'Menbags', 'Fashion']
y_axis = [menshoes_saleslist, watches_saleslist, menbags_saleslist, fashion_saleslist]

# Computed once and reused for both the chart and the printout below.
box_stats = boxplot.prepare_data(y_axis)
boxplot.add("sales", x_axis, box_stats)

# The message indexes positions 0-4 of each prepared entry and labels them
# min / Q1 / median / Q3 / max.
for label, box in zip(x_axis, box_stats):
    print(label)
    print("min 最小值:{0[0]} / Q1下四分位数:{0[1]} / median(or Q2)中位数:{0[2]} / Q3上四分位数:{0[3]} / max最大值:{0[4]}".format(box))

# Bare expression: renders the chart when run as a notebook cell.
boxplot