影院复工数据分析
# Import packages
import numpy as np
import pandas as pd
from pyecharts.charts import Bar, Map, Line, Page
from pyecharts import options as opts
from pyecharts.globals import SymbolType, WarningType
# Switch off pyecharts' warning output for the rest of the session
WarningType.ShowWarning = False
影院复工地图-淘票票
复工率Top5省份
# Load the Top-5 reopening-rate provinces and order the rows by ascending rate
top5_csv = '../data/淘票票复工率Top5省份0827.csv'
df_1 = pd.read_csv(top5_csv).sort_values('openCinemaRate')
df_1
|
boxOffice |
boxOfficeStr |
cityId |
cityName |
openCinemaCnt |
openCinemaCntStr |
openCinemaIncrCnt |
openCinemaIncrCntStr |
openCinemaRate |
openCinemaRateStr |
scheduleCnt |
scheduleCntStr |
showDate |
soldSeatCnt |
soldSeatCntStr |
---|
4 |
59522139 |
59.5万 |
999929 |
青海省 |
41 |
41 |
0 |
0 |
0.953 |
95.3% |
1000 |
1000 |
20200827 |
16906 |
1.69万 |
---|
3 |
970996899 |
971.0万 |
999916 |
河南省 |
510 |
510 |
0 |
0 |
0.964 |
96.4% |
15119 |
1.51万 |
20200827 |
289174 |
28.92万 |
---|
2 |
540238952 |
540.2万 |
999914 |
江西省 |
334 |
334 |
-5 |
-5 |
0.965 |
96.5% |
7198 |
7198 |
20200827 |
158912 |
15.89万 |
---|
1 |
370686517 |
370.7万 |
999920 |
广西 |
258 |
258 |
1 |
1 |
0.985 |
98.5% |
5581 |
5581 |
20200827 |
104000 |
10.40万 |
---|
0 |
27058030 |
27.1万 |
999926 |
西藏 |
23 |
23 |
1 |
1 |
1.000 |
100.0% |
409 |
409 |
20200827 |
6764 |
6764 |
---|
# Chart inputs: province names, and reopening rates parsed from percent
# strings such as '95.3%' into plain floats
x_data = df_1['cityName'].tolist()
y_data = [float(rate_str.split('%')[0]) for rate_str in df_1['openCinemaRateStr'].tolist()]
# Bar chart, assembled as a single fluent chain; reversal_axis() swaps the
# x and y axes, and the value labels sit to the right of each bar
bar1 = (
    Bar(init_opts=opts.InitOpts(width='1350px', height='750px'))
    .add_xaxis(x_data)
    .add_yaxis('', y_data, category_gap='50%')
    .set_global_opts(title_opts=opts.TitleOpts(title='全国复工率Top5省份(8-27)'))
    .set_series_opts(label_opts=opts.LabelOpts(is_show=True, position='right'))
    .set_colors('#F6A061')
    .reversal_axis()
)
bar1.render()
'C:\\Users\\wzd\\Desktop\\CDA\\CDA_Python\\Python项目实作\\网络爬虫\\电影\\电影复工分析\\code\\render.html'
全国影院复工地图
province_csv = '../data/淘票票各省票房-0827.csv'
df_2 = pd.read_csv(province_csv)
# Region names: remove the '市' and '省' characters
# (presumably so the names match the region labels of the 'china' map — confirm)
names_without_shi = df_2['cityName'].str.replace('市', '')
df_2['cityName'] = names_without_shi.str.replace('省', '')
df_2.head()
|
boxOffice |
boxOfficeStr |
cityId |
cityName |
openCinemaCnt |
openCinemaCntStr |
openCinemaIncrCnt |
openCinemaIncrCntStr |
openCinemaRate |
openCinemaRateStr |
scheduleCnt |
scheduleCntStr |
showDate |
soldSeatCnt |
soldSeatCntStr |
---|
0 |
571133208 |
571.1万 |
99991 |
北京 |
206 |
206 |
3 |
3 |
0.866 |
86.6% |
5729 |
5729 |
20200827 |
115052 |
11.51万 |
---|
1 |
220772644 |
220.8万 |
99992 |
天津 |
90 |
90 |
0 |
0 |
0.833 |
83.3% |
2636 |
2636 |
20200827 |
57467 |
5.75万 |
---|
2 |
636961140 |
637.0万 |
99993 |
河北 |
384 |
384 |
-3 |
-3 |
0.901 |
90.1% |
9807 |
9807 |
20200827 |
180247 |
18.02万 |
---|
3 |
364804000 |
364.8万 |
99994 |
山西 |
264 |
264 |
-4 |
-4 |
0.917 |
91.7% |
6163 |
6163 |
20200827 |
103178 |
10.32万 |
---|
4 |
241007410 |
241.0万 |
99995 |
内蒙古 |
186 |
186 |
-7 |
-7 |
0.842 |
84.2% |
4207 |
4207 |
20200827 |
68254 |
6.83万 |
---|
# Keep only the region name and its count of reopened cinemas
reopened_columns = ['cityName', 'openCinemaCnt']
open_num = df_2[reopened_columns]
open_num.head()
|
cityName |
openCinemaCnt |
---|
0 |
北京 |
206 |
---|
1 |
天津 |
90 |
---|
2 |
河北 |
384 |
---|
3 |
山西 |
264 |
---|
4 |
内蒙古 |
186 |
---|
# Region names and reopened-cinema counts, converted to plain Python lists
x_data = open_num.cityName.values.tolist()
y_data = open_num.openCinemaCnt.values.tolist()
# Map
map1 = Map(init_opts=opts.InitOpts(width='1350px', height='750px'))
map1.add("", [list(z) for z in zip(x_data, y_data)],
         maptype='china'
         )
# The visual-map upper bound is taken from y_data (native Python ints after
# tolist()) instead of Series.max(), which yields a numpy scalar; pyecharts
# serialises its options to JSON and expects native Python types.
map1.set_global_opts(title_opts=opts.TitleOpts(title='全国影院复工数地图(8-27)'),
                     visualmap_opts=opts.VisualMapOpts(max_=max(y_data),
                                                       is_piecewise=True,
                                                       ),
                     )
map1.render()
'C:\\Users\\wzd\\Desktop\\CDA\\CDA_Python\\Python项目实作\\网络爬虫\\电影\\电影复工分析\\code\\render.html'
全国影院复工数Top10
# Rank regions by reopened-cinema count (largest first) and keep the first ten
ranked_by_count = open_num.sort_values('openCinemaCnt', ascending=False)
num_top10 = ranked_by_count.head(10)
num_top10
|
cityName |
openCinemaCnt |
---|
18 |
广东 |
1092 |
---|
9 |
江苏 |
813 |
---|
10 |
浙江 |
663 |
---|
22 |
四川 |
564 |
---|
14 |
山东 |
517 |
---|
15 |
河南 |
510 |
---|
17 |
湖南 |
405 |
---|
2 |
河北 |
384 |
---|
16 |
湖北 |
374 |
---|
11 |
安徽 |
363 |
---|
# Top-10 region names and their reopened-cinema counts as plain Python lists
x_data = num_top10.cityName.values.tolist()
y_data = num_top10.openCinemaCnt.values.tolist()
# Bar chart
bar2 = Bar(init_opts=opts.InitOpts(width='1350px', height='750px'))
bar2.add_xaxis(x_data)
bar2.add_yaxis('', y_data)
# The hidden visual map colours the bars by value; its upper bound is derived
# from the data so the colour scale stays correct when the input file changes
# (it was previously pinned to 1092, the maximum of the 08-27 snapshot).
bar2.set_global_opts(title_opts=opts.TitleOpts(title='全国影院复工数Top10省份(8-27日)'),
                     visualmap_opts=opts.VisualMapOpts(is_show=False, max_=max(y_data))
                     )
bar2.render()
'C:\\Users\\wzd\\Desktop\\CDA\\CDA_Python\\Python项目实作\\网络爬虫\\电影\\电影复工分析\\code\\render.html'
|
boxOffice |
boxOfficeStr |
cityId |
cityName |
openCinemaCnt |
openCinemaCntStr |
openCinemaIncrCnt |
openCinemaIncrCntStr |
openCinemaRate |
openCinemaRateStr |
scheduleCnt |
scheduleCntStr |
showDate |
soldSeatCnt |
soldSeatCntStr |
---|
0 |
422155895 |
422.2万 |
-5 |
全国 |
1397 |
1397 |
1009 |
1009 |
0.129 |
12.9% |
12599 |
1.26万 |
20200720 |
164716 |
16.47万 |
---|
1 |
537683369 |
537.7万 |
-5 |
全国 |
2040 |
2040 |
643 |
643 |
0.189 |
18.9% |
23482 |
2.35万 |
20200721 |
215780 |
21.58万 |
---|
2 |
644860406 |
644.9万 |
-5 |
全国 |
2567 |
2567 |
527 |
527 |
0.237 |
23.7% |
35414 |
3.54万 |
20200722 |
274121 |
27.41万 |
---|
3 |
662853406 |
662.9万 |
-5 |
全国 |
3152 |
3152 |
585 |
585 |
0.291 |
29.1% |
46315 |
4.63万 |
20200723 |
309331 |
30.93万 |
---|
4 |
2030300376 |
2030.3万 |
-5 |
全国 |
4670 |
4670 |
1518 |
1518 |
0.432 |
43.2% |
83543 |
8.35万 |
20200724 |
757934 |
75.79万 |
---|
# Print the column / non-null count / dtype summary of df_3.
# NOTE(review): df_3 is not assigned anywhere in the visible part of this file —
# the cell that loads it appears to be missing; confirm it is defined before this runs.
df_3.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39 entries, 0 to 38
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 boxOffice 39 non-null int64
1 boxOfficeStr 39 non-null object
2 cityId 39 non-null int64
3 cityName 39 non-null object
4 openCinemaCnt 39 non-null int64
5 openCinemaCntStr 39 non-null int64
6 openCinemaIncrCnt 39 non-null int64
7 openCinemaIncrCntStr 39 non-null int64
8 openCinemaRate 39 non-null float64
9 openCinemaRateStr 39 non-null object
10 scheduleCnt 39 non-null int64
11 scheduleCntStr 39 non-null object
12 showDate 39 non-null int64
13 soldSeatCnt 39 non-null int64
14 soldSeatCntStr 39 non-null object
dtypes: float64(1), int64(9), object(5)
memory usage: 4.7+ KB