pandas 实战:连接 MySQL,统计公众号情况

1. 连接 MySQL,使用 read_sql 读取数据

import pymysql
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Open a connection to the MySQL server on 127.0.0.1 (database `wxarticle`)
# as user `root` with an empty password, using the utf8 charset.
connect = pymysql.connect(host='127.0.0.1', user='root', passwd='',
                          db='wxarticle', charset='utf8', use_unicode=True)
# NOTE(review): this cursor is not used anywhere in the visible section;
# it is kept in case code elsewhere relies on it.
cursor = connect.cursor()

# Read the selected columns of the `wxcode` table into a DataFrame.
select_sql = "select date,code,articleClicksCount,articleCount,ad_count from wxcode"
df = pd.read_sql(sql=select_sql, con=connect)

2. 使用 groupby 按日期分组并求和

# Collapse the rows to one per distinct `date`, summing the other columns.
# The result is indexed by `date`.
# NOTE(review): whether the `code` column is summed, concatenated or dropped
# depends on its dtype and the pandas version -- confirm if it matters.
dfDate = df.groupby(by='date').sum()

3. 画折线图

# Line chart of the summed article click count for each date.
x = dfDate.index  # the group keys (dates) go on the X axis
y = dfDate['articleClicksCount'].values  # summed clicks per date on the Y axis
plt.figure(figsize=(8,4))  # create the figure (8 x 4 inches)
plt.plot(x,y,"b",linewidth=1)  # draw on the current figure: X, Y, blue line, width 1
plt.xlabel("Date")  # X-axis label (fixed: the original had a stray ")" in the text)
plt.ylabel("Article Total Read Count")  # Y-axis label
plt.title("Total Read Count")  # chart title
plt.show()  # display the chart

4. 使用 np.sum 统计总和

# Compare the total click count of two quarters on a line chart.
qX = ['Q1', 'Q2']
# The first three rows of dfDate are summed as Q1 and all remaining rows as Q2.
# NOTE(review): this assumes dfDate holds one row per month in chronological
# order, starting at the first month of Q1 -- confirm against the data.
qY = [np.sum(dfDate['articleClicksCount'].values[0:3]), np.sum(dfDate['articleClicksCount'].values[3:])]
plt.figure(figsize=(8,4))  # create the figure (8 x 4 inches)
plt.plot(qX,qY,"b",linewidth=1)  # draw on the current figure: X, Y, blue line, width 1
# X-axis label. The original read "Date)" -- a copy-paste from the daily chart
# with a stray ")" -- but this axis shows quarters, not dates.
plt.xlabel("Quarter")
plt.ylabel("Article Total Read Count")  # Y-axis label
plt.title("Total Read Count Q1, Q2")  # chart title
plt.show()  # display the chart

5. 过滤行

# Keep only the rows whose `ad_count` equals 1.
dfAdDate = df.loc[df['ad_count'].eq(1)]

6. 修改列名字

# Replace all column labels of dfAdDateGroup with eight Chinese display names.
# In English, in order:
#   total reads of articles with ads, total number of articles with ads,
#   total number of accounts with ads, total sample count,
#   share of accounts with ads, total reads,
#   share of reads that are ad reads, number of accounts with 2 ads.
# NOTE(review): dfAdDateGroup is not defined anywhere in the visible section;
# presumably it is built from dfAdDate in steps omitted here and must have
# exactly eight columns in this order -- confirm.
dfAdDateGroup.columns = ['带广告总阅读次数', '带广告的文章总数','带广告的总账号个数','总样本数量','带广告的总账号占的比重','总阅读次数','广告阅读次数占的比重','带2个广告的账号个数']

http://www.waitingfy.com/archives/4758

4758

Leave a Reply

Name and Email Address are required fields.
Your email will not be published or shared with third parties.