1. 连接 MySQL，使用 pandas 的 read_sql 读取数据
import pymysql
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Open a connection to the MySQL server on localhost, database `wxarticle`.
# NOTE(review): connects as root with an empty password -- presumably a local
# development setup; confirm before running anywhere else.
connect = pymysql.connect(
    host='127.0.0.1',
    user='root',
    passwd='',
    db='wxarticle',
    charset='utf8',
    use_unicode=True,
)
# A cursor is created here but is not used by any statement visible in this script.
cursor = connect.cursor()

# Read the selected columns of the `wxcode` table straight into a DataFrame.
select_sql = "select date,code,articleClicksCount,articleCount,ad_count from wxcode"
df = pd.read_sql(sql=select_sql, con=connect)
2. 使用 groupby 按日期分组并求和 (sum)
# Collapse the rows to one row per distinct `date`, summing within each group.
dfDate = df.groupby(by='date').sum()
3. 画折线图
# Line chart of the summed articleClicksCount for each date.
x = dfDate.index  # group keys (dates) produced by the groupby
y = dfDate['articleClicksCount'].values
plt.figure(figsize=(8, 4))  # create a new 8x4 inch figure
plt.plot(x, y, "b", linewidth=1)  # draw on the current figure: blue line, width 1
plt.xlabel("Date")  # x-axis label (fixed: the original literal had a stray ')')
plt.ylabel("Article Total Read Count")  # y-axis label
plt.title("Total Read Count")  # figure title
plt.show()  # display the figure
4. 使用 np.sum 统计总和
# Two-point chart comparing the click totals of two row ranges labelled Q1 and Q2.
qX = ['Q1', 'Q2']
# Fetch the column once instead of repeating the lookup for each slice.
clicks = dfDate['articleClicksCount'].values
# NOTE(review): the split is positional -- the first three grouped rows count as
# Q1 and all remaining rows as Q2. Presumably each row is one month; confirm
# against the actual data before reusing this.
qY = [np.sum(clicks[0:3]), np.sum(clicks[3:])]
plt.figure(figsize=(8, 4))  # create a new 8x4 inch figure
plt.plot(qX, qY, "b", linewidth=1)  # draw on the current figure: blue line, width 1
plt.xlabel("Date")  # x-axis label (fixed: the original literal had a stray ')')
plt.ylabel("Article Total Read Count")  # y-axis label
plt.title("Total Read Count Q1, Q2")  # figure title
plt.show()  # display the figure
5. 过滤行
# Keep only the rows whose `ad_count` value equals 1.
dfAdDate = df.loc[df['ad_count'].eq(1)]
6. 修改列名字
# Replace the column labels of dfAdDateGroup with eight display names.
# NOTE(review): dfAdDateGroup is not defined anywhere in this excerpt; it must
# already exist with exactly eight columns for this assignment to succeed.
dfAdDateGroup.columns = [
    '带广告总阅读次数',
    '带广告的文章总数',
    '带广告的总账号个数',
    '总样本数量',
    '带广告的总账号占的比重',
    '总阅读次数',
    '广告阅读次数占的比重',
    '带2个广告的账号个数',
]
http://www.waitingfy.com/archives/4758
Post Views: 10
4758
pandas | | | Trackback |