# Break down record counts per time zone into Windows / non-Windows and plot
# the ten time zones with the highest totals.
# NOTE(review): `frame`, `Series`, `np` and `plt` are expected to be defined
# earlier in the file (not visible here) -- confirm.

# Drop missing values in column 'a', then keep the first whitespace-separated
# token of each remaining string.
results = Series([x.split()[0] for x in frame.a.dropna()])
# print(results[:5])
# print(results.value_counts()[:8])

# Keep only the rows whose 'a' value is present.
cframe = frame[frame.a.notnull()]

# NumPy array with one label per row: 'Windows' if 'a' contains "Windows",
# otherwise 'NotWindows'.
operating_system = np.where(
    cframe['a'].str.contains('Windows'), 'Windows', 'NotWindows'
)

# Core step: group by time zone ('tz') and by the operating-system label.
by_tz_os = cframe.groupby(['tz', operating_system])
# print(by_tz_os.size())

# size() gives the count per group, unstack() pivots the operating-system
# labels into columns, and missing combinations are filled with 0.
agg_counts = by_tz_os.size().unstack().fillna(0)
# print(agg_counts[:10])

# Sort: positions that order the rows by their total count, ascending.
indexer = agg_counts.sum(axis=1).argsort()
# print(indexer)

# Reorder the rows by total and keep the last ten (the largest totals).
count_subset = agg_counts.take(indexer)[-10:]
# print(count_subset)

# stacked=False draws the two operating-system bars separately.
count_subset.plot(kind='barh', stacked=False)
plt.show()
06
6月
Python pandas 实战：按 Windows 和非 Windows 分解显示各时区
