import numpy as np
from pandas import Series,DataFrame
import pandas as pd
# Bucket a list of ages into age groups with pandas.cut.
ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 32]
# Bin edges: each consecutive pair of edges delimits one interval.
bins = [18, 25, 35, 60, 100]
# No `right` argument is given, so pandas' default (right-closed intervals) applies.
cats = pd.cut(x=ages, bins=bins)
print(cats)
# Output:
# [(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], ..., (35, 60], (25, 35], (60, 100], (35, 60], (25, 35]]
# Length: 11
# Categories (4, interval[int64]): [(18, 25] < (25, 35] < (35, 60] < (60, 100]]
# Integer code of each value's bin, i.e. the position of its category in
# cats.categories. This is a bare expression: its value is only displayed
# when run in an interactive session / notebook cell.
cats.codes
# Output:
# array([0, 0, 0, 1, 0, 0, 2, 1, 3, 2, 1], dtype=int8)
# Count how many values fall into each bin, most frequent bin first.
# The top-level pd.value_counts() is deprecated in recent pandas releases;
# wrapping the Categorical in a Series gives the same frequency-sorted counts.
pd.Series(cats).value_counts()
# Output:
# (18, 25]     5
# (25, 35]     3
# (35, 60]     2
# (60, 100]    1
# dtype: int64
# Same binning, but with intervals closed on the left and open on the
# right, i.e. [a, b) instead of (a, b].
pd.cut(x=ages, bins=bins, right=False)
# Output:
# [[18, 25), [18, 25), [25, 35), [25, 35), [18, 25), ..., [35, 60), [25, 35), [60, 100), [35, 60), [25, 35)]
# Length: 11
# Categories (4, interval[int64]): [[18, 25) < [25, 35) < [35, 60) < [60, 100)]
# One human-readable label per bin, listed in bin order.
group_names = ["Youth", "YoungAdult", "MiddleAged", "Senior"]
# Passing labels makes cut() return these names instead of interval notation.
pd.cut(x=ages, bins=bins, labels=group_names)
# Output:
# [Youth, Youth, Youth, YoungAdult, Youth, ..., MiddleAged, YoungAdult, Senior, MiddleAged, YoungAdult]
# Length: 11
# Categories (4, object): [Youth < YoungAdult < MiddleAged < Senior]
# Bar chart of how many return values fall into each 0.1-wide bin.
# NOTE(review): `ret` is not defined in the visible code; it is assumed to be
# a 1-D collection of numeric returns created earlier -- confirm.

# Round the edges to two decimals so floating-point drift from np.arange
# (e.g. 0.30000000000000004) does not leak into the bin edges and labels.
bins = np.round(np.arange(-0.7, 1.2, 0.1), 2)
# include_lowest=True makes the first interval also contain its left edge.
cats = pd.cut(ret, bins, include_lowest=True)
# Frequency-sorted counts. The top-level pd.value_counts() is deprecated in
# recent pandas releases, so go through a Series instead.
cats_value_counts = pd.Series(cats).value_counts()
ax = cats.value_counts().plot.bar(rot=0, color="b", figsize=(25, 18))
# Label the axes the bars were actually drawn on. pandas creates a new figure
# when no `ax` is passed, so labels set through pyplot *before* the plot call
# would land on a different (empty) figure.
ax.set_xlabel('Nasdaq Year Return')
ax.set_ylabel('Count')
# Source: http://www.waitingfy.com/archives/5214
# Post Views: 0
# 5214
# pandas | | | Trackback |