import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor

df = pd.read_csv("data/cs-training.csv")


# Rename every column to snake_case.
def camel_to_snake(column_name):
    """Convert a camelCase string to snake_case.

    Two regex passes insert an underscore at each lower-to-upper boundary,
    then the whole result is lower-cased.

    Args:
        column_name: The string to convert.

    Returns:
        The lower-cased, underscore-separated form of ``column_name``.

    Example:
        >>> camel_to_snake("javaLovesCamelCase")
        'java_loves_camel_case'
    """
    # First pass: split before a capital that starts a lower-case run
    # ("aLoves" -> "a_Loves").
    s1 = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', column_name)
    # Second pass: split any remaining lower/digit -> upper boundary.
    return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).lower()


# Quick sanity call; the result is discarded when this runs as a script.
camel_to_snake("javaLovesCamelCase")
df.columns = [camel_to_snake(col) for col in df.columns]
df.columns.tolist()

# Impute missing monthly_income with a 1-nearest-neighbour regressor that is
# fitted on the rows where monthly_income is present.
income_imputer = KNeighborsRegressor(n_neighbors=1)

has_income = df.monthly_income.notnull()
# Take explicit copies: writing into a boolean-mask slice of ``df`` is
# ambiguous (view vs. copy) and triggers pandas' SettingWithCopyWarning.
train_w_monthly_income = df[has_income].copy()
train_w_null_monthly_income = df[~has_income].copy()

cols = ['number_real_estate_loans_or_lines', 'number_of_open_credit_lines_and_loans']
income_imputer.fit(train_w_monthly_income[cols], train_w_monthly_income.monthly_income)

# Predict the missing incomes and fill them into the null-income subset.
new_values = income_imputer.predict(train_w_null_monthly_income[cols])
train_w_null_monthly_income.loc[:, 'monthly_income'] = new_values
# Non-code text captured with the snippet: 16 / December / sklearn KNeighborsRegressor
