Python中做广义线性模型

data2 是关于重伤病人的一些基本资料。自变量X是病人的住院天数，因变量Y是病人出院后长期恢复的预后指数，指数数值越大表示预后结局越好。
尝试对数据拟合合适的线性或非线性模型
过程：
1、通过散点图可以判断可能可以使用的模型有：线性回归，对数，指数和冥指数回归

# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

import matplotlib.pyplot as plt
from sklearn import metrics
data2=pd.read_table(r'C:/Users/Administrator/Desktop/data2.txt',sep='\s+',
	encoding='gbk',usecols=(1,2))
# plt.scatter(data2['X'],data2['Y'])
# plt.show()

Python中做广义线性模型
通过散点图可以判断可能可以使用的模型有：线性回归，对数，指数和冥指数回归
2、分别回归结果如下：

# 线性回归
from sklearn.linear_model import LinearRegression
lm=LinearRegression()
lm.fit([[x] for x in data2['X']],list(data2['Y']))
print('线性回归模型准确度为：{0}'.format(lm.score(
	[[x] for x in data2['X']],list(data2['Y']))))
print ("线性模型方差为:{0}".format(metrics.mean_squared_error(
	list(data2['Y']),
	lm.predict([[x] for x in data2['X']]))))	

	
# 非线性-倒数
from sklearn.linear_model import LinearRegression
lm_1=LinearRegression()
lm_1.fit([[1/x] for x in data2['X']],list(data2['Y']))
print('非线性(倒数)回归模型准确度为：{0}'.format(lm_1.score(
	[[1/x] for x in data2['X']],list(data2['Y']))))
print ("非线性(倒数)回归模型方差为:{0}".format(metrics.mean_squared_error(
	list(data2['Y']),
	lm_1.predict([[1/x] for x in data2['X']]))))
	
# 非线性-指数
from sklearn.linear_model import LinearRegression
import math
lm_2=LinearRegression()
lm_2.fit([[math.log(x)] for x in data2['X']],list(data2['Y']))
print('非线性(指数)回归模型准确度为：{0}'.format(lm_2.score(
	[[math.log(x)] for x in data2['X']],list(data2['Y']))))
print ("非线性(指数)回归模型方差为:{0}".format(metrics.mean_squared_error(
	list(data2['Y']),
	lm_2.predict([[math.log(x)] for x in data2['X']]))))

# 非线性-幂函数
from sklearn.linear_model import LinearRegression
import math
lm_3=LinearRegression()
lm_3.fit([[math.log(x)] for x in data2['X']],list(math.log(y) for y in data2['Y']))
print('非线性(幂函数))回归模型准确度为：{0}'.format(lm_3.score(
	[[math.log(x)] for x in data2['X']],list(math.log(y) for y in data2['Y']))))
print ("非线性(幂函数))回归模型方差为:{0}".format(metrics.mean_squared_error(list(math.log(y) for y in data2['Y']),
	lm_3.predict([[math.log(x)] for x in data2['X']]))))

Python中做广义线性模型
综合考虑准确度和方差，指数模型最优
源码：

# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

import matplotlib.pyplot as plt
from sklearn import metrics
data2=pd.read_table(r'C:/Users/Administrator/Desktop/data2.txt',sep='\s+',
	encoding='gbk',usecols=(1,2))
# plt.scatter(data2['X'],data2['Y'])
# plt.show()

# 线性回归
from sklearn.linear_model import LinearRegression
lm=LinearRegression()
lm.fit([[x] for x in data2['X']],list(data2['Y']))
print('线性回归模型准确度为：{0}'.format(lm.score(
	[[x] for x in data2['X']],list(data2['Y']))))
print ("线性模型方差为:{0}".format(metrics.mean_squared_error(
	list(data2['Y']),
	lm.predict([[x] for x in data2['X']]))))	

	
# 非线性-倒数
from sklearn.linear_model import LinearRegression
lm_1=LinearRegression()
lm_1.fit([[1/x] for x in data2['X']],list(data2['Y']))
print('非线性(倒数)回归模型准确度为：{0}'.format(lm_1.score(
	[[1/x] for x in data2['X']],list(data2['Y']))))
print ("非线性(倒数)回归模型方差为:{0}".format(metrics.mean_squared_error(
	list(data2['Y']),
	lm_1.predict([[1/x] for x in data2['X']]))))
	
# 非线性-指数
from sklearn.linear_model import LinearRegression
import math
lm_2=LinearRegression()
lm_2.fit([[math.log(x)] for x in data2['X']],list(data2['Y']))
print('非线性(指数)回归模型准确度为：{0}'.format(lm_2.score(
	[[math.log(x)] for x in data2['X']],list(data2['Y']))))
print ("非线性(指数)回归模型方差为:{0}".format(metrics.mean_squared_error(
	list(data2['Y']),
	lm_2.predict([[math.log(x)] for x in data2['X']]))))

# 非线性-幂函数
from sklearn.linear_model import LinearRegression
import math
lm_3=LinearRegression()
lm_3.fit([[math.log(x)] for x in data2['X']],list(math.log(y) for y in data2['Y']))
print('非线性(幂函数))回归模型准确度为：{0}'.format(lm_3.score(
	[[math.log(x)] for x in data2['X']],list(math.log(y) for y in data2['Y']))))
print ("非线性(幂函数))回归模型方差为:{0}".format(metrics.mean_squared_error(list(math.log(y) for y in data2['Y']),
	lm_3.predict([[math.log(x)] for x in data2['X']]))))

Hi，您需要填写昵称和邮箱！