• 加入Google Analytics、Google Tag Manager QQ群一起交流谷歌分析小站—总群
  • 加入Adobe Analytics、Adobe Launch交流群,加入请附上Adobe的组织ID,没有请勿加Adobe Analytics交流群
  • Google Analytics和Google Tag Manager视频课程第三版http://ke.ichdata.com/course/50

Python中做广义线性模型

Python与算法 GA小站 4年前 (2016-08-10) 4098次浏览 已收录 0个评论

data2 是关于重伤病人的一些基本资料。自变量X是病人的住院天数,因变量Y是病人出院后长期恢复的预后指数,指数数值越大表示预后结局越好。
尝试对数据拟合合适的线性或非线性模型
过程:
1、通过散点图可以判断,可能适用的模型有:线性回归、对数、指数和幂函数回归

# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

import matplotlib.pyplot as plt
from sklearn import metrics
# Load the patient data set. The file is parsed as whitespace-delimited text in
# GBK encoding and only the columns at positions 1 and 2 are kept; the rest of
# the script reads them as data2['X'] and data2['Y'].
# The separator is written as a raw string so that "\s" reaches pandas as a
# regex escape instead of being an invalid Python string escape.
data2 = pd.read_table(
    r'C:/Users/Administrator/Desktop/data2.txt',
    sep=r'\s+',
    encoding='gbk',
    usecols=(1, 2),
)
# plt.scatter(data2['X'],data2['Y'])
# plt.show()

Python中做广义线性模型
通过散点图可以判断,可能适用的模型有:线性回归、对数、指数和幂函数回归
2、分别回归结果如下:

# Simple linear regression: fit Y = a + b * X on the untransformed values.
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
# The estimator takes a 2-D feature matrix, so each X value becomes its own row.
linear_features = [[value] for value in data2['X']]
linear_targets = list(data2['Y'])
lm.fit(linear_features, linear_targets)
# First figure: the value returned by LinearRegression.score on the training data.
linear_score = lm.score(linear_features, linear_targets)
print('线性回归模型准确度为:{0}'.format(linear_score))
# Second figure: mean squared error of the in-sample predictions.
linear_mse = metrics.mean_squared_error(
    linear_targets, lm.predict(linear_features))
print ("线性模型方差为:{0}".format(linear_mse))

	
# Reciprocal model: fit Y = a + b * (1 / X).
from sklearn.linear_model import LinearRegression
lm_1 = LinearRegression()
# A float numerator forces true division, so the transform cannot collapse to
# 0 through integer floor division when run under Python 2 with integer X.
reciprocal_features = [[1.0 / x] for x in data2['X']]
reciprocal_targets = list(data2['Y'])
lm_1.fit(reciprocal_features, reciprocal_targets)
# First figure: the value returned by LinearRegression.score on the training data.
reciprocal_score = lm_1.score(reciprocal_features, reciprocal_targets)
print('非线性(倒数)回归模型准确度为:{0}'.format(reciprocal_score))
# Second figure: mean squared error of the in-sample predictions.
reciprocal_mse = metrics.mean_squared_error(
    reciprocal_targets, lm_1.predict(reciprocal_features))
print ("非线性(倒数)回归模型方差为:{0}".format(reciprocal_mse))
	
# Log-transformed predictor: fit Y = a + b * ln(X).
# NOTE(review): the printed labels call this the "exponential" model, but the
# transform applied here is a logarithm of X with Y left untransformed.
from sklearn.linear_model import LinearRegression
import math
lm_2 = LinearRegression()
log_features = [[math.log(value)] for value in data2['X']]
raw_targets = list(data2['Y'])
lm_2.fit(log_features, raw_targets)
# First figure: the value returned by LinearRegression.score on the training data.
log_score = lm_2.score(log_features, raw_targets)
print('非线性(指数)回归模型准确度为:{0}'.format(log_score))
# Second figure: mean squared error of the in-sample predictions.
log_mse = metrics.mean_squared_error(
    raw_targets, lm_2.predict(log_features))
print ("非线性(指数)回归模型方差为:{0}".format(log_mse))

# Power model: fit ln(Y) = a + b * ln(X), i.e. Y = e^a * X^b.
from sklearn.linear_model import LinearRegression
import math
lm_3 = LinearRegression()
power_features = [[math.log(x)] for x in data2['X']]
power_targets = [math.log(y) for y in data2['Y']]
lm_3.fit(power_features, power_targets)
# NOTE: both figures below are computed against ln(Y), not Y, so they are on a
# different scale from the figures printed for the models fitted on raw Y.
power_score = lm_3.score(power_features, power_targets)
# The labels previously contained a stray doubled ")" after "幂函数".
print('非线性(幂函数)回归模型准确度为:{0}'.format(power_score))
power_mse = metrics.mean_squared_error(
    power_targets, lm_3.predict(power_features))
print ("非线性(幂函数)回归模型方差为:{0}".format(power_mse))

Python中做广义线性模型
       综合考虑准确度和方差,指数模型最优
源码:

# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

import matplotlib.pyplot as plt
from sklearn import metrics
# Load the patient data set. The file is parsed as whitespace-delimited text in
# GBK encoding and only the columns at positions 1 and 2 are kept; the rest of
# the script reads them as data2['X'] and data2['Y'].
# The separator is written as a raw string so that "\s" reaches pandas as a
# regex escape instead of being an invalid Python string escape.
data2 = pd.read_table(
    r'C:/Users/Administrator/Desktop/data2.txt',
    sep=r'\s+',
    encoding='gbk',
    usecols=(1, 2),
)
# plt.scatter(data2['X'],data2['Y'])
# plt.show()

# Simple linear regression: fit Y = a + b * X on the untransformed values.
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
# The estimator takes a 2-D feature matrix, so each X value becomes its own row.
linear_features = [[value] for value in data2['X']]
linear_targets = list(data2['Y'])
lm.fit(linear_features, linear_targets)
# First figure: the value returned by LinearRegression.score on the training data.
linear_score = lm.score(linear_features, linear_targets)
print('线性回归模型准确度为:{0}'.format(linear_score))
# Second figure: mean squared error of the in-sample predictions.
linear_mse = metrics.mean_squared_error(
    linear_targets, lm.predict(linear_features))
print ("线性模型方差为:{0}".format(linear_mse))

	
# Reciprocal model: fit Y = a + b * (1 / X).
from sklearn.linear_model import LinearRegression
lm_1 = LinearRegression()
# A float numerator forces true division, so the transform cannot collapse to
# 0 through integer floor division when run under Python 2 with integer X.
reciprocal_features = [[1.0 / x] for x in data2['X']]
reciprocal_targets = list(data2['Y'])
lm_1.fit(reciprocal_features, reciprocal_targets)
# First figure: the value returned by LinearRegression.score on the training data.
reciprocal_score = lm_1.score(reciprocal_features, reciprocal_targets)
print('非线性(倒数)回归模型准确度为:{0}'.format(reciprocal_score))
# Second figure: mean squared error of the in-sample predictions.
reciprocal_mse = metrics.mean_squared_error(
    reciprocal_targets, lm_1.predict(reciprocal_features))
print ("非线性(倒数)回归模型方差为:{0}".format(reciprocal_mse))
	
# Log-transformed predictor: fit Y = a + b * ln(X).
# NOTE(review): the printed labels call this the "exponential" model, but the
# transform applied here is a logarithm of X with Y left untransformed.
from sklearn.linear_model import LinearRegression
import math
lm_2 = LinearRegression()
log_features = [[math.log(value)] for value in data2['X']]
raw_targets = list(data2['Y'])
lm_2.fit(log_features, raw_targets)
# First figure: the value returned by LinearRegression.score on the training data.
log_score = lm_2.score(log_features, raw_targets)
print('非线性(指数)回归模型准确度为:{0}'.format(log_score))
# Second figure: mean squared error of the in-sample predictions.
log_mse = metrics.mean_squared_error(
    raw_targets, lm_2.predict(log_features))
print ("非线性(指数)回归模型方差为:{0}".format(log_mse))

# Power model: fit ln(Y) = a + b * ln(X), i.e. Y = e^a * X^b.
from sklearn.linear_model import LinearRegression
import math
lm_3 = LinearRegression()
power_features = [[math.log(x)] for x in data2['X']]
power_targets = [math.log(y) for y in data2['Y']]
lm_3.fit(power_features, power_targets)
# NOTE: both figures below are computed against ln(Y), not Y, so they are on a
# different scale from the figures printed for the models fitted on raw Y.
power_score = lm_3.score(power_features, power_targets)
# The labels previously contained a stray doubled ")" after "幂函数".
print('非线性(幂函数)回归模型准确度为:{0}'.format(power_score))
power_mse = metrics.mean_squared_error(
    power_targets, lm_3.predict(power_features))
print ("非线性(幂函数)回归模型方差为:{0}".format(power_mse))
喜欢 (0)
发表我的评论
取消评论
表情 贴图 加粗 删除线 居中 斜体 签到

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址