Logistic regression for Titanic survival prediction with TensorFlow

Github address: github.com/yangjinghit…

import pandas as pd
import numpy as np
Copy the code

# Load the Titanic training set into a DataFrame (path is relative to the working directory).
data = pd.read_csv('./tt/train.csv')
Copy the code

# Notebook display: list the column names of the raw training frame.
data.columns
Copy the code

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')
Copy the code

# Keep only the label (Survived) and the columns used as features.
# .copy() makes the subset an independent frame so the in-place column
# assignments that follow do not trigger pandas' SettingWithCopyWarning.
data = data[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']].copy()
Copy the code

# Notebook display: preview the first three rows.
data.head(3)
Copy the code

	Survived	Pclass	Sex	Age	SibSp	Fare	Cabin	Embarked
0	0	3	male	22.0	1	7.2500	NaN	S
1	1	1	female	38.0	1	71.2833	C85	C
2	1	3	female	26.0	0	7.9250	NaN	S

# Impute missing ages with the mean of the observed ages.
data['Age'] = data['Age'].fillna(data['Age'].mean())
Copy the code

# Replace each distinct cabin string with an integer code ([0] is the code
# array returned by factorize). Per the pandas documentation, missing values
# are encoded with the sentinel -1, so Cabin holds no NaN after this line.
data['Cabin'] = pd.factorize(data.Cabin)[0]
Copy the code

# Fill every remaining NaN with 0, in place. This is why the later
# `data.Embarked.unique()` output contains a 0 next to 'S', 'C' and 'Q'.
data.fillna(0, inplace = True)
Copy the code

# Binary-encode Sex: 'male' becomes 1, every other value becomes 0.
data['Sex'] = data['Sex'].map(lambda s: 1 if s == 'male' else 0)
Copy the code

# One-hot encode passenger class: p1/p2/p3 are 1 where Pclass equals 1/2/3
# and 0 elsewhere (boolean mask cast to int32).
data['p1'] = np.array(data['Pclass'] == 1).astype(np.int32)
data['p2'] = np.array(data['Pclass'] == 2).astype(np.int32)
data['p3'] = np.array(data['Pclass'] == 3).astype(np.int32)
Copy the code

# Drop the raw Pclass column; the p1/p2/p3 indicator columns replace it.
del data['Pclass']
Copy the code

# Notebook display: list the distinct Embarked values before encoding them.
data.Embarked.unique()
Copy the code

array(['S', 'C', 'Q', 0], dtype=object)
Copy the code

# One-hot encode the port of embarkation: e1/e2/e3 flag 'S'/'C'/'Q'.
# Rows whose Embarked was missing (filled with 0 earlier) get 0 in all three.
data['e1'] = np.array(data['Embarked'] == 'S').astype(np.int32)
data['e2'] = np.array(data['Embarked'] == 'C').astype(np.int32)
data['e3'] = np.array(data['Embarked'] == 'Q').astype(np.int32)
Copy the code

# Drop the raw Embarked column; the e1/e2/e3 indicator columns replace it.
del data['Embarked']
Copy the code

# Notebook display: dtype of the frame's underlying NumPy array
# (the output below shows float64, i.e. every column is numeric now).
data.values.dtype
Copy the code

dtype('float64')
Copy the code

# Notebook display: list the columns after feature encoding.
data.columns
Copy the code

Index(['Survived', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'p1', 'p2',
       'p3', 'e1', 'e2', 'e3'],
      dtype='object')
Copy the code

# Feature matrix: the 12 input columns, in the order the model is fed.
data_train = data[['Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'p1', 'p2', 'p3', 'e1', 'e2', 'e3']]
Copy the code

# Label vector as a single-column 2-D array (one row per passenger).
data_target = data['Survived'].values.reshape(-1, 1)

Copy the code

# Notebook display: confirm that features and labels have matching row counts.
np.shape(data_train), np.shape(data_target)
Copy the code

((891, 12), (891, 1))
Copy the code

import tensorflow as tf
Copy the code

/anaconda3/envs/py35/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: compiletime version 3.6 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.5
  return f(*args, **kwds)
/anaconda3/envs/py35/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Copy the code

# Graph inputs (TensorFlow 1.x placeholders). The leading None leaves the
# batch dimension open: x takes 12 features per row, y one label per row.
x = tf.placeholder("float", shape=[None, 12])
y = tf.placeholder("float", shape=[None, 1])
Copy the code

# Logistic-regression parameters, randomly initialised: a 12x1 weight
# matrix (one weight per feature) and a scalar bias.
weight = tf.Variable(tf.random_normal([12, 1]))
bias = tf.Variable(tf.random_normal([1]))
# Raw logits; the sigmoid is applied separately for prediction and inside the loss.
output = tf.matmul(x, weight) + bias
# Hard 0/1 prediction: 1.0 where the sigmoid probability exceeds 0.5.
pred = tf.cast(tf.sigmoid(output) > 0.5, tf.float32)
Copy the code

# Mean sigmoid cross-entropy between the labels y and the raw logits `output`
# (the op takes logits, not probabilities, so no sigmoid is applied first).
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = output))
Copy the code

# One gradient-descent update of the variables that minimises `loss`
# (learning rate 0.003).
train_step = tf.train.GradientDescentOptimizer(0.003).minimize(loss)
Copy the code

# Accuracy: fraction of rows whose hard prediction equals the label.
accuracy = tf.reduce_mean(tf.cast(tf.equal(pred,y), tf.float32))
Copy the code

# Load the test set and apply the same feature engineering as the training
# data, ending with the same 12 columns in the same order:
# Sex, Age, SibSp, Parch, Fare, Cabin, p1, p2, p3, e1, e2, e3.
data_test = pd.read_csv('./tt/test.csv')
# .copy() makes the subset an independent frame so the column assignments
# below do not trigger pandas' SettingWithCopyWarning.
data_test = data_test[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']].copy()
# Impute missing ages with the mean age of the test set itself.
data_test['Age'] = data_test['Age'].fillna(data_test['Age'].mean())
# NOTE(review): the cabin codes are fitted on the test set independently of
# the training set, so the same cabin string can map to a different integer
# in each frame - confirm whether that is acceptable for this feature.
data_test['Cabin'] = pd.factorize(data_test.Cabin)[0]
data_test['Sex'] = [1 if x == 'male' else 0 for x in data_test.Sex]
# Fill whatever NaN remains with 0.
data_test.fillna(0, inplace=True)
# One-hot encode passenger class and port of embarkation.
data_test['p1'] = np.array(data_test['Pclass'] == 1).astype(np.int32)
data_test['p2'] = np.array(data_test['Pclass'] == 2).astype(np.int32)
data_test['p3'] = np.array(data_test['Pclass'] == 3).astype(np.int32)
data_test['e1'] = np.array(data_test['Embarked'] == 'S').astype(np.int32)
data_test['e2'] = np.array(data_test['Embarked'] == 'C').astype(np.int32)
data_test['e3'] = np.array(data_test['Embarked'] == 'Q').astype(np.int32)
# Drop the raw categorical columns now that they are encoded.
del data_test['Pclass']
del data_test['Embarked']
Copy the code

# Labels used to score the test set, as a float32 column vector.
# (-1, 1) lets NumPy infer the row count instead of hard-coding 418.
# NOTE(review): gender_submission.csv looks like Kaggle's sample submission
# rather than ground-truth labels - confirm before reading the resulting
# "test accuracy" as true generalisation accuracy.
test_label = pd.read_csv('./tt/gender_submission.csv')
test_label = np.reshape(test_label.Survived.values.astype(np.float32), (-1, 1))
Copy the code

# Open a TF 1.x session and initialise all graph variables before training.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Histories appended to during training and plotted afterwards.
loss_train, train_acc, test_acc = [], [], []
Copy the code

# Mini-batch gradient descent: 5000 epochs, batches of 100 rows.
for i in range(5000):
    # Reshuffle features and labels with the same permutation each epoch so
    # rows stay aligned while the batch composition changes.
    index = np.random.permutation(len(data_target))
    data_train = np.array(data_train)[index]
    data_target = np.array(data_target)[index]
    # Stepping by batch start offset never yields an empty final batch, even
    # when the number of rows is an exact multiple of the batch size.
    for start in range(0, len(data_target), 100):
        batch_xs = data_train[start:start + 100]
        batch_ys = data_target[start:start + 100]
        sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys})
        # Every 1000th epoch, log metrics after each batch of that epoch.
        if i % 1000 == 0:
            # Loss and accuracy on the current batch, fetched in one run.
            loss_temp, train_acc_temp = sess.run(
                [loss, accuracy], feed_dict={x: batch_xs, y: batch_ys})
            loss_train.append(loss_temp)
            train_acc.append(train_acc_temp)
            # Accuracy on the full test set.
            test_acc_temp = sess.run(accuracy, feed_dict={x: data_test, y: test_label})
            test_acc.append(test_acc_temp)
            print(loss_temp, train_acc_temp, test_acc_temp)
Copy the code

7.3604326 0.57 0.5717703
8.014492 0.44 0.45454547
7.1409583 0.31 0.4617225
5.1470814 0.36 0.43301436
6.3511767 0.45 0.4784689
5.792799 0.44 0.47368422
4.681352 0.4 0.44258374
5.842095 0.39 0.46411484
3.0389767 0.48351648 0.42105263
0.48493686 0.75 0.8779904
0.4163903 0.79 0.8732057
0.4867239 0.78 0.88755983
0.44354805 0.82 0.88755983
0.52813506 0.76 0.8755981
0.4696804 0.76 0.8851675
0.49193314 0.77 0.8779904
0.45960158 0.83 0.8755981
0.32401434 0.8681319 0.83732057
0.44543365 0.79 0.9114832
0.44734138 0.78 0.8971292
0.4601239 0.8 0.8971292
0.4851056 0.79 0.90430623
0.4117188 0.83 0.8995215
0.39182335 0.78 0.8827751
0.36042503 0.86 0.8923445
0.5273975 0.77 0.8827751
0.4674853 0.8021978 0.8899522
0.3866866 0.87 0.8684211
0.46905473 0.79 0.8947368
0.39205357 0.83 0.94736844
0.52698064 0.78 0.94736844
0.45487815 0.83 0.9569378
0.51008093 0.76 0.9521531
0.36122188 0.85 0.9569378
0.38313925 0.84 0.9186603
0.46178767 0.7582418 0.8779904
0.4290183 0.79 0.90909094
0.38088027 0.86 0.94258374
0.50604784 0.78 0.92105263
0.42591545 0.83 0.9138756
0.44238362 0.81 0.8923445
0.5280578 0.73 0.93779904
0.37598786 0.84 0.95454544
0.44755325 0.83 0.9354067
0.45485118 0.7912088 0.93779904
Copy the code

# Notebook display: evaluate the learned 12x1 weight matrix.
sess.run(weight)
Copy the code

array([[2.7579787], [0.02792009], [0.33448917], [0.16113752], [0.01580716], [0.01695831], [0.94773066], [1.2085476], [0.1693252], [-0.52223164], [0.05860335], [-0.6202243]], dtype=float32)
Copy the code

# Notebook display: evaluate the learned bias term.
sess.run(bias)
Copy the code

array([1.7377106], dtype=float32)
Copy the code

import matplotlib.pyplot as plt
Copy the code

# Plot the logged training-batch loss as a solid black line.
plt.plot(loss_train, 'k-')
plt.title('train loss')
plt.show()
Copy the code

# Plot logged training-batch accuracy (solid blue) against test-set
# accuracy (dashed red) on the same axes.
plt.plot(train_acc,'b-', label ='train_acc')
plt.plot(test_acc, 'r--', label = 'test_acc')
plt.title('train and test accuracy')
plt.legend()
plt.show()
Copy the code

Logistic regression for Titanic survival prediction with TensorFlow

Related Posts

Cannot find declaration to go to

How to reduce memory footprint by 90%

Zhouyi Compass Deployment and Simulation (13) | August challenge