Untitled

카이제곱 (독립성) 검정 3+1가지 방법

  • 범주-범주 교차표를 이용한 연관성 확인
  • COPD 데이터로 SEX(성별)와 DM(당뇨) 사이의 연관성 확인
# Data preparation: read the COPD / lung-cancer CSV and preview the first rows.
# The header flag is written as TRUE rather than T: T is an ordinary variable
# that can be reassigned, while TRUE is a reserved constant.
df1 <- read.csv("week4/4주차_코드/COPD_Lung_Cancer.csv", header = TRUE)
head(df1)
##   PERSON_ID SEX DTH CTRB_PT_TYPE_CD   BMI BP_LWST BLDS TOT_CHOLE  HMG
## 1  10000065   1   0               8 25.73      90  112       177 12.2
## 2  10000183   1   0               9 22.77     100  105       177 15.7
## 3  10000269   1   0               8 24.54      70   76       156 13.1
## 4  10000471   1   0               1 21.63     100   70       228 14.0
## 5  10000788   2   0               7 25.65      54  113       199 12.1
## 6  10001096   2   0               3 19.84      60  137       182 12.0
##   FMLY_CANCER_PATIEN_YN SMK_STAT_TYPE DRNK_HABIT EXERCI_FREQ lungC copd
## 1                     1             1          1           1     0    0
## 2                     1             1          5           1     0    1
## 3                     1             1          1           1     1    1
## 4                    NA             1          1           1     0    1
## 5                     1             1          1           1     0    0
## 6                     1             1          1           1     0    0
##   Asthma DM Tub before_op_score after_op_score
## 1      0  1   0              44             52
## 2      1  1   0              44             53
## 3      0  1   1              37             60
## 4      0  1   0              50             44
## 5      1  1   0              31             43
## 6      0  0   0              34             49
# Method 1: cross-tabulate the two columns with table() and hand the
# resulting contingency table to chisq.test().
chisq.test(x = table(df1$SEX, df1$DM)) # p-value = 0.8943
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(df1$SEX, df1$DM)
## X-squared = 0.017658, df = 1, p-value = 0.8943
# Method 2: skip table() and pass the two columns straight to chisq.test().
chisq.test(x = df1$SEX, y = df1$DM) # p-value = 0.8943
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  df1$SEX and df1$DM
## X-squared = 0.017658, df = 1, p-value = 0.8943
# Method 3: build the contingency table from a one-sided formula with
# xtabs(), then let summary() report the test of independence.
x <- xtabs(formula = ~ SEX + DM, data = df1)
# p-value = 0.8412. The statistic printed here (0.04013) differs from methods
# 1-2 (0.017658); NOTE(review): presumably because summary() applies no Yates'
# continuity correction -- confirm against the summary.table documentation.
summary(x)
## Call: xtabs(formula = ~SEX + DM, data = df1)
## Number of cases in table: 944 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 0.04013, df = 1, p-value = 0.8412
# Method 4: mytable() from the moonBook package.
# Original author's note: the order of the two columns does not matter.
library(moonBook)
mytable(DM ~ SEX, data = df1) # p = 0.894
## 
##    Descriptive Statistics by 'DM'  
## ____________________________________ 
##             0           1        p  
##          (N=375)     (N=569)  
## ------------------------------------ 
##  SEX                           0.894
##    - 1 158 (42.1%) 236 (41.5%)      
##    - 2 217 (57.9%) 333 (58.5%)      
## ------------------------------------

비모수검정: 2집단(Mann-Whitney = Wilcoxon rank-sum), 3집단 이상(Kruskal-Wallis)

  • COPD 데이터 중 처음 100개 행만 뽑아 정규성을 가정하기 어려운 작은 표본을 만든 다음
  • SEX(2집단)별 BMI 분포(위치) 차이 검정
  • SMK_STAT_TYPE(3집단)별 BMI 분포(위치) 차이 검정
# Data preparation: keep only the first 100 rows as a small sample.
# head() never reads past the end of the data, whereas df1[1:100, ] would pad
# the result with all-NA rows if df1 had fewer than 100 rows.
df2 <- head(df1, 100)
head(df2)
##   PERSON_ID SEX DTH CTRB_PT_TYPE_CD   BMI BP_LWST BLDS TOT_CHOLE  HMG
## 1  10000065   1   0               8 25.73      90  112       177 12.2
## 2  10000183   1   0               9 22.77     100  105       177 15.7
## 3  10000269   1   0               8 24.54      70   76       156 13.1
## 4  10000471   1   0               1 21.63     100   70       228 14.0
## 5  10000788   2   0               7 25.65      54  113       199 12.1
## 6  10001096   2   0               3 19.84      60  137       182 12.0
##   FMLY_CANCER_PATIEN_YN SMK_STAT_TYPE DRNK_HABIT EXERCI_FREQ lungC copd
## 1                     1             1          1           1     0    0
## 2                     1             1          5           1     0    1
## 3                     1             1          1           1     1    1
## 4                    NA             1          1           1     0    1
## 5                     1             1          1           1     0    0
## 6                     1             1          1           1     0    0
##   Asthma DM Tub before_op_score after_op_score
## 1      0  1   0              44             52
## 2      1  1   0              44             53
## 3      0  1   1              37             60
## 4      0  1   0              50             44
## 5      1  1   0              31             43
## 6      0  0   0              34             49
# Mann-Whitney test, also known as the Wilcoxon rank-sum test.
# cf) The Wilcoxon signed-rank test works on the differences within pairs,
#     i.e. it is the non-parametric counterpart of the paired t-test.
wilcox.test(BMI ~ SEX, data = df2) # p-value = 0.6791
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  BMI by SEX
## W = 1189.5, p-value = 0.6791
## alternative hypothesis: true location shift is not equal to 0
# If the data contain ties, pass exact = FALSE.
# Written as FALSE rather than F: F is an ordinary variable that can be
# reassigned, while FALSE is a reserved constant.
wilcox.test(BMI ~ SEX, data = df2, exact = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  BMI by SEX
## W = 1189.5, p-value = 0.6791
## alternative hypothesis: true location shift is not equal to 0
# Kruskal-Wallis rank sum test: BMI across the SMK_STAT_TYPE groups.
kruskal.test(BMI ~ SMK_STAT_TYPE, data = df2) # p-value = 0.1803
## 
##  Kruskal-Wallis rank sum test
## 
## data:  BMI by SMK_STAT_TYPE
## Kruskal-Wallis chi-squared = 3.4263, df = 2, p-value = 0.1803

+ Recent posts