# --- Scalar arithmetic --------------------------------------------------------
a <- 1
b <- 2
a + b
# `c` now also names a number; calls such as c(1, 2) below still resolve to the
# base function because R skips non-function bindings when looking up a call.
c <- 3
a + b + c
4 / b
2 * c

# --- Numeric vectors ----------------------------------------------------------
var1 <- c(10, 20, 30, 40, 50)
var1
var2 <- 1:5
var2
var3 <- seq(from = 1, to = 10, by = 2)
var3
var4 <- seq(from = 1, to = 10, by = 3)
var4

# Arithmetic on two equal-length vectors is applied element by element.
v1 <- c(1, 2, 3, 4, 5)
v2 <- c(10, 20, 30, 40, 50)
v1 + v2

# --- Character vectors --------------------------------------------------------
str5 <- c("Hello", "World", "is", "good")
str5

# --- Summary statistics -------------------------------------------------------
score <- c(100, 80, 70, 50)
mean(score)
min(score)
max(score)

# Collapse a character vector into a single comma-separated string.
data <- c("홍길동", "서울시", "010-1111-2222")
paste(data, collapse = ",")
# Install ggplot2 only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Quick plots: a plain character vector, then columns of the `mpg` dataset
# that ships with ggplot2.
# NOTE(review): qplot() is deprecated in recent ggplot2 releases; it still
# runs but emits a deprecation warning.
x <- c("a", "a", "b", "c", "e", "e", "e", "f")
qplot(x)
qplot(data = mpg, x = cty)
qplot(data = mpg, x = drv, y = hwy, geom = "boxplot", colour = drv)
# Build a data frame from two existing score vectors; the columns take the
# names of the vectors.
english <- c(100, 90, 80, 50)
math <- c(70, 90, 80, 100)
df_midterm <- data.frame(english = english, math = math)
df_midterm

# Column means.
mean(df_midterm[["english"]])
mean(df_midterm[["math"]])

# Replace df_midterm with a frame whose columns are defined inline, adding a
# `class` column.
df_midterm <- data.frame(
  english = c(90, 80, 60, 70),
  math = c(50, 60, 100, 20),
  class = c(1, 1, 2, 2)
)
df_midterm
# Install readxl only when it is missing instead of on every run.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Read the exam workbook and derive per-row total and average of the three
# subject columns.
df_frame <- read_excel("excel_exam.xlsx")
df_frame
df_frame$total <- df_frame$math + df_frame$english + df_frame$science
df_frame$avg <- df_frame$total / 3
df_frame

# Round-trip through an .rda file.
# NOTE(review): the file is named after df_midterm but stores the object
# `df_frame`, so load() restores `df_frame`; the `df_midterm` printed below is
# whatever was already in the workspace. Confirm this is intended.
save(df_frame, file = "df_midterm.rda")
load("df_midterm.rda")
df_midterm
mean(df_midterm$english)
mean(df_frame$english)
mean(df_frame$math)

# Workbook without a header row: col_names = FALSE makes readxl generate
# column names instead of consuming the first data row as a header.
df_exam_novar <- read_excel("excel_exam_novar.xlsx", col_names = FALSE)
df_exam_novar
write.csv(df_exam_novar, file = "df_exam_novar.csv")

# Read the third sheet of a multi-sheet workbook.
df_exam_sheet <- read_excel("excel_exam_sheet.xlsx", sheet = 3)
df_exam_sheet

# Same kind of data from a CSV file, using base R.
df_csv_exam <- read.csv("csv_exam.csv")
df_csv_exam
# Recreate the midterm scores and export them as CSV in the working directory.
df_midterm <- data.frame(
  english = c(90, 80, 60, 70),
  math = c(50, 60, 100, 20),
  class = c(1, 1, 2, 2)
)
df_midterm
write.csv(x = df_midterm, file = "df_midterm.csv")
# Load the exam scores and look at them from several angles.
exam <- read.csv(file = "csv_exam.csv")
exam

# Leading and trailing rows: default row count first, then ten rows.
head(exam)
head(exam, n = 10)
tail(exam)
tail(exam, n = 10)

# Spreadsheet-style viewer.
View(exam)

# Dimensions, column structure and per-column summary statistics.
dim(exam)
str(exam)
summary(exam)
# Open the help page for the mpg dataset.
help("mpg")

# Take a plain data.frame copy of ggplot2's mpg so later sections can add
# columns to it.
mpg <- as.data.frame(ggplot2::mpg)

# Same inspection routine as for any other data frame.
dim(mpg)
mpg
head(mpg)
tail(mpg)
View(mpg)
summary(mpg)
# Small frame used to demonstrate renaming a column.
df_raw <- data.frame(
  var1 = c(1, 2, 1),
  var2 = c(2, 3, 2)
)
df_raw

# Install dplyr only when it is missing instead of on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# rename() takes new_name = old_name and returns a modified copy; df_raw
# itself is left untouched.
df_new <- rename(df_raw, v2 = var2)
df_new
df_copy <- df_new
df_copy

# Derived column: element-wise sum of two existing columns.
df <- data.frame(
  var1 = c(10, 20, 30),
  var2 = c(100, 200, 300)
)
df$sum <- df$var1 + df$var2
df
# Combined fuel economy: mean of city and highway mileage.
mpg$total <- (mpg$cty + mpg$hwy) / 2
head(mpg)

# Two-way label: "pass" when total is at least 20.
mpg$test <- ifelse(mpg$total >= 20, "pass", "fail")
tail(mpg)

# Three-way grade; the nested ifelse() is evaluated for values below 30.
mpg$grade <- ifelse(mpg$total >= 30, "A",
  ifelse(mpg$total >= 20, "B", "C")
)
head(mpg, 20)

# Distribution of the combined mileage and of the grades.
hist(mpg$total)
library(ggplot2)
qplot(data = mpg, x = total, colour = total)
table(mpg$grade)

# Four-way grade. The lowest label is "D"; the previous "D>" carried a stray
# character that would have ended up in the data.
mpg$grade2 <- ifelse(mpg$total >= 30, "A",
  ifelse(mpg$total >= 25, "B",
    ifelse(mpg$total >= 20, "C", "D")
  )
)
head(mpg)
library(dplyr)
exam <- read.csv(file = "csv_exam.csv")
exam

# --- Row filtering ------------------------------------------------------------
# Single conditions.
exam %>% filter(class != 2)
exam %>% filter(math > 50)

# Combined conditions: & requires both, | requires at least one.
exam %>% filter(class == 2 & math == 100)
exam %>% filter(math >= 90 | english >= 90)

# Membership in a set, written three equivalent ways.
exam %>% filter(class == 1 | class == 3 | class == 5)
exam %>% filter(class %in% c(1, 3, 5))
exam %>% filter(class != 2 & class != 4)

# Keep filtered subsets for later use.
class1 <- exam %>% filter(class == 1)
class2 <- exam %>% filter(class == 2)
class1
class2

# --- Modulo operator ----------------------------------------------------------
7 %% 2
result <- if (7 %% 2 == 1) "홀수" else "짝수"
result

# --- Column selection ---------------------------------------------------------
exam %>% select(class, math, english)
exam %>%
  select(english) %>%
  head(10)
# Install dplyr only when it is missing. Reinstalling a package that is
# already attached in the session is unnecessary and can fail.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
exam <- read.csv("csv_exam.csv")
exam

# Sorting: ascending by math, then by class with math descending inside each
# class.
exam %>% arrange(math)
exam %>% arrange(class, desc(math))

# Derived columns with mutate(); head() keeps only the first rows.
exam %>%
  mutate(total = math + english + science) %>%
  head()
exam2 <- exam %>%
  mutate(
    total = math + english + science,
    mean = (math + english + science) / 3
  ) %>%
  head()
exam2

# Per-class summary of the math scores; n() counts the rows in each group.
exam %>%
  group_by(class) %>%
  summarise(
    mean_math = mean(math),
    sum_math = sum(math),
    median_math = median(math),
    count = n()
  )

# Mean city mileage for every manufacturer / drive-train combination.
mpg %>%
  group_by(manufacturer, drv) %>%
  summarise(mean_cty = mean(cty)) %>%
  head(10)

# SUVs only: mean combined mileage per manufacturer, best first.
mpg %>%
  group_by(manufacturer) %>%
  filter(class == "suv") %>%
  mutate(tot = (cty + hwy) / 2) %>%
  summarise(mean_tot = mean(tot)) %>%
  arrange(desc(mean_tot))
# --- Joining columns side by side ----------------------------------------------
# Two score tables that share the key column `id`.
test1 <- data.frame(
  id = c(1, 2, 3, 4, 5),
  midterm = c(60, 80, 70, 90, 85)
)
test1
test2 <- data.frame(
  id = c(1, 2, 3, 4, 5),
  final = c(70, 83, 65, 95, 80)
)
test2
total <- left_join(test1, test2, by = "id")
total

# Lookup table: attach the teacher of each class to every exam row.
name <- data.frame(
  class = c(1, 2, 3, 4, 5),
  teacher = c("kim", "lee", "park", "choi", "jung")
)
name
exam <- read.csv(file = "csv_exam.csv")
exam
exam_new <- left_join(exam, name, by = "class")
exam_new

# --- Stacking rows -------------------------------------------------------------
group_a <- data.frame(
  id = c(1, 2, 3, 4, 5),
  test = c(60, 80, 70, 90, 85)
)
group_a
group_b <- data.frame(
  id = c(6, 7, 8, 9, 10),
  test = c(70, 83, 65, 95, 80)
)
group_b
group_all <- bind_rows(group_a, group_b)
group_all

group_c <- data.frame(
  id = c(11, 12, 13, 14, 15),
  test = c(70, 83, 65, 95, 80)
)
group_c
# group_d deliberately uses different column names (id2 / test2).
group_d <- data.frame(
  id2 = c(16, 17, 18, 19, 20),
  test2 = c(70, 83, 65, 95, 80)
)
group_d

# Matching column names stack cleanly ...
group_all2 <- bind_rows(group_a, group_b, group_c)
group_all2
# ... while bind_rows() fills columns missing from an input with NA.
group_all3 <- bind_rows(group_a, group_b, group_c, group_d)
group_all3
# --- Detecting and dropping missing values --------------------------------------
df <- data.frame(
  sex = c("M", "F", NA, "M", "F"),
  score = c(5, 4, 3, 4, NA)
)
df

# Keep only rows where neither column is missing.
df_nomiss <- df %>% filter(!(is.na(score) | is.na(sex)))
df_nomiss

# Logical matrix marking every missing cell.
m <- is.na(df)
m

# --- Aggregating while ignoring NA ----------------------------------------------
exam <- read.csv(file = "csv_exam.csv")
exam[c(3, 8, 15), "math"] <- NA
exam
exam %>%
  summarise(
    mean_math = mean(math, na.rm = TRUE),
    sum_math = sum(math, na.rm = TRUE),
    median_math = median(math, na.rm = TRUE)
  )
mean(exam$math, na.rm = TRUE)

# --- Mean imputation -------------------------------------------------------------
# Reload the data, blank the same three cells, then fill them with the mean of
# the remaining math scores.
exam <- read.csv(file = "csv_exam.csv")
exam[c(3, 8, 15), "math"] <- NA
exam
exam$math <- ifelse(
  is.na(exam$math),
  mean(exam$math, na.rm = TRUE),
  exam$math
)
exam
# Toy data containing one sex code of 3 and one score of 6, which the steps
# below treat as invalid.
outlier <- data.frame(
  sex = c(1, 2, 1, 3, 2, 1),
  score = c(5, 4, 3, 4, 2, 6)
)
outlier

# Turn the sex code 3 into NA.
outlier$sex <- ifelse(outlier$sex == 3, NA, outlier$sex)
outlier

# Turn scores above 5 into NA.
outlier$score <- ifelse(outlier$score > 5, NA, outlier$score)
outlier

# Mean score per sex over the rows that are complete in both columns.
out <- outlier %>%
  filter(!(is.na(sex) | is.na(score))) %>%
  group_by(sex) %>%
  summarise(mean_score = mean(score))
out
# Box-plot statistics for highway mileage.
boxplot(mpg$hwy)$stats

# Treat highway mileage below 12 or above 37 as outliers and blank them.
# NOTE(review): 12 and 37 are presumably taken from the statistics printed
# above; confirm against that output.
mpg$hwy <- ifelse(mpg$hwy < 12 | mpg$hwy > 37, NA, mpg$hwy)

# Rows whose hwy was blanked. The column is referenced by bare name so that
# filter() evaluates it inside the piped data rather than in the global `mpg`.
mpg %>% filter(is.na(hwy))
table(mpg$hwy)
mpg %>%
  group_by(drv) %>%
  summarise(mean_hwy = mean(hwy, na.rm = TRUE))

# Install ggplot2 only when it is missing instead of on every run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
mpg %>% filter(is.na(hwy))
??mpg

# Scatter plot of displacement against highway mileage, x axis limited to 3-6.
library(ggplot2)
ggplot(data = mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  xlim(3, 6)

# Mean highway mileage per drive train, shown as a column chart.
library(dplyr)
df_mpg <- mpg %>%
  filter(!is.na(hwy)) %>%
  group_by(drv) %>%
  summarise(mean_hwy = mean(hwy))
df_mpg
ggplot(data = df_mpg, aes(x = drv, y = mean_hwy)) +
  geom_col()

# Frequency bar chart of highway mileage.
ggplot(data = mpg, aes(x = hwy)) +
  geom_bar()

# Replace the modified in-memory mpg with the copy stored in mpg.csv.
mpg <- read.csv("mpg.csv")

# Line chart of unemployment over time from ggplot2's economics dataset, then
# a box plot of highway mileage per drive train.
ggplot(data = economics, aes(x = date, y = unemploy)) +
  geom_line()
ggplot(data = mpg, aes(x = drv, y = hwy)) +
  geom_boxplot()
?economics
# Install foreign only when it is missing instead of on every run.
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages("foreign")
}
library(foreign) # reading SPSS files
library(dplyr)   # data-manipulation verbs and the %>% pipe
library(ggplot2) # plotting
library(readxl)  # reading Excel files

# Read the SPSS survey file as a data frame and keep the raw import untouched
# by working on a copy.
raw_welfare <- read.spss(
  file = "Koweps_hpc10_2015_beta1.sav",
  to.data.frame = TRUE
)
welfare <- raw_welfare
summary(welfare)
head(welfare)

# Give the coded survey columns used below readable names
# (new_name = original_code).
welfare <- rename(
  welfare,
  sex = h10_g3,
  birth = h10_g4,
  marriage = h10_g10,
  religion = h10_g11,
  income = p1002_8aq1,
  code_job = h10_eco9,
  code_region = h10_reg7
)
head(welfare)
# Frequency of each sex code, computed two ways.
table(welfare$sex)
welfare %>%
  group_by(sex) %>%
  summarise(count = n())

# Code 9 is treated as missing; then 1 becomes "male" and every other
# non-missing code becomes "female".
welfare$sex <- ifelse(welfare$sex == 9, NA, welfare$sex)
welfare$sex <- ifelse(welfare$sex == 1, "male", "female")
qplot(welfare$sex)

# The column is spelled out in full; the previous `welfare$incom` only worked
# through `$` partial matching on data frames.
qplot(welfare$income)

# Incomes of 0 and 9999 are treated as missing.
welfare$income <- ifelse(welfare$income %in% c(0, 9999), NA, welfare$income)
table(is.na(welfare$income))
welfare %>%
  filter(is.na(income)) %>%
  summarise(count = n())

# Mean income per sex, written two ways: drop the NA rows first, or let mean()
# skip them. The second assignment overwrites the first.
sex_income <- welfare %>%
  filter(!is.na(income)) %>%
  group_by(sex) %>%
  summarise(mean_income = mean(income))
sex_income <- welfare %>%
  group_by(sex) %>%
  summarise(mean_income = mean(income, na.rm = TRUE))
sex_income
ggplot(data = sex_income, aes(x = sex, y = mean_income)) +
  geom_col()
# Age derived from birth year, counting the birth year itself as age 1.
# NOTE(review): the reference year is 2019 while the data file name mentions
# 2015 -- confirm which year is intended.
welfare$age <- 2019 - welfare$birth + 1
qplot(welfare$age)

# Mean income at each age.
age_income <- welfare %>%
  filter(!is.na(income)) %>%
  group_by(age) %>%
  summarise(mean_income = mean(income))
head(age_income)
ggplot(data = age_income, aes(x = age, y = mean_income)) +
  geom_line()

# Coarse three-way age group: under 30, 30 to 59, and 60 or above.
welfare <- welfare %>%
  mutate(
    ageg = ifelse(age < 30, "young", ifelse(age <= 59, "middle", "old"))
  )
qplot(welfare$ageg)

# Overwrite ageg with ten-year bands. Each band is closed on the left and open
# on the right ([10, 20) -> "10"), with open-ended first and last bands.
welfare <- welfare %>%
  mutate(
    ageg = as.character(cut(
      age,
      breaks = c(-Inf, 10, 20, 30, 40, 50, 60, 70, 80, 90, Inf),
      labels = c(
        "child", "10", "20", "30", "40", "50", "60", "70", "80", "older"
      ),
      right = FALSE,
      include.lowest = TRUE
    ))
  )
qplot(welfare$ageg) +
  scale_x_discrete(
    limits = c(
      "child", "10", "20", "30", "40", "50", "60", "70", "80", "older"
    )
  )
# Display order for the age-band labels currently stored in welfare$ageg.
ageg_order <- c(
  "child", "10", "20", "30", "40", "50", "60", "70", "80", "older"
)

# Mean income per age band and sex.
sex_income <- welfare %>%
  filter(!is.na(income)) %>%
  group_by(ageg, sex) %>%
  summarise(mean_income = mean(income))
sex_income

# The x-axis limits must name values that occur in `ageg`. The previous limits
# ("young", "middle", "old") belonged to an earlier grouping that has since
# been overwritten, so every bar was dropped from the chart.
ggplot(data = sex_income, aes(x = ageg, y = mean_income, fill = sex)) +
  geom_col(position = "dodge") +
  scale_x_discrete(limits = ageg_order)

# Mean income per single year of age and sex, one line per sex.
sex_age <- welfare %>%
  filter(!is.na(income)) %>%
  group_by(age, sex) %>%
  summarise(mean_income = mean(income))
head(sex_age)
ggplot(data = sex_age, aes(x = age, y = mean_income, col = sex)) +
  geom_line()

# Same aggregation as sex_income, kept under its own name.
sex_ageg <- welfare %>%
  filter(!is.na(income)) %>%
  group_by(ageg, sex) %>%
  summarise(mean_income = mean(income))
sex_ageg

# Bars are coloured through `fill`; `col` only changes the bar outline.
ggplot(data = sex_ageg, aes(x = ageg, y = mean_income, fill = sex)) +
  geom_col(position = "dodge") +
  scale_x_discrete(limits = ageg_order)
library(readxl)

# Job code book read from the second sheet of the workbook.
# NOTE(review): assumes that sheet has the columns `code_job` and `job`, which
# the code below relies on -- confirm against the workbook.
list_job <- read_excel("Koweps_Codebook.xlsx", col_names = TRUE, sheet = 2)
head(list_job)

# Attach the job name to every respondent. The join key is passed through
# `by`; left_join() has no `id` argument, so the previous `id = "code_job"`
# never specified the key.
welfare <- left_join(welfare, list_job, by = "code_job")
head(welfare)
welfare %>%
  filter(!is.na(code_job)) %>%
  select(code_job, job) %>%
  head(10)

# Mean income per job over rows where both job and income are known.
job_income <- welfare %>%
  filter(!is.na(job) & !is.na(income)) %>%
  group_by(job) %>%
  summarise(mean_income = mean(income))
head(job_income)

# Ten best-paid jobs as a horizontal bar chart ordered by mean income.
top10 <- job_income %>%
  arrange(desc(mean_income)) %>%
  head(10)
top10
ggplot(data = top10, aes(x = reorder(job, mean_income), y = mean_income)) +
  geom_col() +
  coord_flip()
# Ten lowest-paid jobs, plotted with the income axis fixed to 0-150.
bottom10 <- job_income %>%
  arrange(mean_income) %>%
  head(10)
ggplot(
  data = bottom10,
  aes(x = reorder(job, -mean_income), y = mean_income)
) +
  geom_col() +
  coord_flip() +
  ylim(0, 150)

# Ten most common jobs among male respondents.
job_male <- welfare %>%
  filter(!is.na(job) & sex == "male") %>%
  group_by(job) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(10)
job_male

# Ten most common jobs among female respondents.
job_female <- welfare %>%
  filter(!is.na(job) & sex == "female") %>%
  group_by(job) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(10)
job_female

# Ten largest job / sex combinations, then a horizontal bar chart of them.
job_gender <- welfare %>%
  filter(!is.na(job)) %>%
  group_by(job, sex) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(10)
job_gender
ggplot(data = job_gender, aes(x = reorder(job, n), y = n)) +
  geom_col() +
  coord_flip()
# `graphics` is a base package that ships with R, so it is only attached here
# and never installed.
library(graphics)

# Flat pie chart of sales per team.
x <- c(9, 15, 20, 6)
label <- c("영업1팀", "영업2팀", "영업3팀", "영업4팀")
pie(x, labels = label, main = "부서별 영업 실적")

# pie3D() comes from plotrix. The package name was previously misspelled
# ("prlotrix"), so the install could never provide what library() needs.
# Install it only when it is missing.
if (!requireNamespace("plotrix", quietly = TRUE)) {
  install.packages("plotrix")
}
library(plotrix)
pie3D(
  x,
  labels = label,
  explode = 0.1,
  labelcex = 0.7,
  main = "부서별 영업 실적"
)
library(dplyr)

# Respondent count per marriage code, largest first.
m_cnt <- welfare %>%
  filter(!is.na(marriage)) %>%
  group_by(marriage) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(10)
m_cnt
pie3D(
  m_cnt$n,
  labels = m_cnt$marriage,
  explode = 0.1,
  labelcex = 0.7,
  main = "결혼형태집계"
)

# Religion code 1 becomes "yes", every other code "no".
welfare$religion <- ifelse(welfare$religion == 1, "yes", "no")

# Marriage code 1 -> "marriage", 3 -> "divorce"; all other codes are excluded
# by setting them to NA.
welfare$group_marriage <- ifelse(
  welfare$marriage == 1, "marriage",
  ifelse(welfare$marriage == 3, "divorce", NA)
)

# Counts per religion / marital-status pair. After summarise() the result is
# still grouped by religion, so tot_group and pct are computed within each
# religion group.
religion_marriage <- welfare %>%
  filter(!is.na(group_marriage)) %>%
  group_by(religion, group_marriage) %>%
  summarise(n = n()) %>%
  mutate(tot_group = sum(n)) %>%
  mutate(pct = round(n / tot_group * 100, 1))
religion_marriage

# Slice labels are "<religion> <status>". paste() already separates its
# arguments with a single space; the former extra " " argument was pasted as a
# third string and left trailing blanks on every label.
pie3D(
  religion_marriage$n,
  labels = paste(religion_marriage$religion, religion_marriage$group_marriage),
  explode = 0.1,
  labelcex = 0.7,
  main = "종교유무별 이혼률"
)