4회 R meetup은 동국대 통계학과 전산실에서 진행되었다. 약속대로 코드를 공유한다.
바로 공유했어야 하는데 개인적인 사정으로 늦어졌다. 이 점 양해 부탁드린다.
먼저 ROAuth를 이용해 트위터 API 사용 권한을 가져오는 예제이다.
키는 https://dev.twitter.com/apps/new 에서 등록하면 된다.
# Twitter API authorization via an OAuth handshake (ROAuth + twitteR).
library(ROAuth)
library(twitteR)

# Path to the CA certificate bundle that ships with RCurl; it is passed as
# `cainfo` to every call below that talks to Twitter.
cainfo <- system.file("CurlSSL", "cacert.pem", package = "RCurl")

# Replace the placeholder key/secret with the values of the registered app.
# All three OAuth endpoints use https so that request tokens, access tokens
# and the authorization step are never sent over plain http.
cred <- OAuthFactory$new(
  consumerKey = "XXXXXXXXXXXXXX",
  consumerSecret = "YYYYYYYYYYYYYYYYY",
  requestURL = "https://api.twitter.com/oauth/request_token",
  accessURL = "https://api.twitter.com/oauth/access_token",
  authURL = "https://api.twitter.com/oauth/authorize"
)
cred$handshake(cainfo = cainfo)

# Optional: persist the credential object and reload it in a later session.
# save(cred, file = "twitteR_credentials")
# load("twitteR_credentials")
registerTwitterOAuth(cred)

# Query the current rate-limit information and show the remaining hits.
z <- getCurRateLimitInfo(cainfo = cainfo)
z$getRemainingHits()
이렇게 등록된 키를 이용해서 몇 가지 예제를 보여줬다.
# Follower distribution ------------------------------------------------------
# Requires `cainfo` and a registered OAuth credential from the setup snippet.
gogamza <- getUser("gogamza", cainfo = cainfo)
gogamza.friends <- gogamza$getFriends(cainfo = cainfo)
# gogamza.friendsID <- gogamza$getFriendIDs()
gogamza.followers <- gogamza$getFollowers(cainfo = cainfo)

# Extract one value per follower in a single pass instead of growing six
# vectors with append() inside a loop. USE.NAMES = FALSE keeps the vectors
# unnamed so the data frame gets default row names.
fcnt <- vapply(
  gogamza.followers, function(u) u$followersCount, numeric(1),
  USE.NAMES = FALSE
)
idnames <- vapply(
  gogamza.followers, function(u) u$name, character(1),
  USE.NAMES = FALSE
)
ids <- vapply(
  gogamza.followers, function(u) u$id, character(1),
  USE.NAMES = FALSE
)
friends <- vapply(
  gogamza.followers, function(u) u$friendsCount, numeric(1),
  USE.NAMES = FALSE
)
statuscnt <- vapply(
  gogamza.followers, function(u) u$statusesCount, numeric(1),
  USE.NAMES = FALSE
)
# `location` is collected but not used in the data frame below; empty values
# are dropped, so its length may differ from the other vectors.
location <- unlist(
  lapply(gogamza.followers, function(u) u$location),
  use.names = FALSE
)

gogamza.followers.df <- data.frame(
  id = ids,
  names = idnames,
  followers = fcnt,
  friends = friends,
  statusCount = statuscnt
)

# Correlation between follower count and friend count. Columns are selected
# by name, and print() makes the result visible under source() as well.
print(cor(gogamza.followers.df[, c("followers", "friends")]))

library(ggplot2)

# Scatter plot: followers vs. friends, point size by number of statuses,
# labelled with the account name.
fol <- ggplot(gogamza.followers.df, aes(followers, friends)) +
  geom_point(aes(size = statusCount), alpha = I(0.8), colour = "red") +
  geom_text(
    aes(label = names, size = followers + friends),
    hjust = 1, vjust = -1, angle = 25
  ) +
  scale_size(range = c(3, 7))

# Distribution of follower/friend counts among the accounts following me
# (log10 scale). Printed explicitly so it is drawn under source() too.
print(
  ggplot(gogamza.followers.df) +
    geom_histogram(
      aes(log10(followers), fill = "log of followers"),
      alpha = I(0.7)
    ) +
    geom_histogram(
      aes(log10(friends), fill = "log of friends"),
      alpha = I(0.7)
    ) +
    scale_fill_hue("팔로워 그리고 친구")
)

# Render `plot` into an 800x600 px LZW-compressed TIFF at `file`.
# The device settings (type = "windows", Dotum font, ClearType) are the ones
# the original script used. The device is closed via on.exit() even if
# drawing fails, and the plot is printed explicitly because ggplot objects
# are not drawn by autoprint inside a function or under source().
save_followers_tiff <- function(plot, file) {
  tiff(
    file = file, width = 800, height = 600, units = "px",
    type = "windows", family = "Dotum", antialias = "cleartype",
    compression = "lzw"
  )
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(file)
}

# Full graph
save_followers_tiff(fol, "followers.tiff")

# Followers <= 20,000
gogamza.followers.df.medium <- subset(gogamza.followers.df, followers <= 20000)
fo2 <- fol %+% gogamza.followers.df.medium
save_followers_tiff(fo2 + geom_smooth(), "followers2.tiff")

# Followers <= 6,000
gogamza.followers.df.small <- subset(gogamza.followers.df, followers <= 6000)
fo3 <- fo2 %+% gogamza.followers.df.small
save_followers_tiff(fo3 + geom_smooth(), "followers3.tiff")

# Followers <= 1,000
gogamza.followers.df.tiny <- subset(gogamza.followers.df, followers <= 1000)
fo4 <- fo3 %+% gogamza.followers.df.tiny
save_followers_tiff(fo4 + geom_smooth(), "followers4.tiff")
두 번째는 SNA(소셜 네트워크 분석) 예제이다.
# Social-network graph of the 20 followers with the most followers.
# Requires `cainfo` and a registered OAuth credential from the setup snippet.
library(igraph)

gogamza <- getUser("gogamza", cainfo = cainfo)
gogamza.followers <- gogamza$getFollowers(cainfo = cainfo)

followerscnt <- vapply(
  gogamza.followers, function(x) x$followersCount, numeric(1)
)
followers <- vapply(gogamza.followers, function(x) x$name, character(1))

# Sort by follower count (descending) and keep the top 20.
# order() must be given the count column: ordering the whole data frame does
# not rank rows by follower count. head() avoids NA indices when there are
# fewer than 20 followers.
followers.df <- data.frame(followers, followerscnt)
top_followers <- head(order(followers.df$followerscnt, decreasing = TRUE), 20)

# Two-column edge list: one edge from each top follower to "gogamza".
followermatrix <- as.matrix(
  data.frame(followers = followers[top_followers], me = "gogamza")
)

# Disabled experiment: second-level followers (top 5 of each top follower).
# secondfollowers <- data.frame()
#
# for(i in gogamza.followers[top_followers]){
#   foll <- i$getFollowers(cainfo=cainfo)
#   followerscnt <- sapply(foll, function(x) {x$followersCount})
#   followers <- sapply(foll, function(x) {x$name})
#   idx <- order(data.frame(followers, followerscnt), decreasing=T)[1:5]
#   secondfollowers <- rbind(secondfollowers,data.frame(sapply(foll[idx], name), i$name))
# }

sna <- graph.edgelist(followermatrix, directed = FALSE)
plot(
  sna,
  vertex.label = V(sna)$name,
  layout = layout.random,
  vertex.label.cex = 1,
  vertex.label.family = "Verdana"
)
마지막 예제는 최근 300개의 트윗을 가져와 명사를 추출한 뒤 이를 워드클라우드로 만드는 예제이다.
# Word cloud of the nouns in the most recent 300 tweets.
# Requires `cainfo` and a registered OAuth credential from the setup snippet.
library(tm)
library(KoNLP)
library(wordcloud)

gogamza <- userTimeline(user = "gogamza", n = 300, cainfo = cainfo)

# Collect the tweet texts; iterating over the list itself is also safe when
# the timeline is empty (1:length(x) would iterate over c(1, 0)).
gogamzatw <- vapply(
  gogamza, function(status) status$text, character(1),
  USE.NAMES = FALSE
)

# Replace every whitespace character (newlines, tabs, ...) with a space.
gogamzatw <- gsub("[[:space:]]", " ", gogamzatw)

# Extract nouns per tweet and flatten them into one character vector.
gogamzaNoun <- unlist(lapply(gogamzatw, extractNoun), use.names = FALSE)

# Drop English stopwords. Logical negation is used instead of -which(...):
# when nothing matches, which() returns integer(0) and x[-integer(0)] would
# discard every element.
gogamzaNoun <- gogamzaNoun[!(gogamzaNoun %in% stopwords("english"))]

# Keep words of at least two characters, then strip punctuation.
gogamzaNoun <- Filter(function(x) nchar(x) >= 2, gogamzaNoun)
gogamzaNoun <- gsub("[[:punct:]]", "", gogamzaNoun)
# Optional second length filter after punctuation removal (left disabled):
# gogamzaNoun <- Filter(function(x){nchar(x)>=2}, gogamzaNoun)

wordcount <- table(gogamzaNoun)

pal <- brewer.pal(8, "Dark2")
wordcloud(
  names(wordcount),
  freq = wordcount,
  scale = c(4, 0.3),
  min.freq = 10,
  random.order = TRUE,
  rot.per = .1,
  colors = pal
)
4회 R meetup 코드 by from __future__ import dream is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.