#20190624 #D'Souza, Alaric #example code for cutoff and spatiotemporal calcuations in R #start of cutoff for loop for(cutoff in cutoffList[[snpvar]]){ print(cutoff) #make empty edgelist character vector allverts<-character() #make empty uniqueCliques list uniqueCliques<-list() #make edgelist from distance table based on cutoff edgelist<-df[df$distance<=cutoff,c("sample1","sample2","distance")] #invert edgelist distance to be consistent with larger weights meaning stronger connection edgelist$distance <- 1-(edgelist$distance/max(edgelist$distance)) #rename columns in edgelist colnames(edgelist)<-c("from","to","weight") #see graphic for visual representation of code in this loop while(dim(edgelist)[1]!=0){ #make nodelist from the edgelist samples that met the snp cutoff nodelist<-mapping[mapping$Sample%in%sort(unique(c(edgelist$from,edgelist$to))),] #rename nodelist columns colnames(nodelist)<-c("id",colnames(mapping)[2:ncol(mapping)]) #build network from edgelist and nodelist net<-graph_from_data_frame(d=edgelist,vertices=nodelist,directed=F) #find maximal cliques in edgelist cutoff cutOffCliques<-maximal.cliques(net,min=2) #get clique with highest minimum weight currClique<-cutOffCliques[order(sapply(cutOffCliques,function(mysub){min(E(induced_subgraph(net,mysub))$weight)}),decreasing=T)[1]] #set first value in uniqueCliques to the clique with the highest minimum weight uniqueCliques<-append(uniqueCliques,list(currClique[[1]]$name)) #set allverts to the first clique that will be added based on minimum weights allverts<-c(allverts,currClique[[1]]$name) #remove used nodes from edgelist edgelist<-edgelist[!(edgelist$from%in%allverts | edgelist$to%in%allverts),] } #name cliques names(uniqueCliques)<-paste0("clique_",1:length(uniqueCliques)) write.table(ldply(uniqueCliques,.fun=as.matrix,.id="clique"), file=paste0(currentOut,curspec,"_cutoff_",as.character(cutoff),"_cliquelist.txt"), sep="\t", quote=F, row.names=F, col.names=c("cliqueID","sample") ) #get combinations of cliques and make into data frame cliqueDF<-ldply(lapply(uniqueCliques,function(clq){data.frame(t(combn(clq,2,simplify=T)),stringsAsFactors=F)})) #rename columns in cliqueDF colnames(cliqueDF)<-c("cliques","samp1","samp2") #make new column for presence absence of logical cliqueDF$value<-TRUE #cast df to have clique membership as columns wideCliqueDF<-dcast(formula=samp1+samp2~cliques,data=cliqueDF,fill=FALSE,value.var="value") #get combined location for sample pairs wideCliqueDF$combLocation<-combineStrings(data.frame(samp1=mapping$surfaceCode[match(wideCliqueDF$samp1,mapping$Sample)], samp2=mapping$surfaceCode[match(wideCliqueDF$samp2,mapping$Sample)], stringsAsFactors=F)) #merge cliquedata with location distance table distCalcDF<-merge(df[c("combLocation","TimeDist","SpaceDist")],wideCliqueDF,by="combLocation",all.x=T) #set notclonal samples distCalcDF[c("samp1","samp2")][is.na(distCalcDF[c("samp1","samp2")])]<-"notclone" #set notclonal sample cliques to false distCalcDF[is.na(distCalcDF)]<-FALSE #write out data frame for #number of samples covered in cliques #observed time distance for current cliques #observed space distance for current cliques write.table( data.frame(numCliques=length(names(uniqueCliques)), samplesCovered=length(allverts), observedTimeDist=sum(distCalcDF$TimeDist[apply(distCalcDF[names(uniqueCliques)],MARGIN=1,any)]), observedSpaceDist=sum(distCalcDF$SpaceDist[apply(distCalcDF[names(uniqueCliques)],MARGIN=1,any)])), file=paste0(currentOut,curspec,"_cutoff_",as.character(cutoff),"_observed_time_space_dist.txt"), sep="\t", quote=F, row.names=F, col.names=T ) #write out data frame for #expected time distance for current cliques #expected space distance for current cliques write.table( data.frame( expectedTimeDist=sapply(1:10000,function(x){sum(sample(distCalcDF$TimeDist)[apply(distCalcDF[names(uniqueCliques)],MARGIN=1,any)])}), expectedSpaceDist=sapply(1:10000,function(x){sum(sample(distCalcDF$SpaceDist)[apply(distCalcDF[names(uniqueCliques)],MARGIN=1,any)])})), file=paste0(currentOut,curspec,"_cutoff_",as.character(cutoff),"_expected_time_space_dist.txt"), sep="\t", quote=F, row.names=F, col.names=T ) #end of cutoff for loop }