Created
May 17, 2016 23:42
-
-
Save marekborowiec/bc5e6190bf9c9d19c9d86e6403521961 to your computer and use it in GitHub Desktop.
R script using ape to get data on clade monophyly
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library("ape") | |
| library("data.table") | |
| ### SET WORKING DIRECTORY ### | |
| setwd("/home/mlb/Dropbox/Misof_et_al_Science/Supplementary_Archive_2/aa_final_alignments/trees") | |
| ### READ IN TREE FILES ### | |
| tree_files <- dir(pattern="*bipartitions.EOG*") | |
| ### DEFINE CLADES ### | |
| bees <- c("AMELI","Bombus_terrestris","Exoneura_robusta") | |
| Cole <- c("Aleochara_curtula","Dendroctonus_ponderosae","Meloe_violaceus", | |
| "TCAST","Lepicerus_sp","Priacma_serrata","Gyrinus_marinus", | |
| "Carabus_granulatus") | |
| Stre <- c("Mengenilla","Stylops_melittae") | |
| Neur <- c("Conwentzia_psociformis","Osmylus_fulvicephalus","Dichochrysa_prasina", | |
| "Euroleon_nostras") | |
| Mega <- c("Corydalus_cornutus","Sialis_lutaria") | |
| Raph <- c("Inocellia_crassicornis","Xanthostigma_xanthostigma") | |
| Dipt <- c("AGAMB","Aede_aegy","Phlebotomus_papatasi","Trichocera_fuscata", | |
| "Tipula_maxima","Bibio_marci","Bombylius_major","DMELA","Lipara_lucens", | |
| "Rhagoletis_pomonella","Glossina_morsitans","Sarcophaga_crassipalpis", | |
| "Triarthria_setipennis") | |
| Meco <- c("Boreus_hyemalis","Nannochorista_sp","Bittacus_pilicornis", | |
| "Panorpa_vulgaris") | |
| Siph <- c("Ceratophyllus_gallinae","Archaeopsylla_erinacei","Ctenocephalides_felis") | |
| Hyme <- c("Tenthredo_koehleri","Orussus_abietinus","Cotesia_vestalis", | |
| "Leptopilina_clavipes","NVITR","Chrysis_viridula","AECHI", | |
| "Harp_salt",bees) | |
| Lepi <- c("Micropterix_calthella","Eriocrania_subpurpurella","Triodia_sylvina", | |
| "Nemophora_deegerella","Yponomeuta_evonymella","Zygaena_fausta", | |
| "Polyommatus_icarus","Parides_arcas","BMORI","Manduca_sexta") | |
| Tric <- c("Rhyacophila_fasciata","Platycentropus_radiatus","Hydroptilidae_sp", | |
| "Philopotamus_ludificatus","Cheumatopsyche_sp") | |
| Hemi <- c("Trialeurodes_vaporariorum","Bemisia_tabaci","Acanthocasuarina_muellerianae", | |
| "Planococcus_citri","Essigella_californica","APISU","Aphis_gossypii", | |
| "Acanthosoma_haemorrhoidale","Notostira_elongata","Ranatra_linearis", | |
| "Velia_caprai","Xenophysella_greensladeae","Nilaparvata_lugens", | |
| "Cercopis_vulnerata","Okanagana_villosa") | |
| Psoc <- c("Ectopsocus_briggsi","Liposcelis_bostrychophila","Menopon_gallinae", | |
| "PHUMA") | |
| Thys <- c("Gynaikothrips_ficorum","Frankliniella_cephalica","Thrips_palmi") | |
| Isop <- c("Mastotermes_darwiniensis","Prorhinotermes_simplex","ZNEVA") | |
| Blat <- c("Blaberus_atropus","Periplaneta_americana","Cryptocercus_sp") | |
| BlIs <- c(Isop,Blat) | |
| Mant <- c("Metallyticus_splendidus","Empusa_pennata","Mantis_religiosa") | |
| Phas <- c("Timema_cristinae","Peruphasma_schultei","Aretaon_asperrimus") | |
| Embi <- c("Haploembia_palaui","Aposthonia_japonica") | |
| Gryl <- c("Grylloblatta_bifratrilecta","Galloisiana_yuasai") | |
| Mntp <- c("Tanzaniophasma_sp") | |
| Orth <- c("Gryllotalpa_sp","Ceuthophilus_sp","Tetrix_subulata", | |
| "Prosarthria_teretrirostris","Stenobothrus_lineatus") | |
| Plec <- c("Leuctra_sp","Perla_marginata","Cosmioperla_kuna") | |
| Zora <- c("Zorotypus_caudelli") | |
| Ephe <- c("Ephemera_danica","Isonychia_bicolor","Eurylophella_sp", | |
| "Baetis_pumilus") | |
| Odon <- c("Calopteryx_splendens","Cordulegaster_boltoni","Epiophlebia_superstes") | |
| Zyge <- c("Tricholepidion_gertschi","Atelura_formicaria","Thermobia_domestica") | |
| Arch <- c("Meinertellus_cundinamarcensis","Machilis_hrabei") | |
| Dipl <- c("Campodea_augens","Occasjapyx_japonicus") | |
| Coll <- c("Sminthurus_vir_nig","Tetrodontophora_bielanensis","Anurida_maritima", | |
| "Pogonognathellus_lon_fla","Folsomia_candida") | |
| Prot <- c("Acerentomon_sp") | |
| #eury <- c("Eurylophella_sp","Ephemera_danica") | |
| #mosq <- c("Aede_aegy","AGAMB") | |
| #psoc <- c("Ectopsocus_briggsi","Liposcelis_bostrychophila","PHUMA","Menopon_gallinae") | |
| CoSt <- c(Cole,Stre) | |
| Neoi <- c(Neur,Mega,Raph) | |
| CoNe <- c(CoSt,Neoi) | |
| Amph <- c(Lepi,Tric) | |
| Antl <- c(Dipt,Meco,Siph) | |
| Holo <- c(CoNe,Amph,Antl,Hyme) | |
| Neop <- c(Holo,Hemi,Psoc,Thys,Blis,Mant,Phas,Embi,Gryl,Mntp,Orth,Plec,Zora) | |
| Pter <- c(Neop,Ephe,Odon) | |
| Hexa <- c(Pter,Arch,Zyge,Dipl,Coll,Prot) | |
| Clades <- list(Cole,Dipt,Lepi,Hyme,Hemi,Amph,Neur,Mega, | |
| Raph,CoSt,Neoi,CoNe,Antl,Holo,Psoc,Thys, | |
| BlIs,Mant,Phas,Embi,Gryl,Orth,Plec,Ephe, | |
| Odon,Zyge,Arch,Dipl,Coll,Neop,Pter,Hexa) | |
| header <- c("OG","Coleoptera_monophyly","Diptera_monophyly", | |
| "Lepidoptera_monophyly","Hymenoptera_monophyly", | |
| "Hemiptera_monophyly","Amphiesmenoptera_monophyly", | |
| "Neuroptera_monophyly","Megaloptera_monophyly", | |
| "Raphidioptera_monophyly","Coleoptera+Strepsiptera_monophyly", | |
| "Neuropterida_monophyly","Coleoptera+Neuropterida_monophyly", | |
| "Antliophora_monophyly","Holometabola_monophyly", | |
| "Psocodea_monophyly","Thysanoptera_monophyly", | |
| "Blattodea+Isoptera_monophyly","Mantodea_monophyly", | |
| "Phasmatodea_monophyly","Embioptera_monophyly", | |
| "Grylloblattodea_monophyly","Orthoptera_monophyly", | |
| "Plecoptera_monophyly","Ephemeroptera_monophyly", | |
| "Odonata_monophyly","Zygentoma_monophyly", | |
| "Archaeognatha_monophyly","Diplura_monophyly", | |
| "Collembola_monophyly","Neoptera_monophyly", | |
| "Pterygota_monophyly","Hexapoda_monophyly") | |
| ### DEFINE ROOT TREES FUNCTION ### | |
| rootNonHex <- function(tree) { | |
| possible_outgroups <- c("ISCAP","Symphylella_vulgaris","Glomeris_pustulata","Cypridininae_sp","Sarsinebalia_urgorii","Litopenaeus_vannamei","Celuca_puligator","Lepeophtheirus_salmonis","DPULE","Speleonectes_tulumensis") | |
| for (outgroup in possible_outgroups) { | |
| taxa <- tree$tip.label | |
| if (outgroup %in% taxa) { | |
| tree <- root(phy=tree,outgroup=outgroup,resolve.root=T) | |
| break | |
| } | |
| } | |
| return(tree) | |
| } | |
| ### DEFINE MONOPHYLY FUNCTION ### | |
| checkMono <- function(tree,clade){ | |
| taxa <- tree$tip.label | |
| for (taxon in clade) { | |
| if (!(taxon %in% taxa)) { | |
| clade <- clade[!clade==taxon] | |
| } | |
| } | |
| len <- length(clade) | |
| if (len <= 1) { | |
| monophyly <- NA | |
| } else { | |
| monophyly <- is.monophyletic(tree,tips=clade) | |
| } | |
| return(monophyly) | |
| } | |
| ### DEFINE A FUNCTION TO CHECK FOR MONOPHYLY OF ALL CLADES ### | |
| getMonophylyData <- function(tree_file,clades) { | |
| tr_name <- sub("RAxML_bipartitions.(EOG[0-9A-Z]+).linsi.aa.fas-out", | |
| "\\1", perl=TRUE, x=tree_file) | |
| tr <- read.tree(tree_file) | |
| tr <- rootNonHex(tr) | |
| #plot(tr) | |
| #title(tr_name) | |
| data <- data.frame(tr_name,lapply(clades,checkMono,tree=tr)) | |
| return(data) | |
| } | |
| ### LOOP OVER ALL TREES ### | |
| ptm <- proc.time() | |
| m <- lapply(tree_files,getMonophylyData,clades=Clades) | |
| proc.time() - ptm | |
| d <- rbindlist(m) | |
| z <- setnames(d,header) | |
| write.csv(z,file="misof_monophyly.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment