#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2017-2024 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Assembly-quality description strings. Each literal is split across two
# source lines purely for readability; adjacent literals are concatenated
# at compile time, so the runtime values are single strings.
# NOTE(review): HQ / MQ presumably stand for high / medium quality — confirm.
HQ = (
    "Multiple fragments where gaps span repetitive regions. "
    "Presence of the 23S, 16S, and 5S rRNA genes and at least 18 tRNAs"
)
MQ = (
    "Many fragments with little to no review of assembly "
    "other than reporting of standard assembly statistics"
)

# Known metagenome names, one string per line in roughly alphabetical order.
# Most entries are lowercase, but a few keep capitals ("Crustacea gut
# metagenome", "HVAC metagenome", "Winogradsky column metagenome"), so an
# exact-match lookup against this list is case-sensitive.
# NOTE(review): presumably the accepted values for the "metagenome" field
# named in BIN_MANDATORY_FIELDS, mirroring the taxonomy's metagenome names —
# confirm against the upstream vocabulary before adding or renaming entries.
METAGENOMES = [
    "activated carbon metagenome",
    "activated sludge metagenome",
    "aerosol metagenome",
    "air metagenome",
    "algae metagenome",
    "alkali sediment metagenome",
    "amphibian metagenome",
    "anaerobic digester metagenome",
    "anchialine metagenome",
    "annelid metagenome",
    "ant fungus garden metagenome",
    "ant metagenome",
    "aquaculture metagenome",
    "aquatic eukaryotic metagenome",
    "aquatic metagenome",
    "aquatic viral metagenome",
    "aquifer metagenome",
    "ballast water metagenome",
    "bat gut metagenome",
    "bat metagenome",
    "beach sand metagenome",
    "beetle metagenome",
    "bentonite metagenome",
    "bioanode metagenome",
    "biocathode metagenome",
    "biofilm metagenome",
    "biofilter metagenome",
    "biofloc metagenome",
    "biogas fermenter metagenome",
    "bioleaching metagenome",
    "bioreactor metagenome",
    "bioreactor sludge metagenome",
    "bioretention column metagenome",
    "biosolids metagenome",
    "bird metagenome",
    "blood metagenome",
    "bog metagenome",
    "book metagenome",
    "bovine gut metagenome",
    "bovine metagenome",
    "brine metagenome",
    "canine metagenome",
    "cave metagenome",
    "cetacean metagenome",
    "chemical production metagenome",
    "chicken gut metagenome",
    "ciliate metagenome",
    "clay metagenome",
    "clinical metagenome",
    "cloud metagenome",
    "coal metagenome",
    "cold seep metagenome",
    "cold spring metagenome",
    "compost metagenome",
    "concrete metagenome",
    "coral metagenome",
    "coral reef metagenome",
    "cow dung metagenome",
    "crab metagenome",
    "crude oil metagenome",
    "Crustacea gut metagenome",
    "crustacean metagenome",
    "ctenophore metagenome",
    "decomposition metagenome",
    "desalination cell metagenome",
    "dietary supplements metagenome",
    "dinoflagellate metagenome",
    "drinking water metagenome",
    "dust metagenome",
    "ear metagenome",
    "echinoderm metagenome",
    "egg metagenome",
    "electrolysis cell metagenome",
    "endophyte metagenome",
    "epibiont metagenome",
    "estuary metagenome",
    "eukaryotic metagenome",
    "eukaryotic plankton metagenome",
    "eye metagenome",
    "factory metagenome",
    "feces metagenome",
    "feline metagenome",
    "fermentation metagenome",
    "fertilizer metagenome",
    "fish gut metagenome",
    "fishing equipment metagenome",
    "fish metagenome",
    "floral nectar metagenome",
    "flotsam metagenome",
    "flower metagenome",
    "food contamination metagenome",
    "food fermentation metagenome",
    "food metagenome",
    "food production metagenome",
    "fossil metagenome",
    "freshwater metagenome",
    "freshwater sediment metagenome",
    "frog metagenome",
    "fuel tank metagenome",
    "fungus metagenome",
    "gas well metagenome",
    "gill metagenome",
    "glacier lake metagenome",
    "glacier metagenome",
    "gonad metagenome",
    "grain metagenome",
    "granuloma metagenome",
    "groundwater metagenome",
    "gut metagenome",
    "halite metagenome",
    "herbal medicine metagenome",
    "honeybee metagenome",
    "honey metagenome",
    "horse metagenome",
    "hospital metagenome",
    "hot springs metagenome",
    "human bile metagenome",
    "human blood metagenome",
    "human brain metagenome",
    "human eye metagenome",
    "human feces metagenome",
    "human gut metagenome",
    "human hair metagenome",
    "human lung metagenome",
    "human metagenome",
    "human milk metagenome",
    "human nasopharyngeal metagenome",
    "human oral metagenome",
    "human reproductive system metagenome",
    "human saliva metagenome",
    "human semen metagenome",
    "human skeleton metagenome",
    "human skin metagenome",
    "human sputum metagenome",
    "human tracheal metagenome",
    "human urinary tract metagenome",
    "human vaginal metagenome",
    "human viral metagenome",
    "HVAC metagenome",
    "hydrocarbon metagenome",
    "hydrothermal vent metagenome",
    "hydrozoan metagenome",
    "hypersaline lake metagenome",
    "hyphosphere metagenome",
    "hypolithon metagenome",
    "ice metagenome",
    "indoor metagenome",
    "industrial waste metagenome",
    "insect gut metagenome",
    "insect metagenome",
    "insect nest metagenome",
    "internal organ metagenome",
    "interstitial water metagenome",
    "invertebrate gut metagenome",
    "invertebrate metagenome",
    "jellyfish metagenome",
    "karst metagenome",
    "koala metagenome",
    "lagoon metagenome",
    "lake water metagenome",
    "landfill metagenome",
    "leaf litter metagenome",
    "leaf metagenome",
    "lichen crust metagenome",
    "lichen metagenome",
    "liver metagenome",
    "lung metagenome",
    "macroalgae metagenome",
    "mangrove metagenome",
    "manure metagenome",
    "marine metagenome",
    "marine plankton metagenome",
    "marine sediment metagenome",
    "marsh metagenome",
    "marsupial metagenome",
    "medical device metagenome",
    "metagenome",
    "microbial eukaryotic metagenome",
    "microbial fuel cell metagenome",
    "microbial mat metagenome",
    "microeukaryotic metagenome",
    "milk metagenome",
    "mine drainage metagenome",
    "mine metagenome",
    "mine tailings metagenome",
    "mite metagenome",
    "mixed culture metagenome",
    "mollusc metagenome",
    "money metagenome",
    "moonmilk metagenome",
    "mosquito metagenome",
    "moss metagenome",
    "mouse gut metagenome",
    "mouse metagenome",
    "mouse skin metagenome",
    "mud metagenome",
    "museum specimen metagenome",
    "musk metagenome",
    "nematode metagenome",
    "neuston metagenome",
    "nutrient bag metagenome",
    "oasis metagenome",
    "oil field metagenome",
    "oil metagenome",
    "oil production facility metagenome",
    "oil sands metagenome",
    "oral metagenome",
    "oral-nasopharyngeal metagenome",
    "oral viral metagenome",
    "outdoor metagenome",
    "ovine metagenome",
    "oyster metagenome",
    "painting metagenome",
    "paper pulp metagenome",
    "parasite metagenome",
    "parchment metagenome",
    "peat metagenome",
    "periphyton metagenome",
    "permafrost metagenome",
    "photosynthetic picoeukaryotic metagenome",
    "phycosphere metagenome",
    "phyllosphere metagenome",
    "phytotelma metagenome",
    "pig gut metagenome",
    "pig metagenome",
    "pipeline metagenome",
    "pitcher plant inquiline metagenome",
    "placenta metagenome",
    "plant metagenome",
    "plastic metagenome",
    "plastisphere metagenome",
    "pollen metagenome",
    "pond metagenome",
    "poultry litter metagenome",
    "power plant metagenome",
    "primate metagenome",
    "probiotic metagenome",
    "protist metagenome",
    "psyllid metagenome",
    "rat gut metagenome",
    "rat metagenome",
    "reproductive system metagenome",
    "respiratory tract metagenome",
    "retting metagenome",
    "rhizoplane metagenome",
    "rhizosphere metagenome",
    "rice paddy metagenome",
    "riverine metagenome",
    "rock metagenome",
    "rock porewater metagenome",
    "rodent metagenome",
    "root associated fungus metagenome",
    "root metagenome",
    "runoff metagenome",
    "saline spring metagenome",
    "saltern metagenome",
    "salt lake metagenome",
    "salt marsh metagenome",
    "salt mine metagenome",
    "salt pan metagenome",
    "sand metagenome",
    "scorpion gut metagenome",
    "sea anemone metagenome",
    "seagrass metagenome",
    "sea squirt metagenome",
    "sea urchin metagenome",
    "seawater metagenome",
    "sediment metagenome",
    "seed metagenome",
    "semen metagenome",
    "shale gas metagenome",
    "sheep gut metagenome",
    "sheep metagenome",
    "shoot metagenome",
    "shrew metagenome",
    "shrimp gut metagenome",
    "silage metagenome",
    "skin metagenome",
    "slag metagenome",
    "sludge metagenome",
    "snake metagenome",
    "snow metagenome",
    "soda lake metagenome",
    "soda lime metagenome",
    "soil crust metagenome",
    "soil metagenome",
    "solid waste metagenome",
    "spider metagenome",
    "sponge metagenome",
    "starfish metagenome",
    "steel metagenome",
    "stomach metagenome",
    "stromatolite metagenome",
    "subsurface metagenome",
    "surface metagenome",
    "symbiont metagenome",
    "synthetic metagenome",
    "tannin metagenome",
    "tar pit metagenome",
    "termitarium metagenome",
    "termite fungus garden metagenome",
    "termite gut metagenome",
    "termite metagenome",
    "terrestrial metagenome",
    "tick metagenome",
    "tidal flat metagenome",
    "tin mine metagenome",
    "tobacco metagenome",
    "tomb wall metagenome",
    "tree metagenome",
    "upper respiratory tract metagenome",
    "urban metagenome",
    "urinary tract metagenome",
    "urine metagenome",
    "urogenital metagenome",
    "vaginal metagenome",
    "viral metagenome",
    "volcano metagenome",
    "wallaby gut metagenome",
    "wasp metagenome",
    "wastewater metagenome",
    "wetland metagenome",
    "whale fall metagenome",
    "whole organism metagenome",
    "wine metagenome",
    "Winogradsky column metagenome",
    "wood decay metagenome",
    "zebrafish metagenome",
]

# Geographic location names, one string per line. Besides countries and
# territories the list contains oceans and seas (e.g. "Atlantic Ocean",
# "Baltic Sea", "Ross Sea") and the placeholder values "not applicable",
# "not collected", "not provided" and "restricted access".
# Spelling and capitalisation are part of the value: the placeholders are
# lowercase and "Cote d'Ivoire", "Curacao" and "Reunion" carry no diacritics.
# NOTE(review): looks like the INSDC/ENA "country and/or sea" controlled
# vocabulary — confirm against the current checklist before editing entries.
GEOGRAPHIC_LOCATIONS = [
    "Afghanistan",
    "Albania",
    "Algeria",
    "American Samoa",
    "Andorra",
    "Angola",
    "Anguilla",
    "Antarctica",
    "Antigua and Barbuda",
    "Arctic Ocean",
    "Argentina",
    "Armenia",
    "Aruba",
    "Ashmore and Cartier Islands",
    "Atlantic Ocean",
    "Australia",
    "Austria",
    "Azerbaijan",
    "Bahamas",
    "Bahrain",
    "Baker Island",
    "Baltic Sea",
    "Bangladesh",
    "Barbados",
    "Bassas da India",
    "Belarus",
    "Belgium",
    "Belize",
    "Benin",
    "Bermuda",
    "Bhutan",
    "Bolivia",
    "Borneo",
    "Bosnia and Herzegovina",
    "Botswana",
    "Bouvet Island",
    "Brazil",
    "British Virgin Islands",
    "Brunei",
    "Bulgaria",
    "Burkina Faso",
    "Burundi",
    "Cambodia",
    "Cameroon",
    "Canada",
    "Cape Verde",
    "Cayman Islands",
    "Central African Republic",
    "Chad",
    "Chile",
    "China",
    "Christmas Island",
    "Clipperton Island",
    "Cocos Islands",
    "Colombia",
    "Comoros",
    "Cook Islands",
    "Coral Sea Islands",
    "Costa Rica",
    "Cote d'Ivoire",
    "Croatia",
    "Cuba",
    "Curacao",
    "Cyprus",
    "Czech Republic",
    "Democratic Republic of the Congo",
    "Denmark",
    "Djibouti",
    "Dominica",
    "Dominican Republic",
    "East Timor",
    "Ecuador",
    "Egypt",
    "El Salvador",
    "Equatorial Guinea",
    "Eritrea",
    "Estonia",
    "Ethiopia",
    "Europa Island",
    "Falkland Islands (Islas Malvinas)",
    "Faroe Islands",
    "Fiji",
    "Finland",
    "France",
    "French Guiana",
    "French Polynesia",
    "French Southern and Antarctic Lands",
    "Gabon",
    "Gambia",
    "Gaza Strip",
    "Georgia",
    "Germany",
    "Ghana",
    "Gibraltar",
    "Glorioso Islands",
    "Greece",
    "Greenland",
    "Grenada",
    "Guadeloupe",
    "Guam",
    "Guatemala",
    "Guernsey",
    "Guinea",
    "Guinea-Bissau",
    "Guyana",
    "Haiti",
    "Heard Island and McDonald Islands",
    "Honduras",
    "Hong Kong",
    "Howland Island",
    "Hungary",
    "Iceland",
    "India",
    "Indian Ocean",
    "Indonesia",
    "Iran",
    "Iraq",
    "Ireland",
    "Isle of Man",
    "Israel",
    "Italy",
    "Jamaica",
    "Jan Mayen",
    "Japan",
    "Jarvis Island",
    "Jersey",
    "Johnston Atoll",
    "Jordan",
    "Juan de Nova Island",
    "Kazakhstan",
    "Kenya",
    "Kerguelen Archipelago",
    "Kingman Reef",
    "Kiribati",
    "Kosovo",
    "Kuwait",
    "Kyrgyzstan",
    "Laos",
    "Latvia",
    "Lebanon",
    "Lesotho",
    "Liberia",
    "Libya",
    "Liechtenstein",
    "Lithuania",
    "Luxembourg",
    "Macau",
    "Macedonia",
    "Madagascar",
    "Malawi",
    "Malaysia",
    "Maldives",
    "Mali",
    "Malta",
    "Marshall Islands",
    "Martinique",
    "Mauritania",
    "Mauritius",
    "Mayotte",
    "Mediterranean Sea",
    "Mexico",
    "Micronesia",
    "Midway Islands",
    "Moldova",
    "Monaco",
    "Mongolia",
    "Montenegro",
    "Montserrat",
    "Morocco",
    "Mozambique",
    "Myanmar",
    "Namibia",
    "Nauru",
    "Navassa Island",
    "Nepal",
    "Netherlands",
    "New Caledonia",
    "New Zealand",
    "Nicaragua",
    "Niger",
    "Nigeria",
    "Niue",
    "Norfolk Island",
    "Northern Mariana Islands",
    "North Korea",
    "North Sea",
    "Norway",
    "not applicable",
    "not collected",
    "not provided",
    "Oman",
    "Pacific Ocean",
    "Pakistan",
    "Palau",
    "Palmyra Atoll",
    "Panama",
    "Papua New Guinea",
    "Paracel Islands",
    "Paraguay",
    "Peru",
    "Philippines",
    "Pitcairn Islands",
    "Poland",
    "Portugal",
    "Puerto Rico",
    "Qatar",
    "Republic of the Congo",
    "restricted access",
    "Reunion",
    "Romania",
    "Ross Sea",
    "Russia",
    "Rwanda",
    "Saint Helena",
    "Saint Kitts and Nevis",
    "Saint Lucia",
    "Saint Pierre and Miquelon",
    "Saint Vincent and the Grenadines",
    "Samoa",
    "San Marino",
    "Sao Tome and Principe",
    "Saudi Arabia",
    "Senegal",
    "Serbia",
    "Seychelles",
    "Sierra Leone",
    "Singapore",
    "Sint Maarten",
    "Slovakia",
    "Slovenia",
    "Solomon Islands",
    "Somalia",
    "South Africa",
    "Southern Ocean",
    "South Georgia and the South Sandwich Islands",
    "South Korea",
    "Spain",
    "Spratly Islands",
    "Sri Lanka",
    "Sudan",
    "Suriname",
    "Svalbard",
    "Swaziland",
    "Sweden",
    "Switzerland",
    "Syria",
    "Taiwan",
    "Tajikistan",
    "Tanzania",
    "Tasman Sea",
    "Thailand",
    "Togo",
    "Tokelau",
    "Tonga",
    "Trinidad and Tobago",
    "Tromelin Island",
    "Tunisia",
    "Turkey",
    "Turkmenistan",
    "Turks and Caicos Islands",
    "Tuvalu",
    "Uganda",
    "Ukraine",
    "United Arab Emirates",
    "United Kingdom",
    "Uruguay",
    "USA",
    "Uzbekistan",
    "Vanuatu",
    "Venezuela",
    "Viet Nam",
    "Virgin Islands",
    "Wake Island",
    "Wallis and Futuna",
    "West Bank",
    "Western Sahara",
    "Yemen",
    "Zambia",
    "Zimbabwe",
]

# Field names listed as mandatory for a bin. The order of the entries is
# unchanged; they are only grouped visually by the kind of information
# their names refer to.
BIN_MANDATORY_FIELDS = [
    # genome identity
    "genome_name", "accessions",
    # software used
    "assembly_software", "binning_software", "binning_parameters",
    "stats_generation_software",
    # taxonomy
    "NCBI_lineage",
    # environment / sample origin
    "broad_environment", "local_environment", "environmental_medium",
    "metagenome",
    # assembly details and file location
    "co-assembly", "genome_coverage", "genome_path",
]

# Field names listed as mandatory for a MAG.
# NOTE(review): presumably required in addition to BIN_MANDATORY_FIELDS —
# confirm against the code that consumes both lists.
MAG_MANDATORY_FIELDS = [
    "rRNA_presence",
    "completeness",
    "contamination",
]

# NOTE(review): presumably the number of digits kept when formatting
# geographic coordinates (latitude/longitude) — confirm against the caller.
GEOGRAPHY_DIGIT_COORDS = 8

# Checklist identifier and checklist-type label for bins.
# NOTE(review): "ERC000050" looks like the ENA "binned metagenome" sample
# checklist accession — confirm.
BIN_CHECKLIST = "ERC000050"
BIN_CHECKLIST_TYPE = "binned_metagenome"
# Checklist identifier and checklist-type label for MAGs. The label uses
# spaces and a capital letter, unlike the snake_case bin label above it;
# both are kept verbatim.
# NOTE(review): "ERC000047" looks like the ENA GSC MIMAGS sample checklist
# accession — confirm.
MAG_CHECKLIST = "ERC000047"
MAG_CHECKLIST_TYPE = "Metagenome-assembled genome"
