{ "data_id": "42641", "name": "phish_url", "exact_name": "phish_url", "version": 1, "version_label": null, "description": "Rows with NaN and inf values removed. Converted file format from CSV to ARFF.", "format": "ARFF", "uploader": "Rizka Purwanto", "uploader_id": 15317, "visibility": "public", "creator": null, "contributor": "\"Mohammad Saiful Islam Mamun\",\"Mohammad Ahmad Rathore\",\"Arash Habibi Lashkari\",\"Natalia Stakhanova and Ali A. Ghorbani\",\"\"Detecting Malicious URLs Using Lexical Analysis\"\",\"Network and System Security\",\"Springer International Publishing\",\"P467--482\",\"2016.\"", "date": "2020-09-10 11:47:37", "update_comment": null, "last_update": "2020-09-10 11:47:37", "licence": "CC0", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22044419\/All_cleaned.arff", "default_target_attribute": "class", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "phish_url", "Rows with NaN and inf values removed. Converted file format from CSV to ARFF. " ], "weight": 5 }, "qualities": { "NumberOfInstances": 18982, "NumberOfFeatures": 80, "NumberOfClasses": 5, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 79, "NumberOfSymbolicFeatures": 1, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": 0.9997892629471576, "PercentageOfMissingValues": 0, "Dimensionality": 0.004214519018017069, "PercentageOfNumericFeatures": 98.75, "MajorityClassPercentage": 28.14245074280898, "PercentageOfSymbolicFeatures": 1.25, "MajorityClassSize": 5342, "MinorityClassPercentage": 13.04920450953535, "MinorityClassSize": 2477, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0 }, "tags": [], "features": [ { "name": "class", "index": "79", "type": "nominal", "distinct": "5", "missing": "0", "target": "1", "distr": [ [ "Defacement", "benign", "malware", "phishing", "spam" ], [ [ "2477", "0", "0", "0", "0" ], [ "0", "2709", "0", "0", "0" ], [ "0", "0", "4440", "0", "0" ], [ "0", "0", "0", "4014", "0" ], [ "0", "0", "0", "0", "5342" ] ] ] }, { "name": "Querylength", "index": "0", "type": "numeric", "distinct": "287", "missing": "0", "min": "0", "max": "1385", "mean": "32", "stdev": "129" }, { "name": "domain_token_count", "index": "1", "type": "numeric", "distinct": "10", "missing": "0", "min": "2", "max": "15", "mean": "3", "stdev": "1" }, { "name": "path_token_count", "index": "2", "type": "numeric", "distinct": "33", "missing": "0", "min": "4", "max": "68", "mean": "9", "stdev": "4" }, { "name": "avgdomaintokenlen", "index": "3", "type": "numeric", "distinct": "102", "missing": "0", "min": "2", "max": "26", "mean": "5", "stdev": "2" }, { "name": "longdomaintokenlen", "index": "4", "type": "numeric", "distinct": "38", "missing": "0", "min": "2", "max": "63", "mean": "9", "stdev": "4" }, { "name": "avgpathtokenlen", "index": "5", "type": "numeric", "distinct": "643", "missing": "0", "min": "1", "max": "34", "mean": "5", "stdev": "2" }, { "name": "tld", "index": "6", "type": "numeric", "distinct": "10", "missing": "0", "min": "2", "max": "15", "mean": "3", "stdev": "1" }, { "name": "charcompvowels", "index": "7", "type": "numeric", "distinct": "147", "missing": "0", "min": "0", "max": "193", "mean": "16", "stdev": "16" }, { "name": "charcompace", "index": "8", "type": "numeric", "distinct": "122", "missing": "0", "min": "0", "max": "142", "mean": "12", "stdev": "14" }, { "name": "ldl_url", "index": "9", "type": "numeric", "distinct": "133", "missing": "0", "min": "0", "max": "207", "mean": "5", "stdev": "18" }, { "name": "ldl_domain", "index": "10", "type": "numeric", "distinct": "12", "missing": "0", "min": "0", "max": "37", "mean": "0", "stdev": "1" }, { "name": "ldl_path", "index": "11", "type": "numeric", "distinct": "132", "missing": "0", "min": "0", "max": "207", "mean": "5", "stdev": "18" }, { "name": "ldl_filename", "index": "12", "type": "numeric", "distinct": "20", "missing": "0", "min": "0", "max": "31", "mean": "0", "stdev": "1" }, { "name": "ldl_getArg", "index": "13", "type": "numeric", "distinct": "132", "missing": "0", "min": "0", "max": "207", "mean": "4", "stdev": "18" }, { "name": "dld_url", "index": "14", "type": "numeric", "distinct": "34", "missing": "0", "min": "0", "max": "38", "mean": "1", "stdev": "3" }, { "name": "dld_domain", "index": "15", "type": "numeric", "distinct": "7", "missing": "0", "min": "0", "max": "21", "mean": "0", "stdev": "0" }, { "name": "dld_path", "index": "16", "type": "numeric", "distinct": "33", "missing": "0", "min": "0", "max": "32", "mean": "1", "stdev": "3" }, { "name": "dld_filename", "index": "17", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", "max": "14", "mean": "0", "stdev": "1" }, { "name": "dld_getArg", "index": "18", "type": "numeric", "distinct": "31", "missing": "0", "min": "0", "max": "31", "mean": "1", "stdev": "3" }, { "name": "urlLen", "index": "19", "type": "numeric", "distinct": "369", "missing": "0", "min": "29", "max": "1424", "mean": "105", "stdev": "128" }, { "name": "domainlength", "index": "20", "type": "numeric", "distinct": "57", "missing": "0", "min": "5", "max": "152", "mean": "16", "stdev": "6" }, { "name": "pathLength", "index": "21", "type": "numeric", "distinct": "355", "missing": "0", "min": "10", "max": "1402", "mean": "82", "stdev": "127" }, { "name": "subDirLen", "index": "22", "type": "numeric", "distinct": "355", "missing": "0", "min": "10", "max": "1402", "mean": "82", "stdev": "127" }, { "name": "fileNameLen", "index": "23", "type": "numeric", "distinct": "140", "missing": "0", "min": "1", "max": "200", "mean": "10", "stdev": "16" }, { "name": "this.fileExtLen", "index": "24", "type": "numeric", "distinct": "5", "missing": "0", "min": "1", "max": "5", "mean": "3", "stdev": "1" }, { "name": "ArgLen", "index": "25", "type": "numeric", "distinct": "330", "missing": "0", "min": "0", "max": "1388", "mean": "45", "stdev": "130" }, { "name": "pathurlRatio", "index": "26", "type": "numeric", "distinct": "2052", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "ArgUrlRatio", "index": "27", "type": "numeric", "distinct": "1731", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "argDomanRatio", "index": "28", "type": "numeric", "distinct": "1055", "missing": "0", "min": "0", "max": "93", "mean": "3", "stdev": "9" }, { "name": "domainUrlRatio", "index": "29", "type": "numeric", "distinct": "1869", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "pathDomainRatio", "index": "30", "type": "numeric", "distinct": "1822", "missing": "0", "min": "0", "max": "93", "mean": "5", "stdev": "9" }, { "name": "argPathRatio", "index": "31", "type": "numeric", "distinct": "1555", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "executable", "index": "32", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "isPortEighty", "index": "33", "type": "numeric", "distinct": "2", "missing": "0", "min": "-1", "max": "0", "mean": "-1", "stdev": "0" }, { "name": "NumberofDotsinURL", "index": "34", "type": "numeric", "distinct": "18", "missing": "0", "min": "1", "max": "19", "mean": "3", "stdev": "2" }, { "name": "ISIpAddressInDomainName", "index": "35", "type": "numeric", "distinct": "1", "missing": "0", "min": "-1", "max": "0", "mean": "-1", "stdev": "0" }, { "name": "CharacterContinuityRate", "index": "36", "type": "numeric", "distinct": "252", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "LongestVariableValue", "index": "37", "type": "numeric", "distinct": "274", "missing": "0", "min": "-1", "max": "1385", "mean": "29", "stdev": "129" }, { "name": "URL_DigitCount", "index": "38", "type": "numeric", "distinct": "184", "missing": "0", "min": "0", "max": "236", "mean": "14", "stdev": "23" }, { "name": "host_DigitCount", "index": "39", "type": "numeric", "distinct": "18", "missing": "0", "min": "0", "max": "44", "mean": "1", "stdev": "2" }, { "name": "Directory_DigitCount", "index": "40", "type": "numeric", "distinct": "61", "missing": "0", "min": "-1", "max": "82", "mean": "2", "stdev": "6" }, { "name": "File_name_DigitCount", "index": "41", "type": "numeric", "distinct": "40", "missing": "0", "min": "-1", "max": "91", "mean": "2", "stdev": "4" }, { "name": "Extension_DigitCount", "index": "42", "type": "numeric", "distinct": "165", "missing": "0", "min": "-1", "max": "236", "mean": "10", "stdev": "23" }, { "name": "Query_DigitCount", "index": "43", "type": "numeric", "distinct": "159", "missing": "0", "min": "-1", "max": "236", "mean": "7", "stdev": "22" }, { "name": "URL_Letter_Count", "index": "44", "type": "numeric", "distinct": "469", "missing": "0", "min": "17", "max": "1202", "mean": "76", "stdev": "106" }, { "name": "host_letter_count", "index": "45", "type": "numeric", "distinct": "57", "missing": "0", "min": "2", "max": "127", "mean": "14", "stdev": "6" }, { "name": "Directory_LetterCount", "index": "46", "type": "numeric", "distinct": "120", "missing": "0", "min": "-1", "max": "183", "mean": "16", "stdev": "17" }, { "name": "Filename_LetterCount", "index": "47", "type": "numeric", "distinct": "89", "missing": "0", "min": "-1", "max": "115", "mean": "8", "stdev": "9" }, { "name": "Extension_LetterCount", "index": "48", "type": "numeric", "distinct": "452", "missing": "0", "min": "-1", "max": "1179", "mean": "34", "stdev": "108" }, { "name": "Query_LetterCount", "index": "49", "type": "numeric", "distinct": "429", "missing": "0", "min": "-1", "max": "1173", "mean": "24", "stdev": "108" }, { "name": "LongestPathTokenLength", "index": "50", "type": "numeric", "distinct": "318", "missing": "0", "min": "3", "max": "1393", "mean": "56", "stdev": "128" }, { "name": "Domain_LongestWordLength", "index": "51", "type": "numeric", "distinct": "30", "missing": "0", "min": "2", "max": "33", "mean": "8", "stdev": "4" }, { "name": "Path_LongestWordLength", "index": "52", "type": "numeric", "distinct": "38", "missing": "0", "min": "0", "max": "54", "mean": "9", "stdev": "4" }, { "name": "sub-Directory_LongestWordLength", "index": "53", "type": "numeric", "distinct": "35", "missing": "0", "min": "-1", "max": "48", "mean": "8", "stdev": "4" }, { "name": "Arguments_LongestWordLength", "index": "54", "type": "numeric", "distinct": "36", "missing": "0", "min": "-1", "max": "91", "mean": "2", "stdev": "5" }, { "name": "URL_sensitiveWord", "index": "55", "type": "numeric", "distinct": "4", "missing": "0", "min": "0", "max": "3", "mean": "0", "stdev": "0" }, { "name": "URLQueries_variable", "index": "56", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", "max": "19", "mean": "1", "stdev": "2" }, { "name": "spcharUrl", "index": "57", "type": "numeric", "distinct": "15", "missing": "0", "min": "2", "max": "17", "mean": "4", "stdev": "2" }, { "name": "delimeter_Domain", "index": "58", "type": "numeric", "distinct": "8", "missing": "0", "min": "0", "max": "10", "mean": "0", "stdev": "0" }, { "name": "delimeter_path", "index": "59", "type": "numeric", "distinct": "29", "missing": "0", "min": "0", "max": "64", "mean": "3", "stdev": "3" }, { "name": "delimeter_Count", "index": "60", "type": "numeric", "distinct": "22", "missing": "0", "min": "-1", "max": "37", "mean": "1", "stdev": "4" }, { "name": "NumberRate_URL", "index": "61", "type": "numeric", "distinct": "2442", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "NumberRate_Domain", "index": "62", "type": "numeric", "distinct": "110", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "NumberRate_DirectoryName", "index": "63", "type": "numeric", "distinct": "492", "missing": "0", "min": "-1", "max": "1", "mean": "0", "stdev": "0" }, { "name": "NumberRate_FileName", "index": "64", "type": "numeric", "distinct": "1832", "missing": "0", "min": "-1", "max": "1", "mean": "0", "stdev": "0" }, { "name": "NumberRate_Extension", "index": "65", "type": "numeric", "distinct": "1516", "missing": "0", "min": "-1", "max": "1", "mean": "0", "stdev": "0" }, { "name": "NumberRate_AfterPath", "index": "66", "type": "numeric", "distinct": "1250", "missing": "0", "min": "-1", "max": "1", "mean": "0", "stdev": "1" }, { "name": "SymbolCount_URL", "index": "67", "type": "numeric", "distinct": "37", "missing": "0", "min": "3", "max": "47", "mean": "10", "stdev": "5" }, { "name": "SymbolCount_Domain", "index": "68", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "14", "mean": "2", "stdev": "1" }, { "name": "SymbolCount_Directoryname", "index": "69", "type": "numeric", "distinct": "22", "missing": "0", "min": "-1", "max": "24", "mean": "2", "stdev": "3" }, { "name": "SymbolCount_FileName", "index": "70", "type": "numeric", "distinct": "27", "missing": "0", "min": "-1", "max": "33", "mean": "4", "stdev": "4" }, { "name": "SymbolCount_Extension", "index": "71", "type": "numeric", "distinct": "27", "missing": "0", "min": "-1", "max": "32", "mean": "3", "stdev": "4" }, { "name": "SymbolCount_Afterpath", "index": "72", "type": "numeric", "distinct": "26", "missing": "0", "min": "-1", "max": "40", "mean": "2", "stdev": "4" }, { "name": "Entropy_URL", "index": "73", "type": "numeric", "distinct": "11374", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "Entropy_Domain", "index": "74", "type": "numeric", "distinct": "760", "missing": "0", "min": "1", "max": "1", "mean": "1", "stdev": "0" }, { "name": "Entropy_DirectoryName", "index": "75", "type": "numeric", "distinct": "2232", "missing": "0", "min": "-1", "max": "1", "mean": "1", "stdev": "1" }, { "name": "Entropy_Filename", "index": "76", "type": "numeric", "distinct": "5617", "missing": "0", "min": "-1", "max": "1", "mean": "1", "stdev": "1" }, { "name": "Entropy_Extension", "index": "77", "type": "numeric", "distinct": "4315", "missing": "0", "min": "-1", "max": "1", "mean": "1", "stdev": "1" }, { "name": "Entropy_Afterpath", "index": "78", "type": "numeric", "distinct": "3245", "missing": "0", "min": "-1", "max": "1", "mean": "0", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 1, "total_downloads": 1, "reach": 1, "reuse": 6, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 6 }