{ "data_id": "8", "name": "liver-disorders", "exact_name": "liver-disorders", "version": 1, "version_label": "1", "description": "**Author**: BUPA Medical Research Ltd. Donor: Richard S. Forsyth \n**Source**: [UCI](https:\/\/archive.ics.uci.edu\/ml\/datasets\/Liver+Disorders) - 5\/15\/1990 \n**Please cite**: \n\n**BUPA liver disorders**\n \nThe first 5 variables are all blood tests which are thought to be sensitive to liver disorders that might arise from excessive alcohol consumption. Each line in the dataset constitutes the record of a single male individual. \n\n**Important note:** The 7th field (selector) has been widely misinterpreted in the past as a dependent variable representing presence or absence of a liver disorder. This is incorrect [1]. The 7th field was created by BUPA researchers as a train\/test selector. It is not suitable as a dependent variable for classification. The dataset does not contain any variable representing presence or absence of a liver disorder. Researchers who wish to use this dataset as a classification benchmark should follow the method used in experiments by the donor (Forsyth & Rada, 1986, Machine learning: applications in expert systems and information retrieval) and others (e.g. Turney, 1995, Cost-sensitive classification: Empirical evaluation of a hybrid genetic decision tree induction algorithm), who used the 6th field (drinks), after dichotomising, as a dependent variable for classification. Because of widespread misinterpretation in the past, researchers should take care to state their method clearly.\n \n**Attribute information** \n 1. mcv mean corpuscular volume \n 2. alkphos alkaline phosphotase \n 3. sgpt alanine aminotransferase \n 4. sgot aspartate aminotransferase \n 5. gammagt gamma-glutamyl transpeptidase \n 6. drinks number of half-pint equivalents of alcoholic beverages drunk per day \n 7. selector field created by the BUPA researchers to split the data into train\/test sets \n\n[1] McDermott & Forsyth 2016, Diagnosing a disorder in a classification benchmark, Pattern Recognition Letters, Volume 73. Note Forsyth is named on the UCI page as the original donor of the dataset.", "format": "ARFF", "uploader": "Jan van Rijn", "uploader_id": 1, "visibility": "public", "creator": null, "contributor": null, "date": "2014-04-06 23:19:46", "update_comment": "Selector is the field used to define a pre-defined train-test split, should be ignored.", "last_update": "2015-08-21 13:26:43", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/8\/dataset_8_liver-disorders.arff", "default_target_attribute": "drinks", "row_id_attribute": null, "ignore_attribute": "\"selector\"", "runs": 148, "suggest": { "input": [ "liver-disorders", "The first 5 variables are all blood tests which are thought to be sensitive to liver disorders that might arise from excessive alcohol consumption. Each line in the dataset constitutes the record of a single male individual. 1. mcv mean corpuscular volume 2. alkphos alkaline phosphotase 3. sgpt alanine aminotransferase 4. sgot aspartate aminotransferase 5. gammagt gamma-glutamyl transpeptidase 6. drinks number of half-pint equivalents of alcoholic beverages drunk per day 7. selector field create " ], "weight": 5 }, "qualities": { "NumberOfInstances": 345, "NumberOfFeatures": 7, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 6, "NumberOfSymbolicFeatures": 1, "MeanNoiseToSignalRatio": null, "NumberOfBinaryFeatures": 1, "Quartile1MutualInformation": null, "REPTreeDepth1ErrRate": null, "CfsSubsetEval_NaiveBayesKappa": null, "RandomTreeDepth3Kappa": null, "J48.001.AUC": null, "MeanNominalAttDistinctValues": 2, "Quartile1SkewnessOfNumericAtts": 0.4681417978731725, "REPTreeDepth1Kappa": null, "CfsSubsetEval_kNN1NAUC": null, "StdvNominalAttDistinctValues": 0, "J48.001.ErrRate": null, "MeanSkewnessOfNumericAtts": 1.6886196204361754, "Quartile1StdDevOfNumericAtts": 4.170530797367548, "REPTreeDepth2AUC": null, "CfsSubsetEval_kNN1NErrRate": null, "kNN1NAUC": null, "J48.001.Kappa": null, "MeanStdDevOfNumericAtts": 15.827503400802176, "Quartile2AttributeEntropy": null, "REPTreeDepth2ErrRate": null, "CfsSubsetEval_kNN1NKappa": null, "kNN1NErrRate": null, "MajorityClassPercentage": null, "MinAttributeEntropy": null, "Quartile2KurtosisOfNumericAtts": 5.901776568203553, "REPTreeDepth2Kappa": null, "ClassEntropy": null, "kNN1NKappa": null, "MajorityClassSize": null, "MinKurtosisOfNumericAtts": 0.7478828309967818, "Quartile2MeansOfNumericAtts": 34.344927536231886, "REPTreeDepth3AUC": null, "DecisionStumpAUC": null, "MaxAttributeEntropy": null, "MinMeansOfNumericAtts": 3.4550724637681136, "Quartile2MutualInformation": null, "REPTreeDepth3ErrRate": null, "DecisionStumpErrRate": null, "MaxKurtosisOfNumericAtts": 13.813911752411268, "MinMutualInformation": null, "Quartile2SkewnessOfNumericAtts": 1.918445936350417, "REPTreeDepth3Kappa": null, "DecisionStumpKappa": null, "MaxMeansOfNumericAtts": 90.15942028985508, "MinNominalAttDistinctValues": 2, "PercentageOfBinaryFeatures": 14.285714285714285, "Quartile2StdDevOfNumericAtts": 14.206082045720775, "RandomTreeDepth1AUC": null, "Dimensionality": 0.020289855072463767, "MaxMutualInformation": null, "MinSkewnessOfNumericAtts": -0.38843312376254074, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": null, "RandomTreeDepth1ErrRate": null, "EquivalentNumberOfAtts": null, "MaxNominalAttDistinctValues": 2, "MinStdDevOfNumericAtts": 3.337835264577127, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 11.31103980397396, "AutoCorrelation": 0.8255813953488372, "RandomTreeDepth1Kappa": null, "J48.00001.AUC": null, "MaxSkewnessOfNumericAtts": 3.063498642355853, "MinorityClassPercentage": null, "PercentageOfNumericFeatures": 85.71428571428571, "Quartile3MeansOfNumericAtts": 74.94202898550725, "CfsSubsetEval_DecisionStumpAUC": null, "RandomTreeDepth2AUC": null, "J48.00001.ErrRate": null, "MaxStdDevOfNumericAtts": 39.25461616755085, "MinorityClassSize": null, "PercentageOfSymbolicFeatures": 14.285714285714285, "Quartile3MutualInformation": null, "CfsSubsetEval_DecisionStumpErrRate": null, "RandomTreeDepth2ErrRate": null, "J48.00001.Kappa": null, "MeanAttributeEntropy": null, "NaiveBayesAUC": null, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 2.9154448302673353, "CfsSubsetEval_DecisionStumpKappa": null, "RandomTreeDepth2Kappa": null, "J48.0001.AUC": null, "MeanKurtosisOfNumericAtts": 6.57117573055445, "NaiveBayesErrRate": null, "Quartile1KurtosisOfNumericAtts": 2.1256888395117097, "Quartile3StdDevOfNumericAtts": 24.44788572159709, "CfsSubsetEval_NaiveBayesAUC": null, "RandomTreeDepth3AUC": null, "J48.0001.ErrRate": null, "MeanMeansOfNumericAtts": 42.802898550724635, "MeanMutualInformation": null, "NaiveBayesKappa": null, "Quartile1MeansOfNumericAtts": 19.346376811594208, "REPTreeDepth1AUC": null, "CfsSubsetEval_NaiveBayesErrRate": null, "RandomTreeDepth3ErrRate": null, "J48.0001.Kappa": null }, "tags": [ { "tag": "study_127", "uploader": "4209" }, { "tag": "study_50", "uploader": "64" }, { "tag": "study_88", "uploader": "4209" }, { "tag": "uci", "uploader": "1" } ], "features": [ { "name": "drinks", "index": "5", "type": "numeric", "distinct": "16", "missing": "0", "target": "1", "min": "0", "max": "20", "mean": "3", "stdev": "3" }, { "name": "mcv", "index": "0", "type": "numeric", "distinct": "26", "missing": "0", "min": "65", "max": "103", "mean": "90", "stdev": "4" }, { "name": "alkphos", "index": "1", "type": "numeric", "distinct": "78", "missing": "0", "min": "23", "max": "138", "mean": "70", "stdev": "18" }, { "name": "sgpt", "index": "2", "type": "numeric", "distinct": "67", "missing": "0", "min": "4", "max": "155", "mean": "30", "stdev": "20" }, { "name": "sgot", "index": "3", "type": "numeric", "distinct": "47", "missing": "0", "min": "5", "max": "82", "mean": "25", "stdev": "10" }, { "name": "gammagt", "index": "4", "type": "numeric", "distinct": "94", "missing": "0", "min": "5", "max": "297", "mean": "38", "stdev": "39" }, { "name": "selector", "index": "6", "type": "nominal", "distinct": "2", "missing": "0", "ignore": "1", "distr": [] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 2, "nr_of_downloads": 29, "total_downloads": 43, "reach": 31, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }