From 6937e28cdd386f433caba0f19fb5900992baafa4 Mon Sep 17 00:00:00 2001 From: Niklas Birk Date: Sun, 30 Jun 2019 23:59:49 +0200 Subject: [PATCH] Crossvalidation added --- resources/app1.data | 316 ++++++++++++++++++ resources/app1.test | 157 +++++++++ src/machine_learning/DataClass.java | 12 + src/machine_learning/MachineLearning.java | 9 + src/machine_learning/Vector.java | 10 + .../nearest_neighbour/CrossValidation.java | 106 ++++++ .../nearest_neighbour/DataClass.java | 7 - .../nearest_neighbour/KNearestNeighbour.java | 28 +- .../perceptron/Perceptron.java | 5 +- test/machine_learning/VectorTest.java | 49 ++- .../KNearestNeighbourTest.java | 119 ++++++- .../perceptron/PerceptronTest.java | 3 +- 12 files changed, 787 insertions(+), 34 deletions(-) create mode 100644 resources/app1.data create mode 100644 resources/app1.test create mode 100644 src/machine_learning/DataClass.java create mode 100644 src/machine_learning/MachineLearning.java create mode 100644 src/machine_learning/nearest_neighbour/CrossValidation.java delete mode 100644 src/machine_learning/nearest_neighbour/DataClass.java diff --git a/resources/app1.data b/resources/app1.data new file mode 100644 index 0000000..6eb4534 --- /dev/null +++ b/resources/app1.data @@ -0,0 +1,316 @@ +26,1,0,0,1,0,1,0,1,1,0,37.9,38.8,23100,0,1 +17,2,0,0,1,0,1,0,1,1,0,36.9,37.4,8100,0,0 +28,1,0,0,1,0,0,0,0,0,0,36.7,36.9,9600,0,1 +50,1,0,0,1,0,1,0,1,0,0,38.3,38.8,17600,0,1 +62,1,0,0,1,0,0,0,1,1,0,36.9,37.3,10200,0,1 +6,1,0,0,1,0,1,0,0,0,0,36.6,36.8,9200,0,0 +18,2,0,0,1,0,0,0,0,1,0,37.6,37.9,18700,0,1 +13,2,0,0,1,0,0,0,1,0,0,35.7,37.3,11200,0,0 +12,2,0,0,1,0,1,1,1,1,1,36.5,38.0,16400,0,1 +21,2,0,0,1,0,0,0,0,0,0,36.0,37.4,13300,0,0 +13,1,0,0,1,0,0,0,0,0,0,37.2,37.5,7000,0,0 +19,1,0,0,1,0,0,0,0,0,0,37.0,37.5,16400,0,1 +12,2,0,0,1,1,1,1,1,1,1,39.4,39.9,26800,0,1 +10,2,0,0,1,0,0,0,1,0,0,36.1,37.5,8200,0,0 +47,1,0,0,1,0,1,0,1,0,0,36.8,37.4,10800,0,1 +11,2,1,1,1,1,0,0,1,1,0,37.3,38.1,15000,0,1 +14,2,0,0,1,1,1,0,1,1,0,37.1,37.1,12100,0,1 +17,2,0,0,1,0,1,0,1,0,0,37.9,36.9,7500,0,1 +18,2,0,0,1,1,0,0,1,1,0,35.8,37.5,7200,0,0 +21,2,0,0,1,0,0,0,1,0,0,37.0,37.5,10100,0,0 +9,2,0,0,1,1,1,0,0,0,0,36.6,37.2,11800,0,1 +9,2,1,1,1,0,0,0,0,0,0,37.3,37.9,10200,0,0 +60,2,0,0,1,0,0,0,0,0,1,37.6,37.4,5100,0,0 +30,2,0,0,1,0,0,0,0,0,0,36.2,36.5,12400,0,0 +25,2,0,0,1,0,0,0,0,0,0,37.0,37.7,9700,0,0 +11,2,0,0,1,0,1,0,0,0,0,37.0,37.7,8700,0,0 +26,2,0,0,1,0,0,0,1,1,0,37.3,37.5,17000,0,1 +8,2,1,0,1,0,1,0,0,0,0,36.4,37.5,18800,0,1 +24,2,0,0,1,0,1,0,0,0,0,38.6,38.9,7800,0,0 +21,1,0,0,1,0,1,0,1,1,1,37.3,37.5,15300,0,1 +14,1,1,0,1,0,1,0,1,1,0,36.6,37.1,9400,0,1 +15,2,0,0,1,0,0,0,0,0,0,36.4,36.8,8200,0,0 +18,2,1,1,0,0,0,0,0,0,0,35.8,36.9,8400,0,0 +87,2,1,0,1,1,0,0,1,0,0,36.7,37.2,13100,1,0 +25,1,0,0,1,0,0,0,0,0,0,36.6,37.0,10400,0,0 +19,2,0,0,1,0,0,0,1,1,1,37.7,37.9,17400,0,1 +16,1,0,0,1,0,1,0,1,1,0,37.3,37.9,16900,0,1 +19,2,0,0,1,0,1,0,1,1,0,36.2,36.7,11000,0,1 +5,2,1,1,1,1,1,1,1,1,1,37.6,38.3,20700,0,1 +16,2,0,0,1,0,1,1,1,0,0,37.3,38.3,9200,0,0 +15,2,0,0,1,0,0,0,1,0,0,36.9,37.0,7000,0,0 +12,1,0,0,1,1,0,0,1,0,0,36.1,37.8,12900,0,1 +52,1,0,0,0,0,0,0,0,0,0,36.4,36.8,5600,0,0 +23,2,0,0,1,0,1,0,1,1,1,36.8,37.2,1100,0,1 +9,2,0,0,1,1,1,0,0,0,0,36.1,37.1,5300,0,0 +25,1,0,0,1,0,1,0,0,0,0,37.0,37.5,7800,0,0 +35,2,1,1,1,1,1,1,1,1,1,36.9,37.8,23500,0,1 +16,2,1,0,1,0,0,1,0,1,0,36.8,37.3,14300,0,1 +22,1,0,0,1,0,0,0,1,0,0,37.4,37.1,9700,0,1 +16,1,0,0,1,1,0,0,1,1,0,35.6,38.1,15400,0,1 +46,1,0,0,1,0,1,0,1,1,1,36.9,37.4,16100,0,1 +15,2,0,0,1,0,1,0,1,1,0,36.7,37.1,5500,0,0 
+18,1,1,0,1,0,1,0,0,1,0,37.0,37.7,9400,0,0 +58,2,0,0,1,1,0,0,0,0,0,36.5,37.0,8100,0,0 +8,2,0,0,1,0,1,0,1,1,0,38.2,38.8,18000,0,1 +12,2,0,0,1,0,0,0,0,0,0,36.4,37.4,5100,0,1 +79,2,0,0,1,0,1,0,1,0,0,36.5,38.0,8500,0,1 +39,1,0,0,1,0,1,0,1,1,0,37.0,37.7,11400,0,1 +10,2,0,0,1,1,0,0,0,0,0,36.5,37.7,13100,0,1 +21,2,0,0,1,1,1,0,1,1,0,36.9,37.9,6900,0,0 +37,1,0,0,1,0,1,0,1,0,0,36.8,38.0,10800,0,1 +15,1,0,0,1,0,0,0,1,1,0,36.1,37.3,9100,0,0 +12,2,1,0,1,0,1,0,1,1,0,36.9,37.8,6100,0,1 +31,1,0,0,1,0,1,0,1,1,0,36.8,37.8,10700,0,1 +17,2,0,0,1,0,0,0,0,0,0,36.8,37.3,8500,0,0 +18,2,0,0,1,0,1,0,1,1,0,35.2,37.6,13900,0,1 +31,1,0,0,1,0,1,0,0,0,0,38.5,38.9,15600,0,1 +12,2,0,0,1,0,0,0,0,0,0,36.4,37.3,6100,0,0 +17,2,0,0,1,0,0,0,1,0,1,37.8,38.5,18400,0,0 +11,2,0,0,1,0,0,0,0,0,0,36.9,37.8,10600,0,1 +22,2,0,0,1,0,1,0,0,0,0,37.7,37.9,10000,0,0 +21,1,0,0,1,0,0,0,1,0,0,36.4,37.3,11200,0,1 +22,1,0,0,1,0,1,0,1,0,1,36.7,37.9,12800,0,0 +51,1,1,1,1,1,1,0,1,1,0,37.4,38.2,15400,0,1 +16,2,0,0,1,0,0,0,1,0,0,36.0,37.0,10000,0,1 +23,2,0,0,1,0,1,1,1,0,0,36.6,37.5,11200,0,0 +25,2,0,0,1,0,0,0,0,1,0,36.8,37.6,14200,0,0 +19,1,0,0,1,0,1,0,1,1,0,36.4,38.4,4200,0,0 +7,2,0,0,1,0,1,1,1,1,1,37.4,38.4,25600,0,1 +49,2,0,0,1,0,0,0,1,1,0,37.2,37.8,11000,0,1 +36,1,0,0,1,0,1,0,1,1,1,36.8,36.8,7500,0,1 +32,2,0,0,1,0,1,0,1,1,0,37.2,37.7,8100,0,1 +21,2,0,0,1,0,0,0,0,0,0,36.4,37.4,10000,0,0 +16,2,0,0,1,0,0,0,1,1,0,36.8,37.9,22000,0,1 +22,2,0,0,1,0,0,0,0,0,0,36.5,37.0,5200,0,0 +10,1,0,0,1,0,1,0,1,1,1,38.6,39.3,20400,0,1 +21,2,1,0,1,0,0,0,0,1,0,36.5,37.0,7100,0,0 +14,1,0,0,1,0,0,0,0,1,0,36.6,37.0,8500,0,1 +88,2,0,0,1,1,0,0,1,1,1,37.6,39.0,9600,1,1 +11,2,1,0,1,1,1,0,1,1,0,37.3,38.4,18300,0,1 +13,1,0,0,1,0,1,0,1,1,0,36.0,37.5,15200,0,1 +15,2,0,0,1,1,1,0,1,1,0,37.1,38.0,12900,0,1 +66,1,1,1,1,1,1,1,1,1,1,38.2,39.0,20900,1,1 +25,2,0,0,1,0,0,0,0,0,0,36.2,36.8,5700,0,0 +12,2,0,0,1,0,0,0,1,0,1,35.7,37.7,8100,0,1 +68,1,1,1,1,1,1,0,0,1,0,37.5,38.3,11200,0,1 +54,2,0,0,1,0,0,0,0,0,0,37.8,38.9,4200,0,0 +18,1,1,1,1,1,1,1,1,1,1,38.0,38.4,23400,0,1 +54,1,1,0,1,0,0,0,0,0,0,36.8,37.1,8000,0,0 +73,2,0,0,1,0,0,0,1,0,0,36.2,37.6,15400,0,1 +29,1,0,0,1,0,1,0,1,1,0,37.8,38.4,13800,0,1 +23,2,0,0,1,0,0,0,0,0,0,36.2,37.2,6900,0,0 +15,1,0,0,1,0,0,0,0,1,1,36.2,36.8,10700,0,1 +42,2,0,0,1,0,1,0,0,1,1,36.0,37.3,19300,0,1 +15,2,0,0,1,0,0,0,0,0,0,37.0,37.8,7300,0,0 +18,1,0,0,1,0,0,0,1,1,0,36.0,36.2,7100,0,0 +11,2,0,0,1,0,1,0,1,1,0,38.1,39.3,12200,0,1 +32,2,0,0,1,1,1,1,1,1,1,37.2,38.5,10400,0,0 +40,2,0,0,1,0,0,0,0,1,0,37.0,37.3,9900,0,0 +46,1,1,0,1,1,1,1,1,1,1,37.3,38.1,14700,0,1 +20,2,0,0,1,0,1,0,1,0,0,37.0,37.8,11000,0,0 +38,2,0,0,1,0,1,0,1,1,0,36.6,37.8,15200,0,1 +14,2,0,0,1,0,0,0,1,0,0,36.9,37.8,11200,0,0 +16,2,0,0,1,0,0,0,1,1,0,37.3,37.8,15800,0,1 +73,2,1,1,1,1,1,0,0,1,0,36.8,37.2,10700,1,1 +16,1,0,0,1,1,0,0,0,1,0,36.4,37.7,11200,0,0 +14,2,0,0,1,0,1,0,1,0,0,36.2,36.5,6700,0,0 +9,1,1,0,1,0,1,1,1,1,0,36.5,38.1,9000,0,1 +28,1,0,0,1,0,1,0,1,1,0,37.2,38.4,14200,0,0 +18,2,1,0,1,0,0,0,0,1,0,37.1,37.8,4500,0,0 +11,1,0,0,1,0,1,0,1,0,0,36.6,36.8,21800,0,1 +32,2,0,0,1,0,0,0,0,0,0,36.2,36.8,10000,0,0 +26,1,0,0,1,0,0,0,0,1,0,35.9,37.3,8600,0,0 +12,1,0,0,1,0,1,0,1,0,0,37.1,37.6,13300,0,1 +18,2,0,0,1,0,1,0,1,1,0,36.5,37.6,6500,0,0 +22,1,0,0,1,0,1,0,1,0,0,37.7,37.9,17900,0,1 +9,2,0,0,1,0,0,0,1,1,0,37.2,37.0,9300,0,0 +34,2,0,1,0,0,0,0,0,1,0,36.6,37.4,7900,0,0 +26,2,0,0,1,1,1,0,1,1,1,37.2,38.3,23400,0,1 +47,2,0,0,1,0,1,0,1,1,0,37.0,37.8,13000,0,1 +56,1,1,1,1,1,1,1,1,1,0,36.8,38.1,20300,0,1 +25,2,1,0,1,0,1,0,1,1,0,36.8,37.2,13000,0,1 +12,1,0,0,1,0,0,0,1,1,1,36.0,36.7,5800,0,1 
+73,1,0,0,1,0,1,0,1,1,1,37.4,38.0,17600,0,1 +24,1,1,0,1,0,1,0,0,0,0,36.6,37.3,9300,0,0 +20,2,1,1,1,0,1,0,1,1,0,36.6,37.1,19900,0,1 +13,1,0,0,1,0,0,0,0,0,0,37.1,37.6,7700,0,1 +11,2,1,0,1,0,1,0,0,0,0,36.4,37.3,8100,0,0 +6,2,0,0,1,1,1,0,1,0,0,38.7,39.4,18100,0,0 +17,2,0,0,1,1,1,0,1,1,0,37.2,37.8,15100,0,1 +65,2,0,0,1,0,0,0,0,0,0,37.5,38.0,7900,0,0 +12,2,0,0,1,0,1,0,0,0,0,36.8,37.2,7800,0,1 +10,2,0,0,1,1,1,0,1,1,0,36.9,37.3,9600,0,1 +62,1,0,0,1,0,1,0,1,1,1,36.7,37.8,12500,0,1 +21,2,0,0,1,0,0,0,1,1,0,37.0,37.6,6900,0,0 +11,2,1,0,1,1,1,0,1,0,0,36.5,37.5,7100,0,1 +13,2,0,0,1,0,1,0,0,0,0,37.5,37.8,13300,0,1 +22,2,0,0,1,0,1,0,1,1,0,37.0,37.8,12100,0,0 +17,1,0,0,1,0,1,0,1,0,0,36.8,38.2,17800,0,1 +69,2,1,1,1,1,0,1,1,1,1,37.2,37.6,21800,1,0 +30,2,0,0,1,1,0,0,0,0,0,36.5,37.0,10900,0,0 +17,1,0,0,1,0,1,0,1,0,0,36.3,36.8,17200,0,1 +15,2,0,0,1,0,0,0,0,0,0,39.0,39.1,10100,0,1 +65,1,1,0,1,0,1,0,0,0,0,35.4,37.1,9200,0,1 +19,2,0,0,1,0,0,0,1,0,0,36.6,37.1,11100,0,1 +84,2,0,0,1,1,0,0,0,0,0,36.3,36.8,11500,1,0 +24,2,0,0,1,1,0,0,0,0,0,36.8,37.2,7900,0,0 +13,2,0,0,1,0,0,0,0,0,0,37.0,37.2,7400,0,1 +27,2,0,0,1,0,0,0,0,0,0,36.3,36.6,11200,0,1 +25,1,0,0,1,0,0,0,0,0,0,36.6,36.8,14600,0,0 +22,2,0,0,1,0,1,0,1,0,1,39.6,40.2,13700,0,0 +30,2,0,0,1,1,0,0,1,0,0,37.6,37.7,15900,0,1 +24,2,0,0,1,0,0,0,0,0,0,36.6,37.4,8900,0,0 +6,1,0,0,1,0,1,1,1,1,1,37.5,38.2,25700,0,1 +29,2,0,0,1,0,1,0,1,1,0,36.4,37.2,10300,0,1 +22,2,0,0,1,0,1,0,0,0,0,37.5,37.7,11700,0,0 +44,1,0,0,1,0,1,0,1,0,1,36.2,36.9,6700,0,1 +20,2,0,0,1,0,0,0,0,0,0,36.6,37.3,9600,0,0 +14,2,1,1,1,0,1,0,1,1,1,37.2,38.0,18000,0,1 +23,1,0,0,1,0,0,0,0,0,0,36.5,36.9,9300,0,0 +27,1,0,0,1,0,1,0,1,1,1,37.1,37.7,15900,0,1 +83,2,0,0,1,0,1,0,1,0,0,37.6,38.1,14500,0,1 +14,2,0,0,1,1,0,0,1,1,0,36.0,36.9,19600,0,1 +42,2,0,0,1,0,1,0,0,1,0,36.2,38.7,14400,0,1 +32,2,0,0,1,1,1,0,1,1,0,37.2,37.5,14900,0,1 +12,1,0,0,1,0,1,0,1,1,1,37.2,38.2,19600,0,1 +12,1,0,0,1,0,1,1,1,1,0,37.4,38.7,19000,0,1 +27,2,0,0,1,0,1,0,1,1,0,36.9,37.9,11400,0,1 +13,1,1,0,1,0,1,0,1,0,0,38.0,39.2,11000,0,0 +32,1,0,0,1,0,0,0,1,1,0,36.8,37.5,10500,0,1 +24,2,0,0,1,0,1,0,1,1,0,36.8,37.5,12700,0,1 +30,1,0,0,1,1,1,0,1,1,0,35.8,37.2,8300,0,1 +19,2,1,1,1,1,1,1,1,1,0,37.2,38.8,12200,0,1 +19,2,0,0,1,0,0,0,0,0,0,36.4,37.2,8200,0,0 +34,2,0,0,1,0,0,0,1,1,0,37.3,37.4,11200,0,1 +28,1,0,0,1,0,1,0,1,1,0,36.7,37.9,12200,0,1 +18,2,0,0,1,0,1,1,1,1,1,37.1,38.4,26900,0,1 +28,2,0,0,1,0,0,0,0,1,0,36.5,37.0,6400,0,0 +33,2,1,0,1,0,0,0,0,0,0,36.0,36.8,7500,0,1 +52,2,0,0,1,0,0,0,1,1,0,37.1,36.3,4800,0,0 +8,2,0,1,1,0,0,0,1,1,0,36.3,36.8,22000,0,0 +11,1,0,0,1,0,1,0,1,1,0,36.5,37.5,16000,0,1 +28,2,0,0,1,0,1,0,0,0,0,36.9,37.6,6700,0,1 +15,2,0,0,1,0,1,0,1,0,0,36.6,37.3,8800,0,1 +14,1,0,0,1,0,1,0,1,0,0,36.6,37.3,16100,0,1 +19,1,0,0,1,0,0,0,0,0,0,36.2,37.2,7100,0,0 +9,2,0,0,1,0,0,0,0,0,0,37.6,38.5,14600,0,1 +38,2,0,0,1,0,0,0,1,1,0,37.7,38.3,14500,0,1 +30,1,0,0,1,0,1,0,1,1,1,37.3,37.7,11300,0,1 +24,2,0,0,1,0,0,0,0,0,0,36.6,36.8,11100,0,0 +34,2,0,0,1,0,0,0,0,0,0,36.3,36.9,8500,0,0 +22,2,0,0,1,0,0,0,1,1,0,36.8,37.5,8300,0,0 +40,2,0,0,1,0,0,0,0,0,0,37.4,38.6,9000,0,0 +25,1,0,0,1,0,0,0,0,0,0,36.7,37.1,8200,0,0 +7,1,0,0,1,0,1,0,1,1,1,37.7,38.0,18900,0,1 +13,2,0,0,1,0,1,0,0,0,0,38.9,39.3,7900,0,0 +17,2,0,0,1,0,1,0,0,1,0,37.5,37.7,13200,0,1 +20,2,1,1,1,1,1,1,1,1,1,37.7,36.9,12300,0,1 +10,1,0,0,1,0,0,0,0,0,0,35.5,36.1,9000,0,1 +32,1,0,0,1,0,1,0,1,0,0,36.1,37.2,9000,0,0 +15,1,0,0,1,0,1,0,1,1,1,37.2,37.6,15300,0,1 +72,1,0,0,1,1,1,0,1,1,0,36.3,37.6,12000,0,1 +15,1,0,0,1,0,0,0,1,1,1,37.2,37.6,16400,0,1 +17,1,0,0,1,0,1,0,1,1,1,36.8,37.9,13000,0,1 
+22,2,0,0,1,0,1,0,1,1,0,36.8,37.0,7400,0,1 +23,1,0,0,1,0,1,0,1,1,0,37.0,37.9,12100,0,1 +26,1,0,0,1,0,0,0,0,0,0,36.9,37.0,5200,0,0 +16,1,1,0,1,0,1,0,1,1,0,36.5,37.2,6900,0,0 +21,1,0,0,1,0,1,0,1,0,0,37.9,38.5,18800,0,1 +17,1,0,0,1,0,1,0,1,1,1,38.3,38.5,11800,0,1 +8,2,0,0,1,0,1,0,1,1,0,36.2,37.2,18100,0,1 +12,2,0,0,1,0,1,0,1,1,0,37.1,37.6,6700,0,0 +17,2,0,0,1,0,0,0,1,1,0,36.5,37.3,9000,0,0 +13,2,0,0,1,0,0,0,1,1,0,36.6,37.0,9200,0,0 +20,1,0,0,1,0,1,0,1,0,0,38.0,38.6,11300,0,0 +17,1,0,0,1,0,0,0,1,0,0,37.8,38.2,15300,0,1 +24,2,0,0,1,1,1,0,1,1,0,37.4,38.3,11700,0,0 +33,2,1,1,0,0,0,0,0,1,0,37.2,37.8,5000,0,0 +34,2,0,0,1,1,1,0,1,1,0,36.2,36.8,8800,0,0 +24,1,0,0,1,0,1,0,1,0,0,37.6,38.1,13600,0,1 +24,1,0,0,1,0,1,0,1,1,0,36.9,37.5,13100,0,1 +22,1,0,0,1,0,0,0,0,0,0,36.4,36.8,6100,0,0 +10,1,0,0,1,0,1,0,0,1,0,37.8,38.5,11900,0,1 +22,2,0,0,1,0,1,0,1,1,1,36.5,37.6,13600,0,1 +13,1,0,0,1,0,0,0,0,0,0,36.8,37.4,8600,0,1 +19,1,0,0,1,0,1,0,0,1,0,37.0,37.5,11900,0,1 +16,2,0,0,1,0,1,0,0,0,0,36.8,37.4,15200,0,1 +33,1,0,0,1,0,1,0,1,1,1,36.4,38.0,14000,0,1 +24,1,1,1,1,1,1,1,1,1,1,39.2,39.7,13300,0,1 +18,1,0,0,1,0,0,0,1,0,0,36.9,37.7,10800,0,1 +22,2,0,0,1,0,0,0,0,0,0,36.4,37.0,14200,0,1 +28,2,0,0,1,0,0,0,0,0,0,37.4,37.4,11200,0,1 +38,1,0,0,1,1,0,1,1,1,1,38.6,37.4,19000,0,1 +17,2,0,0,1,0,1,0,0,0,0,36.8,37.9,18100,0,1 +23,1,0,0,1,0,1,0,1,1,1,37.2,37.7,12900,0,1 +16,1,0,0,1,0,1,0,1,1,1,36.8,37.2,19000,0,1 +23,1,0,0,1,1,1,0,1,1,0,39.0,39.4,20300,0,1 +18,2,0,0,1,0,0,0,0,0,0,36.5,36.8,8100,0,0 +23,2,0,0,1,0,1,0,1,1,0,37.3,37.5,12700,0,1 +23,2,1,0,1,0,1,0,1,1,0,37.0,37.4,4500,0,0 +38,1,0,0,1,0,1,0,1,1,1,36.7,37.8,8700,0,1 +14,2,0,0,1,0,1,0,1,0,1,37.0,37.5,7800,0,1 +40,1,1,1,1,1,1,0,1,1,1,37.3,38.2,12000,0,0 +64,2,0,0,1,1,1,0,0,1,0,36.8,37.7,7200,0,1 +5,2,0,0,1,0,1,0,1,0,0,37.2,37.8,13500,0,1 +10,1,0,0,1,0,0,0,0,0,0,36.0,37.1,9900,0,0 +12,2,0,0,1,0,0,0,0,0,0,37.2,37.6,6300,0,0 +24,2,0,0,1,0,1,0,1,0,0,37.7,38.8,12300,0,0 +16,1,0,0,1,0,1,0,1,1,0,36.8,37.4,8200,0,1 +15,2,0,0,1,0,0,0,0,0,0,36.1,36.9,6600,0,0 +12,2,1,0,1,1,1,0,1,1,0,36.0,37.4,9600,0,1 +36,2,0,0,1,0,1,0,1,0,0,36.4,37.0,7900,0,0 +82,2,0,0,1,1,1,0,1,1,0,36.9,38.2,20900,1,1 +11,2,0,1,0,0,0,0,0,0,0,36.2,37.3,9100,0,0 +18,2,0,0,1,0,0,0,1,0,0,36.8,35.6,5200,0,0 +24,2,0,0,1,0,1,0,1,0,0,36.0,37.1,11800,0,1 +19,2,0,0,1,1,0,0,1,0,0,36.1,37.3,11100,0,1 +10,1,1,1,1,1,1,0,1,0,0,37.7,38.4,15000,0,0 +18,2,0,0,1,0,0,0,0,0,0,36.7,36.9,8900,0,0 +31,2,0,0,1,0,1,0,0,0,0,37.6,38.4,13600,0,1 +11,2,1,0,1,0,1,0,1,1,0,36.4,36.8,6100,0,1 +19,1,0,0,1,0,1,0,1,1,1,36.5,37.2,8600,0,1 +37,2,0,0,1,0,1,0,1,1,0,37.5,38.5,11100,0,1 +36,2,0,0,1,1,0,0,0,0,0,36.7,36.8,5800,0,0 +39,2,0,0,1,0,1,0,0,0,0,36.8,37.4,10900,0,0 +10,2,0,0,1,0,0,0,1,0,0,36.0,36.9,5600,0,0 +26,2,0,0,1,0,0,0,0,0,0,36.4,37.0,5200,0,1 +36,2,1,0,1,0,1,0,0,1,0,37.2,37.7,8500,0,1 +33,1,0,0,1,0,1,0,1,1,1,37.9,38.2,18400,0,1 +15,2,0,0,1,0,0,0,0,0,0,37.0,37.0,7700,0,0 +74,2,1,0,1,0,1,0,1,0,0,36.3,35.3,8100,0,0 +25,2,0,0,1,1,0,0,0,0,0,36.3,36.7,4500,0,0 +25,2,0,0,1,1,1,0,1,0,0,38.5,38.7,11600,0,1 +17,1,0,0,1,0,1,0,1,0,0,38.0,38.5,20400,0,1 +33,1,0,0,1,1,1,0,0,0,0,37.4,37.8,8800,0,0 +22,2,0,0,1,0,0,0,1,0,0,36.7,37.9,20000,0,1 +13,1,1,0,1,0,1,0,0,1,0,36.4,36.6,7500,0,0 +33,1,0,0,1,0,0,0,0,1,0,37.0,36.6,6700,0,0 +22,2,0,0,1,0,1,0,0,0,0,37.1,38.1,13000,0,0 +25,2,0,0,1,0,0,0,0,0,0,36.3,36.8,19200,0,0 +19,2,0,0,1,0,0,0,1,0,1,37.0,38.1,11000,0,0 +26,1,0,0,1,1,1,0,0,0,0,35.9,36.9,12000,0,1 +14,1,0,0,1,0,1,0,1,1,0,36.3,36.9,9400,0,0 +19,2,0,0,1,1,0,0,0,0,0,35.9,37.0,6600,0,0 +6,2,1,1,1,0,1,0,1,1,1,38.5,39.0,13000,0,1 
+21,2,0,0,1,0,0,0,1,1,0,36.8,37.7,14000,0,0 +21,1,0,0,1,0,0,0,0,0,1,36.6,36.6,5100,0,0 +24,2,0,0,1,0,1,0,0,0,0,36.3,36.9,17700,0,0 +16,1,0,0,1,0,0,0,0,0,0,35.8,36.4,9000,0,0 +18,2,0,0,1,0,1,0,0,0,0,36.7,37.2,10300,0,0 +17,2,0,0,1,0,0,0,0,1,0,36.8,37.2,9600,0,0 +13,2,0,0,1,0,1,0,1,1,0,36.4,36.8,6300,0,0 +15,2,0,0,1,0,1,0,0,0,0,37.0,38.0,12000,0,0 +41,2,0,0,1,0,0,0,0,0,0,36.8,36.9,4900,0,0 +27,1,0,0,1,0,1,0,1,0,0,36.9,37.6,11000,0,1 +11,2,0,0,1,1,0,0,0,0,0,36.4,37.3,13200,0,1 +33,2,0,0,1,0,0,0,0,0,0,37.2,37.7,25000,0,1 +35,2,0,0,1,0,0,0,1,1,1,37.6,38.0,11700,0,1 +20,1,0,0,1,0,0,0,1,0,0,36.3,36.7,5300,0,0 +46,1,0,0,1,1,0,0,0,1,0,36.8,36.8,7500,0,0 +9,1,0,0,1,0,0,0,1,1,0,36.1,37.0,3000,0,0 +45,1,0,0,1,0,1,0,1,1,0,36.6,37.4,14400,0,1 +22,2,0,0,1,0,0,0,0,0,0,36.7,37.4,14800,0,0 +31,2,1,0,1,1,1,0,1,1,1,36.0,36.8,9100,0,1 +37,1,0,0,1,0,1,0,0,0,0,37.4,37.8,25200,0,1 +7,1,0,0,1,0,1,0,1,1,0,36.4,38.0,20400,0,1 \ No newline at end of file diff --git a/resources/app1.test b/resources/app1.test new file mode 100644 index 0000000..fc7a031 --- /dev/null +++ b/resources/app1.test @@ -0,0 +1,157 @@ +19,2,0,0,1,1,0,0,0,0,0,36.6,37.4,5700,0,0 +37,2,0,0,1,0,0,0,0,0,0,36.1,36.9,6000,0,0 +10,1,0,0,1,0,1,0,1,1,0,37.2,37.9,17500,0,1 +12,1,0,0,1,0,0,0,0,0,0,37.6,37.8,11900,0,1 +31,1,0,0,1,0,0,0,0,0,0,36.9,36.5,8400,0,0 +19,2,0,0,1,0,1,0,1,1,1,36.4,37.0,7800,0,0 +67,1,0,0,1,0,1,0,1,0,0,37.1,37.5,10400,1,1 +19,1,1,0,1,0,0,0,0,1,0,37.0,37.5,16000,0,1 +28,1,0,0,1,0,1,0,1,1,1,36.3,37.2,12100,0,1 +16,2,0,0,1,0,0,0,0,0,0,36.8,37.0,10700,0,1 +26,2,0,0,1,0,0,0,0,0,0,36.5,37.0,7300,0,0 +11,2,1,0,1,0,1,1,1,1,0,37.9,38.3,23100,0,1 +80,2,0,0,1,0,1,1,1,1,0,37.0,38.1,6500,0,1 +23,2,0,0,1,0,0,0,0,0,0,36.5,37.0,8600,0,0 +54,2,0,0,1,0,0,0,0,0,0,38.0,38.8,4200,0,1 +13,1,0,0,1,1,0,0,1,1,0,37.4,36.3,5800,0,1 +14,2,0,0,1,0,0,0,0,0,0,36.4,37.3,7900,0,0 +11,2,0,0,1,0,1,0,0,0,0,36.0,38.2,14800,0,0 +23,1,0,0,1,0,1,0,1,1,0,36.4,37.4,16100,0,1 +14,2,0,0,1,0,0,1,1,1,0,36.1,37.5,10700,0,1 +33,2,1,0,1,0,1,0,0,0,0,35.5,37.1,23000,0,1 +46,2,1,0,1,0,1,0,1,1,0,36.0,37.1,15000,0,1 +31,2,0,0,1,0,0,0,0,0,0,36.8,37.2,7800,0,0 +17,1,0,0,1,0,1,0,0,0,0,37.0,37.8,8000,0,1 +79,2,1,0,1,0,0,0,0,0,1,36.4,37.2,13900,0,1 +24,2,0,0,1,0,1,0,0,0,0,37.1,37.7,7600,0,0 +33,1,0,0,1,0,1,0,0,0,0,37.0,37.8,10900,0,0 +11,2,1,0,1,1,1,0,1,1,1,38.0,38.5,17000,0,1 +25,2,0,0,1,0,0,0,1,0,0,36.4,37.0,8800,0,1 +15,2,0,0,1,0,1,0,0,0,0,37.3,37.8,7600,0,0 +26,2,1,1,1,1,1,0,1,1,0,36.8,37.1,10400,0,1 +10,2,0,0,1,0,0,0,1,1,0,38.4,39.1,4500,0,0 +11,1,0,0,1,1,0,0,0,0,0,37.3,35.9,10100,0,0 +16,2,0,0,1,0,1,0,1,1,0,37.1,37.7,22000,0,1 +24,2,0,0,1,1,1,0,1,1,1,37.5,37.6,10600,0,1 +15,2,0,0,1,0,1,0,1,0,1,36.3,37.0,13800,0,0 +12,2,0,0,1,0,1,0,0,0,0,37.2,37.8,15000,0,1 +15,2,0,0,1,0,0,0,1,1,1,36.6,37.3,7500,0,1 +58,1,1,0,1,1,1,1,1,1,1,37.0,37.7,7600,0,1 +58,2,0,0,1,1,1,0,1,0,0,39.5,40.0,12000,0,1 +15,2,0,0,1,0,1,1,1,1,0,37.1,37.8,11300,0,0 +13,1,0,0,1,0,1,0,1,0,0,36.8,37.4,6300,0,1 +61,2,0,0,1,0,0,1,0,1,0,36.8,37.7,21000,0,0 +40,1,1,1,1,1,1,0,1,1,0,38.2,39.0,17900,0,0 +13,1,0,0,1,0,1,0,1,1,1,37.9,37.0,8900,0,1 +19,1,0,0,1,0,1,0,0,0,1,36.3,38.2,11600,0,0 +31,2,0,0,1,1,1,0,0,0,0,37.0,37.2,8000,0,1 +76,2,1,0,1,0,1,0,1,0,0,36.4,37.8,7900,0,1 +11,2,0,0,1,0,1,0,0,0,0,36.4,37.2,17300,0,1 +27,2,0,0,1,0,0,0,0,0,0,36.4,36.8,7600,0,0 +21,2,0,0,1,0,0,0,0,0,0,36.6,36.6,5500,0,1 +20,1,0,0,1,0,0,0,1,1,0,37.0,37.9,14000,0,1 +67,2,0,0,1,0,1,0,1,1,0,38.7,39.0,12300,0,1 +29,2,1,0,1,0,0,0,0,0,0,35.7,37.1,11800,0,1 +23,2,0,0,1,0,1,1,0,0,0,37.0,37.2,4000,0,0 +18,1,0,0,1,0,0,0,0,0,0,37.1,37.4,16500,0,1 
+11,1,0,0,1,1,1,0,1,1,0,37.6,37.0,10200,0,1 +37,2,0,0,1,1,0,0,0,0,0,36.1,36.7,14200,0,0 +27,1,0,0,1,0,0,0,1,1,0,36.4,37.4,12400,0,1 +22,1,0,0,1,0,1,0,0,0,0,36.2,37.2,28100,0,1 +13,1,1,0,1,0,0,0,1,1,0,36.0,36.3,5200,0,0 +25,2,0,0,1,0,0,0,0,0,0,37.3,37.1,7800,0,0 +54,2,0,0,1,0,0,0,0,0,0,36.2,36.9,7800,0,0 +24,2,0,0,1,1,1,0,0,0,0,37.0,37.0,9100,0,0 +32,2,0,0,1,0,0,0,0,0,0,36.6,37.8,14700,0,1 +17,2,0,0,1,0,1,0,1,1,0,36.0,36.6,6400,0,1 +31,1,0,0,1,0,1,0,1,1,0,37.1,37.9,10000,0,1 +18,2,0,0,1,0,0,0,0,0,0,36.5,36.8,11700,0,0 +22,1,0,0,1,1,0,0,0,0,0,36.8,37.2,8000,0,0 +35,1,0,0,1,0,1,0,1,1,0,37.5,38.2,10400,0,1 +11,2,0,0,1,0,1,0,1,1,0,36.6,37.2,15100,0,1 +12,2,1,0,1,0,1,0,1,1,0,37.1,36.4,11900,0,1 +44,2,0,0,1,0,1,0,0,0,0,37.2,37.8,8300,0,0 +35,2,0,0,1,0,1,0,1,1,0,36.4,37.3,19100,0,1 +49,1,0,0,1,0,0,0,0,0,0,36.2,36.8,13800,0,1 +27,1,1,1,1,0,0,0,1,1,0,36.9,37.8,6600,0,0 +27,2,0,0,1,0,1,0,0,0,1,36.8,37.4,9200,0,0 +10,1,0,0,1,0,1,0,0,0,0,36.1,37.7,9700,0,0 +30,1,0,0,1,0,1,0,1,0,0,36.8,37.6,13600,0,1 +40,1,1,0,1,0,0,0,0,0,0,36.6,36.8,8700,0,1 +10,2,0,0,1,0,0,0,0,1,0,37.6,36.2,13900,0,1 +15,2,0,0,1,0,0,0,0,0,0,36.8,37.5,6100,0,1 +12,2,0,0,1,0,0,0,1,0,0,36.2,37.1,13700,0,1 +16,1,0,0,1,0,1,1,1,1,1,36.8,37.8,18000,0,1 +10,1,0,0,1,0,0,0,1,1,0,36.6,36.7,6000,0,1 +8,2,0,0,1,0,1,0,1,1,0,36.8,37.6,10000,0,0 +21,1,0,0,1,0,0,0,0,0,0,36.3,36.8,7800,0,0 +12,1,0,0,1,0,1,0,1,1,1,36.0,37.1,21400,0,1 +14,1,0,0,1,1,1,0,1,1,0,37.9,40.0,20100,0,1 +13,2,0,0,1,0,0,0,0,0,1,37.8,38.4,11000,0,0 +30,1,0,0,1,0,1,0,1,1,0,37.1,37.6,17000,0,1 +41,2,0,0,0,0,0,0,0,0,0,36.5,37.0,7900,0,0 +92,2,1,0,1,0,1,1,0,1,0,37.9,38.6,16200,0,0 +14,2,0,0,1,0,0,0,1,1,1,36.8,37.0,6800,0,0 +19,2,0,0,1,0,1,0,0,0,0,37.0,37.6,18700,0,1 +15,2,0,0,1,0,1,0,0,0,0,36.6,37.3,7000,0,0 +11,2,0,0,1,1,1,0,1,1,0,37.0,39.0,13000,0,1 +10,1,0,0,1,0,1,1,1,1,0,37.3,38.2,18100,0,1 +20,1,0,0,1,0,0,0,1,1,0,37.0,38.0,17600,0,1 +9,2,0,0,1,0,1,0,0,0,1,36.6,36.9,18600,0,1 +68,2,0,0,1,1,0,0,0,0,0,36.2,36.8,6100,0,0 +13,2,1,0,1,0,0,0,0,0,0,37.5,37.8,8300,0,0 +21,2,0,0,1,0,0,0,0,0,0,36.6,37.3,14800,0,0 +12,1,1,1,1,1,1,0,0,1,0,36.6,37.3,12000,0,1 +33,2,0,0,1,0,0,0,0,0,1,36.4,37.2,9200,0,0 +36,2,0,0,1,0,1,0,1,1,0,36.8,37.4,5500,0,0 +7,1,0,0,1,0,1,0,1,1,0,37.2,38.1,22500,0,1 +18,2,0,0,1,1,1,0,0,1,0,37.0,37.5,9000,0,1 +25,1,0,0,1,0,1,0,1,1,0,36.8,37.5,12900,0,1 +12,1,0,0,1,0,0,0,1,1,0,37.0,37.9,14900,0,1 +17,2,0,0,1,0,0,0,0,1,0,37.6,37.1,16700,0,1 +8,1,0,0,1,0,1,0,1,1,1,37.6,38.2,16800,0,1 +6,2,1,1,1,1,1,1,1,1,0,37.8,38.7,13700,0,0 +10,2,0,0,1,0,1,0,1,1,0,37.2,37.9,7700,0,0 +20,2,0,0,1,0,0,0,1,1,0,37.0,37.6,12500,0,1 +13,2,1,1,1,1,0,0,1,1,0,37.2,38.0,13100,0,0 +45,1,0,0,1,0,1,0,1,1,0,37.5,38.3,18900,0,0 +13,2,0,0,1,0,0,0,0,0,0,37.4,37.5,8200,0,0 +17,2,0,0,1,0,0,0,1,0,0,36.7,37.4,9000,0,0 +11,2,0,0,1,0,0,0,0,0,0,36.8,37.2,5200,0,1 +42,1,0,0,1,0,1,0,0,0,1,36.5,37.0,20800,0,1 +23,2,0,0,1,0,0,0,1,1,0,37.2,37.9,12900,0,1 +58,1,0,0,1,0,1,0,1,0,1,37.1,37.7,14700,0,0 +9,1,0,0,1,0,1,0,1,1,0,37.2,38.2,12000,0,1 +18,2,0,0,1,0,0,0,0,0,0,36.8,38.4,12000,0,0 +75,1,0,0,1,0,1,0,1,0,0,37.9,38.6,14900,0,1 +17,2,0,0,1,0,0,0,1,1,0,37.1,37.7,22300,0,1 +9,2,0,0,1,0,1,0,1,1,0,37.4,37.4,16000,0,1 +17,2,0,0,1,0,1,0,0,0,0,37.2,38.2,16000,0,0 +64,2,0,0,1,0,1,0,1,0,0,37.8,37.9,12600,0,1 +37,2,0,0,1,0,0,0,0,0,0,37.0,37.5,7800,0,0 +16,2,0,0,1,0,1,0,0,0,1,36.8,37.5,3600,1,0 +37,2,0,0,1,1,1,0,1,0,1,37.4,38.0,12400,0,0 +19,1,0,0,1,0,0,0,0,0,1,36.0,36.5,8900,0,0 +10,1,1,1,1,1,0,0,0,1,0,36.6,37.0,5600,0,0 +39,1,0,0,1,0,1,0,1,1,1,36.8,38.1,15200,0,1 +29,2,0,1,1,1,0,0,0,0,0,36.5,36.9,6600,0,0 
+24,2,0,0,1,0,1,0,1,1,1,37.0,38.0,8900,0,1 +45,2,0,0,1,0,0,0,0,0,0,36.5,37.0,5300,0,0 +20,2,0,0,1,0,1,0,1,0,0,37.5,38.2,13200,0,1 +10,1,0,0,1,0,1,0,1,0,0,36.8,37.5,23300,0,1 +14,2,0,0,1,0,0,0,0,0,0,37.3,37.9,6900,0,0 +62,1,0,0,1,0,1,0,0,0,0,36.7,37.5,8500,0,1 +9,2,0,0,1,0,1,0,1,1,1,37.8,38.0,19300,0,1 +12,1,0,0,1,0,0,0,1,1,0,36.4,36.8,13700,0,0 +29,1,1,1,1,0,1,0,1,0,0,35.6,36.7,8200,0,1 +57,2,1,0,1,0,0,0,0,0,0,36.7,37.7,12500,0,0 +82,2,1,0,0,0,0,0,0,0,0,37.2,37.5,8600,0,0 +13,2,0,0,1,0,1,0,1,0,0,36.4,36.7,10300,0,1 +56,1,1,0,1,0,1,0,1,0,0,37.3,37.9,12600,0,1 +23,2,0,0,1,1,0,0,1,1,0,36.5,37.5,11400,0,1 +31,1,0,0,1,0,0,0,0,0,0,35.2,37.0,14400,0,0 +9,2,0,0,1,1,0,0,0,0,0,36.5,37.0,7600,0,1 +16,2,0,0,1,0,1,0,1,1,0,36.8,37.6,23300,0,1 +16,2,0,0,1,0,0,0,0,0,0,36.8,37.3,22100,0,1 +32,2,1,0,1,1,1,0,1,1,0,36.6,37.6,12800,0,1 +34,1,1,1,1,0,1,0,1,1,0,36.6,37.5,12200,0,1 \ No newline at end of file diff --git a/src/machine_learning/DataClass.java b/src/machine_learning/DataClass.java new file mode 100644 index 0000000..98eaf66 --- /dev/null +++ b/src/machine_learning/DataClass.java @@ -0,0 +1,12 @@ +package machine_learning; + +public enum DataClass +{ + NEGATIVE, + POSITIVE; + + public static DataClass valueOf(int i) + { + return i == 0 ? NEGATIVE : POSITIVE; + } +} diff --git a/src/machine_learning/MachineLearning.java b/src/machine_learning/MachineLearning.java new file mode 100644 index 0000000..3a4bcbd --- /dev/null +++ b/src/machine_learning/MachineLearning.java @@ -0,0 +1,9 @@ +package machine_learning; + +import java.util.List; + +public interface MachineLearning +{ + void learn(List<Vector> positives, List<Vector> negatives); + DataClass classify(Vector toClassify); +} diff --git a/src/machine_learning/Vector.java b/src/machine_learning/Vector.java index 5a8fc3f..e061574 100644 --- a/src/machine_learning/Vector.java +++ b/src/machine_learning/Vector.java @@ -95,6 +95,16 @@ public class Vector return this.values.get(index); } + public Vector decreasedDimension() + { + return new Vector(this.values.subList(0, this.dimension()-1)); + } + + public Vector normalized() + { + return this.divide(this.euclid()); + } + @Override public boolean equals(Object o) { diff --git a/src/machine_learning/nearest_neighbour/CrossValidation.java b/src/machine_learning/nearest_neighbour/CrossValidation.java new file mode 100644 index 0000000..f19f4d6 --- /dev/null +++ b/src/machine_learning/nearest_neighbour/CrossValidation.java @@ -0,0 +1,106 @@ +package machine_learning.nearest_neighbour; + +import machine_learning.DataClass; +import machine_learning.MachineLearning; +import machine_learning.Vector; + +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class CrossValidation +{ + private int paramMin; + private int paramMax; + + public CrossValidation(int paramMin, int paramMax) + { + this.paramMin = paramMin; + this.paramMax = paramMax; + } + + public KNearestNeighbour validate(List<Vector> data, int chunkSize) + { + Collections.shuffle(data); + var counter = new AtomicInteger(0); + var chunks = data.stream() + .collect(Collectors.groupingBy(v -> counter.getAndIncrement() / (data.size() / chunkSize))) + .values(); + + var averageFailRates = new HashMap<Double, Integer>(); + + IntStream.range(paramMin, paramMax).forEach(i -> { + + var failRate = new AtomicReference<>(0d); + + chunks.forEach(chunk -> { + var dataWithoutChunk = data.parallelStream() + .filter(v -> !chunk.contains(v)) +
.collect(Collectors.toList()); + + var mapOfClasses = splitIntoClasses(dataWithoutChunk); + var negatives = mapOfClasses.get(DataClass.NEGATIVE); + var positives = mapOfClasses.get(DataClass.POSITIVE); + + var kNearestNeighbour = new KNearestNeighbour(Vector::distance, i); + kNearestNeighbour.learn(positives, negatives); + + var failCount = 0; + + for (var vector : chunk) + { + var expectedClass = DataClass.valueOf(Double.valueOf(vector.get(vector.dimension() - 1)).intValue()); + + var testVector = vector.decreasedDimension().normalized(); + var actualClass = kNearestNeighbour.classify(testVector); + + if (expectedClass != actualClass) + { + failCount++; + } + } + + failRate.set(failRate.get() + failCount * 1d / chunk.size()); + }); + + averageFailRates.put(failRate.get() / chunkSize, i); + }); + + var optimalParam = averageFailRates.get(averageFailRates.keySet().stream().min(Double::compareTo).get()); + var finalKNearestNeighbour = new KNearestNeighbour(Vector::distance, optimalParam); + + System.out.println("Optimal parameter k = " + optimalParam + " with fail rate " + averageFailRates.keySet().stream().min(Double::compareTo).get()*100 + " %"); + + var classes = splitIntoClasses(data); + var negatives = classes.get(DataClass.NEGATIVE); + var positives = classes.get(DataClass.POSITIVE); + finalKNearestNeighbour.learn(positives, negatives); + + return finalKNearestNeighbour; + } + + private Map<DataClass, List<Vector>> splitIntoClasses(List<Vector> data) + { + var positives = data.parallelStream() + .filter(v -> v.get(v.dimension()-1) == 1) + .collect(Collectors.toList()); + + var negatives = data.parallelStream() + .filter(v -> v.get(v.dimension()-1) == 0) + .collect(Collectors.toList()); + + positives = positives.parallelStream() + .map(Vector::decreasedDimension) + .map(Vector::normalized) + .collect(Collectors.toList()); + + negatives = negatives.parallelStream() + .map(Vector::decreasedDimension) + .map(Vector::normalized) + .collect(Collectors.toList()); + + return Map.ofEntries(Map.entry(DataClass.NEGATIVE, negatives), Map.entry(DataClass.POSITIVE, positives)); + } +} diff --git a/src/machine_learning/nearest_neighbour/DataClass.java b/src/machine_learning/nearest_neighbour/DataClass.java deleted file mode 100644 index c095351..0000000 --- a/src/machine_learning/nearest_neighbour/DataClass.java +++ /dev/null @@ -1,7 +0,0 @@ -package machine_learning.nearest_neighbour; - -public enum DataClass -{ - POSITIVE, - NEGATIVE -} diff --git a/src/machine_learning/nearest_neighbour/KNearestNeighbour.java b/src/machine_learning/nearest_neighbour/KNearestNeighbour.java index e63faac..2dea8c7 100644 --- a/src/machine_learning/nearest_neighbour/KNearestNeighbour.java +++ b/src/machine_learning/nearest_neighbour/KNearestNeighbour.java @@ -1,5 +1,7 @@ package machine_learning.nearest_neighbour; +import machine_learning.DataClass; +import machine_learning.MachineLearning; import machine_learning.Vector; import java.util.List; @@ -8,8 +10,11 @@ import java.util.Random; import java.util.stream.Collectors; import java.util.stream.Stream; -public class KNearestNeighbour +public class KNearestNeighbour implements MachineLearning { + private List<Vector> positives; + private List<Vector> negatives; + private Distance distance; private int k; @@ -25,20 +30,26 @@ public class KNearestNeighbour this.k = k; } - public DataClass kNearestNeighbour(List<Vector> positives, List<Vector> negatives, Vector toClassify) + public void learn(List<Vector> positives, List<Vector> negatives) + { + this.positives = positives; + this.negatives = negatives; + } + + public DataClass classify(Vector
toClassify) { var nearestNeighbours = this.nearestNeighbours( - Stream.concat(positives.stream(), negatives.stream()) + Stream.concat(this.positives.stream(), this.negatives.stream()) .collect(Collectors.toList()), toClassify ); var positivesWithNearestNeighboursAmount = nearestNeighbours.stream() - .filter(positives::contains) + .filter(this.positives::contains) .count(); var negativesWithNearestNeighboursAmount = nearestNeighbours.stream() - .filter(negatives::contains) + .filter(this.negatives::contains) .count(); if (positivesWithNearestNeighboursAmount > negativesWithNearestNeighboursAmount) @@ -55,13 +66,12 @@ public class KNearestNeighbour private List<Vector> nearestNeighbours(List<Vector> vectors, Vector vector) { - var nearestNeighbours = vectors.stream() + return vectors.parallelStream() .map(v -> Map.entry(this.distance.distance(v, vector), v)) .sorted((e1, e2) -> e1.getKey() >= e2.getKey() ? (e1.getKey().equals(e2.getKey()) ? 0 : 1) : -1) .map(Map.Entry::getValue) - .collect(Collectors.toList()); - - return nearestNeighbours.subList(0, this.k); + .collect(Collectors.toList()) + .subList(0, this.k); } } diff --git a/src/machine_learning/perceptron/Perceptron.java b/src/machine_learning/perceptron/Perceptron.java index 4f55b1b..53382a6 100644 --- a/src/machine_learning/perceptron/Perceptron.java +++ b/src/machine_learning/perceptron/Perceptron.java @@ -1,11 +1,12 @@ package machine_learning.perceptron; +import machine_learning.MachineLearning; import machine_learning.Vector; -import machine_learning.nearest_neighbour.DataClass; +import machine_learning.DataClass; import java.util.List; -public class Perceptron +public class Perceptron implements MachineLearning { private Vector weight; diff --git a/test/machine_learning/VectorTest.java b/test/machine_learning/VectorTest.java index e838cab..49d4446 100644 --- a/test/machine_learning/VectorTest.java +++ b/test/machine_learning/VectorTest.java @@ -24,10 +24,10 @@ class VectorTest var v1 = new Vector(1d, 2d); var v2 = new Vector(3d, 4d); - var result = v1.add(v2); + var actual = v1.add(v2); var expected = new Vector(4d, 6d); - assertEquals(expected, result); + assertEquals(expected, actual); } @Test @@ -36,10 +36,10 @@ class VectorTest var v1 = new Vector(1d, 2d); var v2 = new Vector(3d, 4d); - var result = v1.subtract(v2); + var actual = v1.subtract(v2); var expected = new Vector(-2d, -2d); - assertEquals(expected, result); + assertEquals(expected, actual); } @Test @@ -48,10 +48,10 @@ class VectorTest var v1 = new Vector(1d, 2d); var v2 = new Vector(3d, 4d); - var result = v1.scalar(v2); + var actual = v1.scalar(v2); var expected = 11d; - assertEquals(expected, result); + assertEquals(expected, actual); } @Test @@ -59,10 +59,10 @@ class VectorTest { var v1 = new Vector(1d, 2d); - var result = v1.euclid(); + var actual = v1.euclid(); var expected = Math.sqrt(5); - assertEquals(expected, result); + assertEquals(expected, actual); } @Test @@ -71,10 +71,10 @@ class VectorTest var v1 = new Vector(1d, 2d); var v2 = new Vector(3d, 4d); - var result = v1.distance(v2); + var actual = v1.distance(v2); var expected = Math.sqrt(8); - assertEquals(expected, result); + assertEquals(expected, actual); } @Test @@ -83,9 +83,34 @@ class VectorTest var v1 = new Vector(1d, 2d); var div = 2d; - var result = v1.divide(div); + var actual = v1.divide(div); var expected = new Vector(0.5d, 1d); - assertEquals(expected, result); + assertEquals(expected, actual); + } + + @Test + void shouldDecreaseDimensionCorrect() + { + var v = new Vector(1d, 2d, 3d, 4d); + + var
decreasedDimensionVector = v.decreasedDimension(); + + var actual = decreasedDimensionVector.dimension(); + var expected = 3; + + assertEquals(expected, actual); + } + + @Test + void shouldNormalizeCorrect() + { + var v = new Vector(4d, 4d, 4d, 4d); + + var actual = v.normalized(); + + var expected = new Vector(0.5d, 0.5d, 0.5d, 0.5d); + + assertEquals(expected, actual); } } \ No newline at end of file diff --git a/test/machine_learning/nearest_neighbour/KNearestNeighbourTest.java b/test/machine_learning/nearest_neighbour/KNearestNeighbourTest.java index 3b4046c..6f5b629 100644 --- a/test/machine_learning/nearest_neighbour/KNearestNeighbourTest.java +++ b/test/machine_learning/nearest_neighbour/KNearestNeighbourTest.java @@ -1,13 +1,20 @@ package machine_learning.nearest_neighbour; +import machine_learning.DataClass; import machine_learning.Vector; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; +import org.opentest4j.AssertionFailedError; +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.*; @@ -40,9 +47,10 @@ class KNearestNeighbourTest public void shouldReturnCorrectClassForVectorWithKEquals3() { var kNearestNeighbour = new KNearestNeighbour((a ,b) -> Math.abs(a.get(0) - b.get(0)) + Math.abs(a.get(1) - b.get(1)), 3); + kNearestNeighbour.learn(this.positives, this.negatives); var vector = new Vector(8, 3.5); - var actualClass = kNearestNeighbour.kNearestNeighbour(this.positives, this.negatives, vector); + var actualClass = kNearestNeighbour.classify(vector); var expectedClass = DataClass.NEGATIVE; assertEquals(expectedClass, actualClass); @@ -52,11 +60,118 @@ class KNearestNeighbourTest public void shouldReturnCorrectClassForVectorWithKEquals5() { var kNearestNeighbour = new KNearestNeighbour((a ,b) -> Math.abs(a.get(0) - b.get(0)) + Math.abs(a.get(1) - b.get(1)), 5); + kNearestNeighbour.learn(this.positives, this.negatives); var vector = new Vector(8, 3.5); - var actualClass = kNearestNeighbour.kNearestNeighbour(this.positives, this.negatives, vector); + var actualClass = kNearestNeighbour.classify(vector); var expectedClass = DataClass.POSITIVE; assertEquals(expectedClass, actualClass); } + + @Test + public void shouldReturnCorrectClassesForAppendicitisData() + { + var trainDataFile = "./resources/app1.data"; + var testDataFile = "./resources/app1.test"; + + var trainDataVectors = readFromFile(trainDataFile); + + var dataClasses = splitIntoClasses(trainDataVectors); + var negatives = dataClasses.get(DataClass.NEGATIVE); + var positives = dataClasses.get(DataClass.POSITIVE); + + var kNearestNeighbour = new KNearestNeighbour(Vector::distance); + kNearestNeighbour.learn(positives, negatives); + + var testDataVectors = readFromFile(testDataFile); + var failCount = 0; + + for (var vector : testDataVectors) + { + var expectedClass = DataClass.valueOf(Double.valueOf(vector.get(vector.dimension() - 1)).intValue()); + + var testVector = vector.decreasedDimension(); + + var actualClass = kNearestNeighbour.classify(testVector.normalized()); + + try + { + assertEquals(expectedClass, actualClass); + } + catch (AssertionFailedError e) + { + failCount++; + } + } + + System.out.println(failCount + " of " + testDataVectors.size() + " are not correctly classified."); +
System.out.println("Fail rate of " + Math.round(100d * failCount / testDataVectors.size()) + " %"); + + } + + @Test + public void shouldReturnOptimum() + { + var trainDataFile = "./resources/app1.data"; + var testDataFile = "./resources/app1.test"; + + var trainDataVectors = readFromFile(trainDataFile); + var testDataVectors = readFromFile(testDataFile); + var data = Stream.concat(trainDataVectors.stream(), testDataVectors.stream()) + .collect(Collectors.toList()); + + var crossValidation = new CrossValidation(1, 100); + + var kNearestNeighbour = crossValidation.validate(data, data.size()); + } + + private List readFromFile(String file) + { + List vectorList = new ArrayList<>(); + + try (var reader = new BufferedReader(new FileReader(file))) + { + String line; + + while ((line = reader.readLine()) != null) + { + vectorList.add(new Vector( + Arrays.stream(line.split(",")) + .map(Double::valueOf) + .collect(Collectors.toList()) + )); + } + + } + catch (IOException e) + { + e.printStackTrace(); + } + + return vectorList; + } + + private Map> splitIntoClasses(List data) + { + var positives = data.stream() + .filter(v -> v.get(v.dimension()-1) == 1) + .collect(Collectors.toList()); + + var negatives = data.stream() + .filter(v -> v.get(v.dimension()-1) == 0) + .collect(Collectors.toList()); + + positives = positives.stream() + .map(Vector::decreasedDimension) + .map(Vector::normalized) + .collect(Collectors.toList()); + + negatives = negatives.stream() + .map(Vector::decreasedDimension) + .map(Vector::normalized) + .collect(Collectors.toList()); + + return Map.ofEntries(Map.entry(DataClass.NEGATIVE, negatives), Map.entry(DataClass.POSITIVE, positives)); + } } \ No newline at end of file diff --git a/test/machine_learning/perceptron/PerceptronTest.java b/test/machine_learning/perceptron/PerceptronTest.java index 36cdaf3..4e3ae25 100644 --- a/test/machine_learning/perceptron/PerceptronTest.java +++ b/test/machine_learning/perceptron/PerceptronTest.java @@ -1,8 +1,7 @@ package machine_learning.perceptron; import machine_learning.Vector; -import machine_learning.nearest_neighbour.DataClass; -import org.junit.jupiter.api.Assertions; +import machine_learning.DataClass; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance;