% Maabreh et al. (2022), iJIM 16(14): feature selection and machine learning
% for network intrusion detection on the CSE-CIC-IDS2018 data set.
% NOTE(review): the citation key uses "Abu_Elsoud" (underscore) because BibTeX
% keys cannot contain whitespace; update any citation that used the old key
% spelled with a space.
@article{Maabreh_Obeidat_Abu_Elsoud_Alnajjar_Alzyoud_Darwish_2022,
  author   = {Maabreh, Majdi and Obeidat, Ibrahim and Abu Elsoud, Esraa and Alnajjar, Asma and Alzyoud, Rahaf and Darwish, Omar},
  title    = {Towards Data-Driven Network Intrusion Detection Systems: Features Dimensionality Reduction and Machine Learning},
  journal  = {International Journal of Interactive Mobile Technologies ({iJIM})},
  year     = {2022},
  month    = jul,
  volume   = {16},
  number   = {14},
  pages    = {123--135},
  doi      = {10.3991/ijim.v16i14.30197},
  url      = {https://online-journals.org/index.php/i-jim/article/view/30197},
  abstract = {Cyberattacks have increased in tandem with the exponential expansion
              of computer networks and network applications throughout the world.
              In this study, we evaluate and compare four features selection
              methods, seven classical machine learning algorithms, and the deep
              learning algorithm on one million random instances of
              CSE-CIC-IDS2018 big data set for network intrusions. The dataset
              was preprocessed and cleaned and all learning algorithms were
              trained on the original values of features. The feature selection
              methods highlighted the importance of features related to
              forwarding direction (FWD) and two flow measures (FLOW) in
              predicting the binary traffic type; benign or attack. Furthermore,
              the results revealed that whether models are trained on all
              features or the top 30 features selected by any of the four
              features selection techniques used in this experiment, there is no
              significant difference in model performance. Moreover, we may be
              able to train ML models on only four features and have them perform
              similarly to models trained on all data, which may result in
              preferable models in terms of complexity, explainability, and scale
              for deployment. Furthermore, by choosing four unanimity features
              instead of all traffic features, training time may be reduced from
              10\% to 50\% of the training time on all features.},
}