columbo/tools.py at master · visma-prodsec/columbo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import ntpath
import os
import pickle
import subprocess
from warnings import simplefilter

import pandas as pd
from colorama import Fore
from pandas import DataFrame

import run

# Install global 'ignore' filters so FutureWarning and UserWarning messages are not shown.
simplefilter('ignore', FutureWarning)
simplefilter('ignore', UserWarning)
# Directory that contains this module (real path, symlinks resolved).
dir_path = os.path.split(os.path.realpath(__file__))[0]


def input_path():
    """
    Prompt for the path of the mounted system image and start the signature scan.

    The first two characters of the answer (e.g. ``F:``) are checked with
    ``ntpath.ismount``. The prompt is repeated until that check passes; the
    full path is then handed to ``sig_scan``.

    :return: always 0
    """
    # Loop instead of recursing so repeated wrong answers cannot exhaust the stack.
    while True:
        # Raw string: '\C' and '\W' are not valid escape sequences in a normal literal.
        print(
            Fore.GREEN + 'Provide full path of mounted system image (.vhdx) ' + Fore.YELLOW +
            r'e.g. F:\C\Windows or F:\C ')
        print(Fore.GREEN)
        path = str(input('Path:')).strip()
        mount = path[0:2]
        if ntpath.ismount(mount):
            break
        print(Fore.YELLOW + '\nError -provide correct path. Mounted system image -try again \n')

    sig_scan(path)
    return 0


def sig_scan(path):
    """
    Run sigcheck.exe over the mounted files and pass the report on for analysis.

    sigcheck.exe is taken from the ``bin`` folder next to this module and its
    output is saved as ``sigcheckToOrganise.csv`` in the ``csvFiles`` folder.
    When the saved report is missing or is 317 bytes or smaller, the user is
    asked for the path again via ``input_path``; otherwise ``analysis`` runs.

    :param path: location of the mounted files to scan
    :return: always 0
    """
    base_dir = os.path.dirname(os.path.realpath(__file__))

    sigcheck = base_dir + r'\bin\sigcheck.exe'
    options = '-s -c -e -h -v -vt -w -nobanner'
    save = base_dir + r'\csvFiles\sigcheckToOrganise.csv'
    # Argument list in the original order. No shell is involved, so a path
    # containing spaces or shell metacharacters is passed through literally.
    sig_cmd = [sigcheck, *options.split(), save, path]

    print(Fore.YELLOW + '\nThis execution might take some time....')
    try:
        subprocess.run(sig_cmd, check=False)
    except OSError as err:
        # e.g. sigcheck.exe is not present; fall through to the size check below.
        print(Fore.YELLOW + 'Could not run sigcheck: ' + str(err))

    try:
        report_size = os.stat(save).st_size
    except OSError:
        # No report was written at all: treat it like an empty report.
        report_size = 0

    # NOTE(review): 317 bytes is presumably the size of a header-only report - confirm.
    if report_size <= 317:
        print('Try again\n')
        input_path()
    else:
        analysis()

    return 0


def _print_report_footer(all_paths_file, suspicious_file):
    """Print where the two Excel reports were written, followed by the closing advice."""
    print(
        Fore.YELLOW + 'Please Check both excel files under ' + Fore.GREEN + all_paths_file + ' '
        + Fore.YELLOW + 'and ' + Fore.GREEN + suspicious_file + ' '
        + Fore.YELLOW + 'for further information')
    print(
        '\nJust one more thing, make sure you consider the probability facts of both 1 and 0 before selecting '
        'anything for further investigation. ')


def analysis():
    """
    Analyse the output generated by sigcheck.exe using Machine Learning

    Reads the saved sigcheck CSV report (UTF-16), collects the rows whose
    ``Verified`` column is ``Unsigned``, and runs the pickled vectoriser and
    model from the ``ML`` folder over the ``Path`` column. All paths with
    their prediction and class probabilities (in percent) go to
    ``all_paths.xlsx``; the rows predicted as 1 go to
    ``suspicious_paths.xlsx``. A summary is printed, the CSV report is
    deleted and control returns to ``run.user_input``.

    :return: the value returned by ``run.user_input()``
    """
    save = dir_path + r'\csvFiles\sigcheckToOrganise.csv'
    model_file = dir_path + r'\ML\cmdModel.sav'
    vect_file = dir_path + r'\ML\vecFile.sav'
    results_dir = dir_path + r'\ML\Step-3-results'
    suspicious_file = results_dir + r'\suspicious_paths.xlsx'
    all_paths_file = results_dir + r'\all_paths.xlsx'

    sigs = pd.read_csv(save, encoding='utf-16', delimiter=',')
    organised = sigs[['Path', 'Verified', 'Machine Type', 'Publisher', 'Description', 'MD5', 'VT detection']]
    paths = organised['Path']
    unsigned = organised.loc[organised['Verified'] == 'Unsigned']

    # ML part #

    # NOTE: pickle can execute arbitrary code while loading; only load model
    # files that come from a trusted source.
    with open(model_file, 'rb') as model_fh:
        se_model = pickle.load(model_fh)
    with open(vect_file, 'rb') as vect_fh:
        load_vect = pickle.load(vect_fh)

    features = load_vect.transform(paths)
    predictions = pd.DataFrame(se_model.predict(features)).rename(columns={0: 'ML-Output'})
    probabilities = pd.DataFrame(data=se_model.predict_proba(features) * 100)

    result = pd.concat([pd.DataFrame(paths), predictions, probabilities], axis=1, sort=False)
    ranked = result.sort_values(by='ML-Output', ascending=False)
    suspicious = ranked.loc[ranked['ML-Output'] == 1]

    # Make sure the output folder exists before writing the reports.
    os.makedirs(results_dir, exist_ok=True)
    suspicious.to_excel(suspicious_file, index=False)
    ranked.to_excel(all_paths_file, index=False)

    # Unsigned files section.
    if unsigned.empty:
        print(Fore.YELLOW + 'Nothing verified to be suspicious')
    else:
        print(Fore.WHITE)
        print(unsigned.to_string(index=False, header=True))
        print(Fore.YELLOW + '\nAbove values are selected, because they are not verified (Unsigned).')

    # Machine-learning section.
    if suspicious.empty:
        if not unsigned.empty:
            # Extra spacing after the unsigned table, as in the original output.
            print('\n')
        # NOTE(review): 'Leaning' looks like a typo for 'Learning'; kept verbatim
        # because it is runtime output - confirm before changing.
        print(Fore.YELLOW + '\nMachine Leaning does not find ' + Fore.RED + 'suspicious paths')
    else:
        print(Fore.WHITE)
        print(suspicious.to_string(index=False, header=True))
        print(Fore.YELLOW + '\nMachine Learning found above to be ' + Fore.RED + 'suspicious paths')

    _print_report_footer(all_paths_file, suspicious_file)
    os.remove(save)

    return run.user_input()