"""Enrich Cowrie honeypot file transfer events with VirusTotal data.

The script reads Cowrie JSON logs (one file, or every file in a directory
whose name contains ``cowrie.json``), picks out file download/upload events
and appends one JSON line per previously unseen hash to the ``vt_data`` file
in the current working directory.
"""
import logging
import sys

# Logging is configured before the remaining imports, as in the original
# script: WARNING and above go to a log file, everything goes to stdout.
basic_with_time_format = '%(asctime)s:%(levelname)s:%(name)s:%(message)s'
logging_fhandler = logging.FileHandler("cowrie_malware_enrichment.log")
logging_fhandler.setFormatter(logging.Formatter(basic_with_time_format))
logging_fhandler.setLevel(logging.WARNING)

stdout_handler = logging.StreamHandler(stream=sys.stdout)
stdout_handler.setLevel(logging.DEBUG)

logging.root.addHandler(logging_fhandler)
logging.root.addHandler(stdout_handler)
logging.root.setLevel(logging.DEBUG)

import json
import datetime
import requests
import argparse
import os
import ipaddress
from functools import lru_cache

# Timestamp layout used when parsing the "timestamp" field of a log entry.
COWRIE_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
# Event ids whose entries are expected to carry a "shasum" worth looking up.
FILE_TRANSFER_EVENTS = (
    "cowrie.session.file_download",
    "cowrie.session.file_upload",
)
# Seconds to wait for VirusTotal before giving up on a single lookup.
VT_REQUEST_TIMEOUT = 30

parser = argparse.ArgumentParser(description='DShield Honeypot Cowrie Data Identifiers')
parser.add_argument('--filepath', dest='filepath', type=str,
                    help='Path of cowrie json log file',
                    default='/srv/cowrie/var/log/cowrie/cowrie.json')
parser.add_argument('--directory', dest='directory', type=str,
                    help='Path of cowrie json log files', default=None)
parser.add_argument('--vtapi', dest='vtapi', type=str,
                    help='VirusTotal API key (required for VT data lookup)')
parser.add_argument('--timespan', dest='timespan', type=int,
                    help='Number of seconds in the past to look for data (60 would be any data logged in the last minute)',
                    default=None)

args = parser.parse_args()

filename = args.filepath
directory = args.directory
if directory is not None:
    # Drop one trailing slash so the path built in the driver loop below
    # does not contain a doubled separator.
    if directory.endswith("/"):
        directory = directory[:-1]
vt_api = args.vtapi
timespan = args.timespan

vt_session = requests.session()


def _within_timespan(timestamp_text, timespan):
    """Return True when an event is younger than ``timespan`` seconds.

    Args:
        timestamp_text: Value of the log entry's ``timestamp`` field.
        timespan: Maximum age in seconds, or None to accept every event.

    Raises:
        ValueError, TypeError: If the timestamp cannot be parsed.
    """
    if timespan is None:
        return True
    # The trailing "Z" marks the timestamp as UTC, so it has to be compared
    # against the current UTC time rather than the host's local clock.
    event_time = datetime.datetime.strptime(
        timestamp_text, COWRIE_TIMESTAMP_FORMAT
    ).replace(tzinfo=datetime.timezone.utc)
    age = datetime.datetime.now(datetime.timezone.utc) - event_time
    return timespan > age.total_seconds()


def find_cowrie_malware(filename, timespan = None):
    """Look up every new file hash found in one Cowrie JSON log file.

    Each line of the file is parsed as JSON. For file download/upload events
    that fall inside ``timespan`` and whose hash is not already recorded in
    ``vt_data``, ``vt_lookup`` is called with the module-level API key.

    Args:
        filename: Path of the Cowrie JSON log file to read.
        timespan: Only consider events at most this many seconds old;
            None disables the age filter.

    Raises:
        TypeError: If ``timespan`` is neither None nor an integer.
    """
    if timespan is not None and not isinstance(timespan, int):
        raise TypeError("Timespan supplied for comparison must be an integer!")

    existing_hashes_logged = find_exising_logs()
    logging.debug(f"Processing file {filename}")

    with open(filename, "r") as file:
        for line_number, each_line in enumerate(file, start=1):
            if not each_line.strip():
                continue
            try:
                each_log = json.loads(each_line)
            except ValueError:
                # One damaged line should not abort the whole run.
                logging.error(f"Skipping malformed JSON on line {line_number} of {filename}")
                continue
            if not isinstance(each_log, dict):
                continue

            event_id = each_log.get("eventid")
            if event_id not in FILE_TRANSFER_EVENTS:
                continue

            timestamp_text = each_log.get("timestamp")
            try:
                if not _within_timespan(timestamp_text, timespan):
                    continue
            except (ValueError, TypeError):
                logging.warning(f"Skipping {event_id} on line {line_number} of {filename}: unreadable timestamp '{timestamp_text}'")
                continue

            shasum = each_log.get("shasum")
            if not shasum:
                logging.debug(f"{event_id} on line {line_number} of {filename} has no shasum; skipping")
                continue

            logging.debug(f"{event_id} found in session {each_log.get('session')} at {timestamp_text}: hash {shasum}")
            if shasum not in existing_hashes_logged:
                vt_lookup(vt_api, shasum)
                # Remember the hash so a repeat later in this file is not
                # looked up again once it has left the lru_cache.
                existing_hashes_logged.add(shasum)


def find_exising_logs(filename="vt_data"):
    """Return the set of hashes already recorded in a ``vt_data`` style file.

    Args:
        filename: Path of a file holding one JSON object per line.

    Returns:
        The values of the ``hash`` key of every readable line; an empty set
        when the file does not exist.
    """
    hashes = set()
    if not os.path.exists(filename):
        return hashes

    with open(filename, "r") as file:
        for each_line in file:
            try:
                json_data = json.loads(each_line)
                if "hash" in json_data:
                    hashes.add(json_data["hash"])
            except (ValueError, TypeError):
                # ValueError: the line is not JSON. TypeError: the line is
                # JSON but not an object, or its hash is not hashable.
                logging.error(f"Issue reading json from {filename}. Maybe missing data: '{each_line}'")
    return hashes


@lru_cache
def vt_lookup(vt_api, hash="a8460f446be540410004b1a8db4083773fa46f7fe76fa84219c93daa1669f8f2"):
    """Query VirusTotal for a file hash and append the result to ``vt_data``.

    A record holding at least the hash is written for every response that
    could be parsed, including VirusTotal error responses, so the hash is
    treated as known on later runs. Nothing is written when the request
    itself fails or the response is not JSON, so that lookup is retried on a
    later run.

    Results are memoised by ``lru_cache``, so repeating a lookup with the
    same arguments in one run does not contact VirusTotal again.

    Args:
        vt_api: VirusTotal API key sent in the ``X-Apikey`` header.
        hash: File hash to look up.
    """
    logging.info(f"Starting VT lookup for {hash}")
    vt_data = {"hash": hash}
    vt_session.headers = {'X-Apikey': vt_api}
    url = "https://www.virustotal.com/api/v3/files/" + hash

    try:
        response = vt_session.get(url, timeout=VT_REQUEST_TIMEOUT)
    except requests.RequestException as error:
        logging.error(f"VT request failed for hash '{hash}': {error}")
        return

    logging.debug(response.text)
    try:
        json_response = json.loads(response.text)
    except ValueError:
        logging.error(f"VT returned a non-JSON response for hash '{hash}' (HTTP {response.status_code})")
        return

    if "error" in json_response:
        for value in json_response["error"].values():
            logging.error(f"VT Error for hash '{hash}': {value}")
    elif "data" in json_response:
        attributes = json_response["data"].get("attributes") or {}

        # Keys are added in a fixed order so the written JSON keeps the same
        # field order as before.
        vt_data.update(attributes.get("last_analysis_stats") or {})
        if "last_analysis_date" in attributes:
            vt_data["last_analysis_date"] = attributes["last_analysis_date"]

        # "trid" may be present but empty, and its first entry may lack
        # "file_type"; only record the value when it is really there.
        trid = attributes.get("trid")
        if trid and "file_type" in trid[0]:
            vt_data["filetype"] = trid[0]["file_type"]

        if "type_tag" in attributes:
            vt_data["typetag"] = attributes["type_tag"]
        if "type_description" in attributes:
            vt_data["description"] = attributes["type_description"]
        if "meaningful_name" in attributes:
            vt_data["filename"] = attributes["meaningful_name"]

        classification = attributes.get("popular_threat_classification") or {}
        if "suggested_threat_label" in classification:
            vt_data["classification"] = classification["suggested_threat_label"]

    with open("vt_data", "a") as filehandle:
        filehandle.write(json.dumps(vt_data) + "\n")


try:
    if directory is not None:
        for each_file in os.listdir(directory):
            if "cowrie.json" in each_file:
                find_cowrie_malware(f"{directory}/{each_file}", timespan)
    else:
        find_cowrie_malware(filename, timespan)
finally:
    # Release pooled connections even when processing raised.
    vt_session.close()