# LocalOsquery Data Provider

Reference: msticpy documentation on local data providers — https://msticpy.readthedocs.io/en/v1.1.0/data_acquisition/DataProviders.html#using-local-data-the-localdata-provider

## Imports

In [1]:
# Stop early unless the active kernel runs at least the minimum supported Python.
import sys

MIN_REQ_PYTHON = (3, 6)
if not sys.version_info >= MIN_REQ_PYTHON:
    # Tell the user how to switch kernels before exiting with the requirement.
    print(
        'Check the Kernel->Change Kernel menu and ensure that Python 3.6',
        'or later is selected as the active kernel.',
        sep='\n',
    )
    sys.exit("Python %s.%s or later is required.\n" % MIN_REQ_PYTHON)

# General-purpose imports
import json
import yaml
import msticpy.nbtools as nbtools

# Data library imports
from msticpy.data.data_providers import QueryProvider
# NOTE(review): second alias for msticpy.nbtools (already imported as `nbtools` above).
import msticpy.nbtools as mas

print('Imports Complete')

Imports Complete


## Variables

In [2]:
# Directory that holds the YAML file with the query definitions.
query_path = "/path/to"

# Directory that holds osqueryd.results.log and/or other *.log files.
# Tested with one file (osqueryd.results.log) and with two
# (osqueryd.results.log + osqueryd.snapshots.log).
datadir = "/path/to/var/log/osquery"

## Load Data

In [3]:
# Create the "LocalOsquery" provider, pointing it at the log directory
# (data_paths) and at the directory with the query definitions (query_paths).
data_path = datadir
qry_prov = QueryProvider(
    "LocalOsquery",
    data_paths=[data_path],
    query_paths=[query_path],
)

In [4]:
%%time
# Print the schema the provider reports for the data files it read in.
# Slow, even for a log file of ~1 MB.
print(qry_prov.schema)

{'pack_osquery-custom-pack2_processes': {'name': 'object', 'hostIdentifier': 'object', 'calendarTime': 'object', 'unixTime': 'datetime64[ns]', 'epoch': 'int64', 'counter': 'int64', 'numerics': 'bool', 'action': 'object', 'decorations_host_uuid': 'object', 'decorations_username': 'object', 'columns_cmdline': 'object', 'columns_euid': 'object', 'columns_name': 'object', 'columns_parent': 'object', 'columns_path': 'object', 'columns_pcmdline': 'object', 'columns_pid': 'object', 'columns_uid': 'object', 'columns_username': 'object'}, 'pack_osquery-custom-pack2_process_binding_to_ports': {'name': 'object', 'hostIdentifier': 'object', 'calendarTime': 'object', 'unixTime': 'datetime64[ns]', 'epoch': 'int64', 'counter': 'int64', 'numerics': 'bool', 'action': 'object', 'decorations_host_uuid': 'object', 'decorations_username': 'object', 'columns_name': 'object', 'columns_pid': 'object', 'columns_port': 'object', 'columns_protocol': 'object'}, 'pack_osquery-monitoring_osquery_info': {'name': 'ob

In [5]:
# Same schema, serialized as indented JSON so the per-table column dtypes are easier to read.
print(json.dumps(qry_prov.schema, indent=2))

{
  "pack_osquery-custom-pack2_processes": {
    "name": "object",
    "hostIdentifier": "object",
    "calendarTime": "object",
    "unixTime": "datetime64[ns]",
    "epoch": "int64",
    "counter": "int64",
    "numerics": "bool",
    "action": "object",
    "decorations_host_uuid": "object",
    "decorations_username": "object",
    "columns_cmdline": "object",
    "columns_euid": "object",
    "columns_name": "object",
    "columns_parent": "object",
    "columns_path": "object",
    "columns_pcmdline": "object",
    "columns_pid": "object",
    "columns_uid": "object",
    "columns_username": "object"
  },
  "pack_osquery-custom-pack2_process_binding_to_ports": {
    "name": "object",
    "hostIdentifier": "object",
    "calendarTime": "object",
    "unixTime": "datetime64[ns]",
    "epoch": "int64",
    "counter": "int64",
    "numerics": "bool",
    "action": "object",
    "decorations_host_uuid": "object",
    "decorations_username": "object",
    "columns_name": "object",
    

In [6]:
# List the names of the queries available on this provider.
qry_prov.list_queries()

['file.deb_packages',
 'file.fim',
 'linux.deb_packages',
 'linux.fim',
 'linux.osquery_info',
 'linux.outbound_connections',
 'linux.process_binding_to_ports',
 'linux.processes',
 'linux.shell_history',
 'network.outbound_connections',
 'network.process_binding_to_ports',
 'process.process_binding_to_ports',
 'process.processes',
 'shell.shell_history']

In [7]:
%%time
# Run the `linux.fim` query and preview the first row of the result.
# In the recorded run this cell took about 2min 45s of wall time.
df_fim = qry_prov.linux.fim()
df_fim.head(1)

CPU times: user 2min 44s, sys: 26.8 ms, total: 2min 44s
Wall time: 2min 45s


Unnamed: 0,name,hostIdentifier,calendarTime,unixTime,epoch,counter,numerics,action,decorations_host_uuid,decorations_username,...,columns_action,columns_atime,columns_category,columns_ctime,columns_mode,columns_mtime,columns_sha256,columns_size,columns_target_path,columns_time
793,fim,HOSTNAME,Fri Feb 3 11:52:32 2023 UTC,1675425152,0,8,False,added,F7E6787D-B2D8-4830-854E-33AF0A1338B8,,...,DELETED,1675425150,roothome,1675425150,600,1675425150,,30306,/root/.viminfo,1675425150


In [8]:
%%time
# Run the `linux.processes` query and preview the first row of the result.
# In the recorded run this cell took about 2min 48s of wall time.
df_process = qry_prov.linux.processes()
df_process.head(1)

CPU times: user 2min 46s, sys: 30.1 ms, total: 2min 46s
Wall time: 2min 48s


Unnamed: 0,name,hostIdentifier,calendarTime,unixTime,epoch,counter,numerics,action,decorations_host_uuid,decorations_username,columns_cmdline,columns_euid,columns_name,columns_parent,columns_path,columns_pcmdline,columns_pid,columns_uid,columns_username
0,pack_osquery-custom-pack2_processes,HOSTNAME,Fri Feb 3 06:28:25 2023 UTC,1675405705,0,876,False,removed,F7E6787D-B2D8-4830-854E-33AF0A1338B8,,/bin/sh /usr/local/scripts/audispd_report.sh,102,sudo,54935,,sudo -u syslog /usr/local/scripts/audispd_repo...,54940,102,syslog


In [9]:
%%time
# Run the `linux.outbound_connections` query and preview the first row of the result.
# In the recorded run this cell took about 2min 46s of wall time.
df_outbound_conn = qry_prov.linux.outbound_connections()
df_outbound_conn.head(1)

CPU times: user 2min 43s, sys: 27.6 ms, total: 2min 43s
Wall time: 2min 46s


Unnamed: 0,name,hostIdentifier,calendarTime,unixTime,epoch,counter,numerics,action,decorations_host_uuid,decorations_username,columns_cmdline,columns_name,columns_path,columns_pcmdline,columns_pid,columns_username,columns_local_port,columns_md5,columns_remote_address,columns_remote_port
90,pack_osquery-custom-pack2_outbound_connections,HOSTNAME,Fri Feb 3 07:00:47 2023 UTC,1675407647,0,59,False,removed,F7E6787D-B2D8-4830-854E-33AF0A1338B8,,/usr/local/bin/prometheus --storage.tsdb.path=...,prometheus,/usr/local/bin/prometheus,/sbin/init,1510,prometheus,34404,,10.8.0.77,9100


## Analysis examples

In [10]:
# https://msticpy.readthedocs.io/en/latest/visualization/ProcessTree.html
from msticpy.vis import process_tree
from msticpy.transform.proc_tree_builder import OSQUERY_EVENT_SCH

In [28]:
# Build a process tree from the process query results, using the osquery event schema.
# NOTE(review): p_tree_lx is not referenced by the other cells visible in this notebook.
p_tree_lx = process_tree.build_process_tree(df_process, schema=OSQUERY_EVENT_SCH)

In [29]:
# Partial tree: plot only 10 rows of df_process (positions 50-59),
# with "columns_name" as the legend column.
process_tree.plot_process_tree(
    data=df_process[50:60],
    legend_col="columns_name",
)

(Figure(id='4118', ...), Row(id='4237', ...))

In [12]:
# FIXME: dtype mismatch. The schema printed earlier reports unixTime as
# datetime64[ns] (for the tables visible in that output), but the frame returned
# by the query has unixTime as int64, and the columns_*time fields are plain
# object columns rather than datetime64.
df_fim.dtypes

name                     object
hostIdentifier           object
calendarTime             object
unixTime                  int64
epoch                     int64
counter                   int64
numerics                   bool
action                   object
decorations_host_uuid    object
decorations_username     object
columns_uid              object
columns_username         object
columns_md5              object
columns_action           object
columns_atime            object
columns_category         object
columns_ctime            object
columns_mode             object
columns_mtime            object
columns_sha256           object
columns_size             object
columns_target_path      object
columns_time             object
dtype: object

In [None]:
# Plot FIM events on a timeline, one series per target path.
import pandas as pd

# The query returns `columns_time` as epoch-second strings (dtype `object`, see
# the df_fim.dtypes output), while the timeline plots against a datetime axis.
# Convert on a copy so `df_fim` itself stays untouched; values that cannot be
# parsed become NaT and are dropped because they cannot be placed on the axis.
df_fim_timeline = df_fim.assign(
    columns_time=pd.to_datetime(
        pd.to_numeric(df_fim["columns_time"], errors="coerce"), unit="s"
    )
).dropna(subset=["columns_time"])

df_fim_timeline.mp_plot.timeline(
    title="FIM by action",
    # Alternative groupings:
    # group_by="columns_action",
    # group_by="columns_username",
    group_by="columns_target_path",
    source_columns=["columns_username", "columns_action", "columns_category", "columns_target_path"],
    time_column="columns_time",
    legend="left",
    height=200,
)

In [None]:
# Matrix plot of outbound connections: process name (x) against remote address (y).
df_outbound_conn.mp_plot.matrix(
    x="columns_name",
    y="columns_remote_address",
    title="Process name vs remote address Interaction",
)