/*
  regdump.c - Dump a registry hive.

  Jason Hood, 29 January to 9 February, 2019.
  Based on code by Ladislav Nevery, 2008.

  Dump one or more registry hives as text, one line per value.  Normally values
  and empty keys are written; use "-v" to only show values, or "-k" to only
  show keys (along with the time of last write).

  Key names, value names and strings will only use ASCII characters, other
  characters will be written as "<XX>" or "<XXXX>", using the hexadecimal code
  of the character.

  String types will stop at the first null (or double null, for multi), adding
  "<...>" to indicate if there is more non-null data; use "-s" to display
  everything (although trailing nulls are still not shown).  Multi-strings will
  be separated by "<>".

  If binary data is predominantly ASCII (7 out of 8 bytes, or 3 out of 4 words)
  it will be displayed as a string, always showing everything (including
  trailing nulls).  If 8-byte data matches a 21st century FILETIME it will be
  shown as date and time (local), as well as data.

  Some non-standard value types are supported.  Types under the "Properties"
  key having the high 16 bits set will be treated as a device property type
  (0xFFFF0000 | DEVPROP_TYPE...) and translated to a corresponding standard
  type.  Types under the "DriverPackages" key will mask out the high word,
  resulting in a standard type.

  Note: assumes the hive and CPU are little-endian.

  References:

  https://www.codeproject.com/KB/recipes/RegistryDumper.aspx
  https://github.com/msuhanov/regf/blob/master/Windows%20registry%20file%20format%20specification.md
*/

#ifdef _WIN32
# define _CRT_SECURE_NO_WARNINGS
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# define int64_t __int64
# define PRId64 "I64d"
# define PRIX64 "I64X"
#else
# include <inttypes.h>
# include <time.h>
  typedef int  BOOL;
# define TRUE  1
# define FALSE 0
#endif
#include <string.h>
#include <stdio.h>
#include <stdlib.h>


// Command-line option flags, set by main() and read while dumping:
//   hex_type	 (-h) print type & size in hexadecimal, before the path
//   only_values (-v) do not print empty keys
//   only_keys	 (-k) print each key (with its timestamp) instead of values
//   all_string  (-s) do not stop string output at the first null
//   time_sec	 (-t) prefix each line with the key timestamp
//   time_full	 (-T) as above, including the sub-second fraction
BOOL hex_type, only_values, only_keys, all_string, time_sec, time_full;
// Set for each hive by main() from the version numbers in the base block; when
// set, walk() treats a large value stored in a "db" block as a list of data
// segments that must be joined.
BOOL big_data;


// Leading fields of the hive file header.  main() reads the whole file into
// memory and overlays this structure on its first bytes.
typedef struct
{
  char signature[4];		// "regf"
  int  primary_sequence_number;
  int  secondary_sequence_number;
  int  last_written_timestamp[2];	// avoid alignment issues with int64_t
  int  major_version;		// with minor_version, decides big_data in main()
  int  minor_version;
  int  file_type;
  int  file_format;
  int  root_cell_offset;	// root key cell, relative to file offset 0x1000
  // and more of no interest
} base_block;


// A cell that is simply an array of cell offsets (each relative to root).
// walk() uses it for a key's value list and for the segment list of a "db"
// block; the number of entries is taken from the structure referring to it.
typedef struct
{
  int block_size;
  int offsets[1];		// indexed beyond the single declared element
} offsets;


// Header of a list cell: a subkey list or a big-data ("db") block.  walk()
// steps through offsets[] one int at a time for "li" and two at a time for the
// other "l?" types (skipping the hash); any other subkey list is treated as a
// list of further lists.  For "db", offsets[0] locates the list of segments.
typedef struct
{
  int	block_size;
  char	block_type[2];		// "lf" "lh" "li" "ri" "db"
  short count;			// number of entries
  int	offsets[1];		// indexed beyond the single declared element
  int	hash;			// only for "lf" "lh", ignored
} list_block;


// A key cell.  All offsets are relative to root.
typedef struct
{
  int	  block_size;
  char	  block_type[2];	// "nk"
  short   flags;		// see KEY_COMP_NAME
  int64_t timestamp;		// time of last write, given to print_time()
  char	  dummya[8];
  int	  subkey_count;
  char	  dummyb[4];
  int	  subkeys;		// offset of the subkey list_block, -1 if none
  char	  dummyc[4];
  int	  value_count;		// number of entries in the value list
  int	  values;		// offset of the value list (an offsets cell)
  char	  dummyd[28];
  short   len;			// length of name, in bytes
  short   du;
  char	  name[1];		// first byte of the name (not null-terminated)
} key_block;


// A value cell.
typedef struct
{
  int	block_size;
  char	block_type[2];		// "vk"
  short name_len;		// length of name in bytes; 0 is shown as "@"
  int	size;			// data size; bit 31 set means data is in offset
  int	offset;			// offset of the data cell, or the data itself
  int	value_type;		// REG_... type, possibly non-standard
  short flags;			// see VALUE_COMP_NAME
  short dummy;
  char	name[1];		// first byte of the name (not null-terminated)
} value_block;


// Bits in the flags member of key_block and value_block respectively.  When
// set, make_name() reads the name as one byte per character; otherwise it
// reads 16-bit characters.
#define KEY_COMP_NAME	0x20
#define VALUE_COMP_NAME 0x01


// Standard registry value types, numbered consecutively from 0 (REG_NONE) to
// 11 (REG_QWORD).  Only declared for non-Windows builds; the Windows build
// includes <windows.h>, which presumably supplies the same names.
#ifndef _WIN32
enum
{
  REG_NONE,
  REG_SZ,
  REG_EXPAND_SZ,
  REG_BINARY,
  REG_DWORD,
  REG_DWORD_BIG_ENDIAN,
  REG_LINK,
  REG_MULTI_SZ,
  REG_RESOURCE_LIST,
  REG_FULL_RESOURCE_DESCRIPTOR,
  REG_RESOURCE_REQUIREMENTS_LIST,
  REG_QWORD
};
#endif


// Device property types.  Under a "Properties" key a value type with the high
// word set (0xFFFF0000 | DEVPROP_TYPE_...) is translated by walk() to a
// standard type or displayed specially (16-bit integers and booleans).
enum
{
  DEVPROP_TYPE_INT16 = 4,
  DEVPROP_TYPE_UINT16,
  DEVPROP_TYPE_INT32,
  DEVPROP_TYPE_UINT32,
  DEVPROP_TYPE_INT64,
  DEVPROP_TYPE_UINT64,
  DEVPROP_TYPE_FILETIME = 0x10,
  DEVPROP_TYPE_BOOLEAN,
  DEVPROP_TYPE_STRING,
  DEVPROP_TYPE_STRING_LIST = 0x2000 | DEVPROP_TYPE_STRING,
  DEVPROP_TYPE_STRING_INDIRECT = 0x19
};


// Convert a key or value name to printable ASCII.
//
// out:  destination; receives the converted name and a terminating null.
// in:   the stored name (not null-terminated).
// len:  length of the stored name, in bytes.
// comp: non-zero if the name is stored as one byte per character, zero if it
//	 is stored as 16-bit characters.
//
// Printable ASCII is copied as is, except '/' (the path separator used in the
// output) which becomes the UTF-8 encoding of U+2044 FRACTION SLASH; anything
// else is written as "<XX>" or "<XXXX>", being its hexadecimal code.
//
// Returns a pointer to the terminating null, ready for appending.
char* make_name( char* out, char* in, int len, int comp )
{
  const int width = comp ? 1 : 2;	// bytes per stored character
  const int count = len / width;
  int idx;

  for (idx = 0; idx < count; ++idx)
  {
    unsigned int ch = comp ? ((unsigned char*)in)[idx]
			   : ((unsigned short*)in)[idx];

    if (ch == '/')
    {
      out += sprintf( out, "\xe2\x81\x84" );
    }
    else if (ch < 32 || ch >= 127)
    {
      if (comp)
	out += sprintf( out, "<%02X>", ch );
      else
	out += sprintf( out, "<%0*X>", (ch < 0x100) ? 2 : 4, ch );
    }
    else
    {
      *out++ = (char)ch;
    }
  }

  *out = '\0';
  return out;
}


// Print a FILETIME-style timestamp as local date and time.
//
// t:	     number of 100-nanosecond intervals since 1601-01-01.
// full:     also print the fraction of a second (seven digits).
// brackets: wrap the output in "[" and "] " (note the trailing space).
//
// The time is converted to local time on every platform.  Should the value
// not be convertible, asterisks are printed in place of the digits so the line
// layout is preserved.
void print_time( int64_t t, BOOL full, BOOL brackets )
{
#ifdef _WIN32
  SYSTEMTIME st;
#else
  time_t secs;
  struct tm* lt;
#endif

  if (brackets)
    putchar( '[' );

#ifdef _WIN32
  // Both conversions can fail (e.g. a value out of range), which would
  // otherwise leave st unset.
  if (FileTimeToSystemTime( (FILETIME*)&t, &st ) &&
      SystemTimeToTzSpecificLocalTime( NULL, &st, &st ))
    printf( "%u-%02u-%02u %02u:%02u:%02u",
	    st.wYear, st.wMonth, st.wDay, st.wHour, st.wMinute, st.wSecond );
  else
    printf( "****-**-** **:**:**" );
#else
  // Translate 100-nanosecond intervals from 1601 to seconds from 1970.
  secs = (time_t)(t / 10000000) - 11644473600;
  // Use local time, matching the Windows build and the documented behaviour.
  lt = localtime( &secs );
  if (lt)
    printf( "%d-%02d-%02d %02d:%02d:%02d",
	    lt->tm_year+1900, lt->tm_mon+1, lt->tm_mday,
	    lt->tm_hour, lt->tm_min, lt->tm_sec );
  else
    printf( "****-**-** **:**:**" );
#endif
  if (full)
    printf( ".%07d", (int)(t % 10000000) );

  if (brackets)
  {
    putchar( ']' );
    putchar( ' ' );
  }
}


// root: start of the hive data in memory (file offset 0x1000); every cell
//	 offset in the hive is added to this.
// full: start of the path buffer, i.e. the complete path printed on each line.
static char *root, *full;

// Dump a key and, recursively, all of its subkeys.
//
// path: position within the path buffer (starting at full) at which to append
//	 "/" and this key's name; the buffer is shared by the whole recursion.
// key:  the key cell to dump.
//
// With only_keys each key is printed with its timestamp and values are
// skipped.  Otherwise every value is printed on its own line as path, type,
// size and data, and a key having neither values nor subkeys is printed by
// itself (unless only_values).
//
// The hive is not validated: offsets and counts are used as found, and the
// path buffer is assumed to be big enough.
void walk( char* path, key_block* key )
{
  // Whether we are currently below a "Properties" or "DriverPackages" key,
  // where non-standard value types are interpreted.
  static BOOL properties, driverpackages;
  offsets* val_list;
  int	size, type;
  char* data;
  char* data_block = NULL;
  BOOL* leave_key = NULL;	// flag to clear when this key is finished
  BOOL	empty_key;
  int	bintext;
  int	o, i;

  // Add current key name to printed path.
  *path++ = '/';
  path = make_name( path, key->name, key->len, key->flags & KEY_COMP_NAME );

  if (only_keys)
  {
    print_time( key->timestamp, time_full, TRUE );
    printf( "%s\n", full );
    empty_key = FALSE;
    goto children;
  }

  // Only the outermost matching key sets (and later clears) the flag.  The
  // comparison is against the raw name bytes, so it can only match a name
  // stored as one byte per character.
  if (!properties)
  {
    if (key->len == 10 && memcmp( "Properties", key->name, key->len ) == 0)
    {
      properties = TRUE;
      leave_key = &properties;
    }
  }
  if (!driverpackages)
  {
    if (key->len == 14 && memcmp( "DriverPackages", key->name, key->len ) == 0)
    {
      driverpackages = TRUE;
      leave_key = &driverpackages;
    }
  }

  empty_key = (key->value_count == 0);

  // Print all contained values.
  val_list = (offsets*)(key->values + root);
  for (o = 0; o < key->value_count; ++o)
  {
    value_block* val = (value_block*)(val_list->offsets[o] + root);

    // Append the value name after the key; the default (unnamed) value is "@".
    *path = '/';
    if (val->name_len == 0)
    {
      path[1] = '@';
      path[2] = '\0';
    }
    else
    {
      make_name( path+1, val->name, val->name_len, val->flags & VALUE_COMP_NAME );
    }

    if (time_sec || time_full)
      print_time( key->timestamp, time_full, TRUE );

    size = val->size & 0x7fffffff;
    if (hex_type)
      printf( "[%08X:%08X] %s = ", val->value_type, size, full );
    else
      printf( "%s [%d:%d] = ", full, val->value_type, size );

    // Data are usually in separate blocks without types, but for small values
    // MS added optimization where if bit 31 is set data are contained within
    // the key itself to save space.  (Test the bit as unsigned; shifting 1
    // into the sign bit of an int is undefined.)
    if ((unsigned)val->size & 0x80000000u)
      data = (char*)&val->offset;
    else
    {
      data = val->offset + root + 4;
      if (size > 16344 && big_data && *data == 'd' && data[1] == 'b')
      {
	// Big data: the value is split into segments of at most 16344 bytes,
	// which are joined into one temporary buffer.
	list_block* item;
	offsets* datalist;
	int left;

	item = (list_block*)(data - 4);
	datalist = (offsets*)(item->offsets[0] + root);
	data = data_block = malloc( size );
	if (!data_block)
	{
	  // The start of the line has already been written, so finish it.
	  printf( "<insufficient memory>\n" );
	  continue;
	}
	left = size;
	// Never copy more than the value's size, whatever the segment count.
	for (i = 0; i < item->count && left > 0; ++i)
	{
	  int chunk = (left > 16344) ? 16344 : left;
	  memcpy( data, datalist->offsets[i] + root + 4, chunk );
	  data += chunk;
	  left -= chunk;
	}
	// Should there be too few segments only show what was copied.
	size -= left;
	data = data_block;
      }
    }

    // Translate non-standard types to the standard type used for display.
    type = val->value_type;
    if (properties && (type & 0xFFFF0000) == 0xFFFF0000)
    {
      switch (type & 0xFFFF)
      {
	case DEVPROP_TYPE_INT32:
	case DEVPROP_TYPE_UINT32:
	  type = REG_DWORD;
	  break;
	case DEVPROP_TYPE_INT64:
	case DEVPROP_TYPE_UINT64:
	case DEVPROP_TYPE_FILETIME:
	  type = REG_QWORD;
	  break;
	case DEVPROP_TYPE_STRING:
	case DEVPROP_TYPE_STRING_INDIRECT:
	  type = REG_SZ;
	  break;
	case DEVPROP_TYPE_STRING_LIST:
	  type = REG_MULTI_SZ;
	  break;
      }
    }
    else if (driverpackages)
      type &= 0xFFFF;

    // See if binary data is text: it must start with two printable ASCII
    // characters and then 3 out of 4 words (16-bit text) or 7 out of 8 bytes
    // (8-bit text) must be printable ASCII.
    bintext = 0;
    if ((type == REG_BINARY || type == REG_NONE) && size >= 8)
    {
      int ascii = 0, min = 8;
      if (data[1] == 0 && data[3] == 0)
      {
	unsigned short* us = (unsigned short*)data;
	if (*us >= 32 && *us < 127 &&
	    us[1] >= 32 && us[1] < 127)
	{
	  ascii = 2;
	  for (i = 2; i < size / 2; ++i)
	    if (us[i] >= 32 && us[i] < 127)
	      ++ascii;
	  ascii *= 2;		// count bytes, not words
	  min = 6;
	}
      }
      else if (*data >= 32 && *data < 127 &&
	       data[1] >= 32 && data[1] < 127)
      {
	ascii = 2;
	for (i = 2; i < size; ++i)
	  if (data[i] >= 32 && data[i] < 127)
	    ++ascii;
	min = 7;
      }
      // ascii / size >= min / 8, done in 64 bits to avoid overflow.
      if ((int64_t)ascii * 8 >= (int64_t)size * min)
	bintext = (data[1] == 0) ? 16 : 8;
    }

    if (type == REG_DWORD && size == 4)
    {
      printf( "0x%X (%d)", *(int*)data, *(int*)data );
    }
    else if (properties && size == 1 &&
	     (type == (0xFFFF0000 | DEVPROP_TYPE_BOOLEAN)))
    {
      // True is stored as -1; compare the byte as unsigned so this works
      // whether or not plain char is signed.
      if ((unsigned char)*data == 0xFF)
	printf( "true" );
      else if (*data == 0)
	printf( "false" );
      else
	printf( "%02X", *(unsigned char*)data );
    }
    else if (properties && size == 2 &&
	     (type == (0xFFFF0000 | DEVPROP_TYPE_UINT16) ||
	      type == (0xFFFF0000 | DEVPROP_TYPE_INT16)))
    {
      if ((type & 0xFFFF) == DEVPROP_TYPE_UINT16)
	printf( "0x%X (%u)", *(unsigned short*)data, *(unsigned short*)data );
      else
	printf( "0x%X (%d)", *(unsigned short*)data, *(short*)data );
    }
    // See if 8 bytes is a 21st century FILETIME.
    else if (size == 8 &&
	     (type == REG_QWORD || type == REG_BINARY || type == REG_NONE) &&
	     *(int64_t*)data >= (int64_t)126227808000000000 &&	// 2001-01-01
	     *(int64_t*)data < (int64_t)157784544000000000)	// 2101-01-01
    {
      // Show the time, followed by the data in its usual form.
      print_time( *(int64_t*)data, FALSE, FALSE );
      if (type == REG_QWORD)
	printf( " (0x%" PRIX64 "; %" PRId64 ")", *(int64_t*)data, *(int64_t*)data );
      else
      {
	putchar( ' ' );
	putchar( '(' );
	for (i = 0; i < size; i++)
	{
	  if (i)
	    putchar( ',' );
	  printf( "%02X", (unsigned char)data[i] );
	}
	putchar( ')' );
      }
    }
    else if (type == REG_QWORD && size == 8)
    {
      printf( "0x%" PRIX64 " (%" PRId64 ")", *(int64_t*)data, *(int64_t*)data );
    }
    // Strings are stored as Unicode (UTF-16LE).
    else if (type == REG_SZ ||
	     type == REG_MULTI_SZ ||
	     type == REG_EXPAND_SZ ||
	     type == REG_LINK ||
	     bintext == 16)
    {
      unsigned short* us = (unsigned short*)data;
      size /= 2;
      // Trailing nulls are not shown for real strings.
      if (!bintext)
	while (size > 0 && us[size-1] == '\0')
	  --size;
      for (i = 0; i < size; ++i)
      {
	if (us[i] >= 32 && us[i] < 127)
	  putchar( us[i] );
	else if (us[i] == '\0' && type == REG_MULTI_SZ && i+1 < size && us[i+1] != '\0')
	  printf( "<>" );	// separator between the strings of a multi
	else if (us[i] == '\0' && !all_string && !bintext)
	{
	  // End of string, but more (non-null) data follows.
	  printf( " <...>" );
	  break;
	}
	else
	  printf( "<%0*X>", (us[i] < 0x100) ? 2 : 4, us[i] );
      }
    }
    else if (bintext /*== 8*/)
    {
      for (i = 0; i < size; ++i)
      {
	if (data[i] >= 32 && data[i] < 127)
	  putchar( data[i] );
	else
	  printf( "<%02X>", (unsigned char)data[i] );
      }
    }
    else
    {
      // Anything else is shown as comma-separated hexadecimal bytes.
      for (i = 0; i < size; ++i)
      {
	if (i)
	  putchar( ',' );
	printf( "%02X", (unsigned char)data[i] );
      }
    }
    putchar( '\n' );

    free( data_block );
    data_block = NULL;
  }

children:
  // For simplicity we can imagine keys as directories in filesystem and values
  // as files.  Since we already dumped values for this dir we will now iterate
  // through subdirectories in the same way.
  if (key->subkeys != -1)
  {
    list_block* item = (list_block*)(key->subkeys + root);

    if (item->count)
      empty_key = FALSE;

    if (item->block_type[0] == 'l')
    {
      // "li" entries are a single offset, the others have a hash as well.
      int ii = (item->block_type[1] == 'i') ? 1 : 2;
      for (i = 0; i < item->count; ++i)
	walk( path, (key_block*)(item->offsets[i*ii] + root) );
    }
    else
    {
      for (i = 0; i < item->count; ++i)
      {
	// In case of too many subkeys this list contains just other lists.
	list_block* subitem = (list_block*)(item->offsets[i] + root);
	int j, jj = (subitem->block_type[1] == 'i') ? 1 : 2;
	for (j = 0; j < subitem->count; ++j)
	  walk( path, (key_block*)(subitem->offsets[j*jj] + root) );
      }
    }
  }

  // A key with nothing in it would otherwise never be seen.  Nothing has been
  // appended to the path in this case, so it still ends at this key.
  if (empty_key && !only_values)
  {
    if (time_sec || time_full)
      print_time( key->timestamp, time_full, TRUE );
    if (hex_type)
      printf( "%20c", ' ' );	// align with the "[type:size] " prefix
    printf( "%s\n", full );
  }

  if (leave_key)
    *leave_key = FALSE;
}


// Process the command line: show the help, set the option flags, then dump
// each hive named.  A hive is read entirely into memory and walked from its
// root key.  When more than one hive is given each dump is preceded by the
// name of the file.
//
// Returns 0 on success, or 1 if an option was unknown or any hive could not be
// opened, read or recognised (the remaining hives are still processed).
int main( int argc, char* argv[] )
{
  char	path[0x4000];		// full path of each line; also a scratch buffer
  char* data;
  base_block* regf;
  FILE* f;
  long	size;
  BOOL	show_hive;
  int	rc = 0;
  const char* errmsg;

  if (argc == 1 || strcmp( argv[1], "/?" ) == 0
		|| strcmp( argv[1], "-?" ) == 0
		|| strcmp( argv[1], "--help" ) == 0)
  {
    printf( "Dump a registry hive as text, one line per value.\n"
	    "https://github.com/adoxa/regdump\n"
	    "\n"
	    "regdump [-hkstTv] HIVE...\n"
	    "\n"
	    "-h  use hexadecimal for type & size, placed before key\n"
	    "-k  keys only (implies -t)\n"
	    "-s  include the entire string data (excluding trailing nulls)\n"
	    "-t  include key timestamp (seconds)\n"
	    "-T  include key timestamp (full resolution)\n"
	    "-v  values only\n"
	  );
    return 0;
  }

  // Options may be combined ("-kv") or given separately ("-k -v").
  while (argc > 1 && *argv[1] == '-')
  {
    while (*++argv[1])
    {
      switch (*argv[1])
      {
	case 'h': hex_type    = TRUE; break;
	case 's': all_string  = TRUE; break;
	case 'v': only_values = TRUE; break;
	case 'k': only_keys   = TRUE; // fall through
	case 't': time_sec    = TRUE; break;
	case 'T': time_full   = TRUE; break;
	default:
	  fprintf( stderr, "%c: unknown option.\n", *argv[1] );
	  return 1;
      }
    }
    ++argv;
    --argc;
  }

  full = path;
  show_hive = (argc > 2);

  for (; argc > 1; ++argv, --argc)
  {
    f = fopen( argv[1], "rb" );
    if (!f)
    {
      perror( argv[1] );
      rc = 1;
      continue;
    }

    // Check the signatures of the header and of the first bin (which follows
    // the 4096-byte header), then read the entire file.  The first failure
    // sets errmsg and skips the remaining steps.
    data = NULL;
    size = 0;
    errmsg = NULL;
    if (fread( path, 4, 1, f ) != 1 || memcmp( path, "regf", 4 ) != 0)
      errmsg = "invalid file ('regf' signature not found)";
    else if (fseek( f, 0x1000, SEEK_SET ) != 0 ||
	     fread( path, 4, 1, f ) != 1 || memcmp( path, "hbin", 4 ) != 0)
      errmsg = "invalid file ('hbin' signature not found)";
    else if (fseek( f, 0, SEEK_END ) != 0 || (size = ftell( f )) < 0)
      errmsg = "read error";
    else if ((data = malloc( size )) == NULL)
      errmsg = "insufficient memory";
    else
    {
      rewind( f );
      if (fread( data, size, 1, f ) != 1)
	errmsg = "read error";
    }
    fclose( f );

    regf = (base_block*)data;
    // Sanity check: the root key must at least start within the hive data.
    if (!errmsg && (regf->root_cell_offset < 0 ||
		    regf->root_cell_offset >= size - 0x1000))
      errmsg = "invalid file (root key offset out of range)";

    if (errmsg)
    {
      fprintf( stderr, "%s: %s.\n", argv[1], errmsg );
      free( data );
      rc = 1;
      continue;
    }

    big_data = (regf->major_version > 1 || regf->minor_version > 3);

    if (show_hive)
      printf( "%s\n\n", argv[1] );

    // We just skip header and start walking root key tree.
    root = data + 0x1000;
    walk( path, (key_block*)(regf->root_cell_offset + root) );
    free( data );

    // Blank line between hives, but not after the last.
    if (show_hive && argc > 2)
      putchar( '\n' );
  }

  return rc;
}