#!/usr/bin/env perl
##---------------------------------------------------------------------------##
##  File:
##      @(#) configure
##  Author:
##      Robert Hubley
##  Description:
##      Configuration utility for the RepeatMasker package.
##
#******************************************************************************
#* Copyright (C) Institute for Systems Biology 2003-2024 Developed by
#* Robert Hubley.
#*
#* This work is licensed under the Open Source License v2.1.  To view a copy
#* of this license, visit http://www.opensource.org/licenses/osl-2.1.php or
#* see the license.txt file contained in this distribution.
#*
###############################################################################

=head1 NAME

configure - Configure the RepeatMasker package

=head1 SYNOPSIS

  perl ./configure [options]

=head1 DESCRIPTION

Interactive configuration script for RepeatMasker.  Run from inside the
RepeatMasker installation directory.

=head1 OPTIONS

=over 4

=item -version

Print the RepeatMasker version and exit.

=item -perlbin

Path to the perl interpreter to embed in script headers.

=item -trf_prgm

Full path to the TRF (Tandem Repeat Finder) executable.

=item -rmblast_dir

Path to the RMBlast bin directory.

=item -hmmer_dir

Path to the HMMER bin directory.

=item -crossmatch_dir

Path to the Crossmatch bin directory.

=item -abblast_dir

Path to the ABBlast bin directory.

=item -famdb_dir

Path to the FamDB installation directory (containing famdb.py).

=item -default_search_engine

Default search engine: rmblast, hmmer, crossmatch, or abblast.

=back

=head1 SEE ALSO

RepeatMasker

=head1 COPYRIGHT

Copyright 2003-2024 Robert Hubley, Institute for Systems Biology

=head1 AUTHOR

Robert Hubley

=cut

use strict;
use warnings;
use Config;
use Cwd;
use FindBin;
use File::Spec;
use File::Basename;
use Getopt::Long;
use Pod::Text;
use lib $FindBin::Bin;
use RepeatMaskerConfig;

# Unbuffered output so prompts appear before we block on STDIN.
$| = 1;

my $VERSION     = $RepeatMaskerConfig::VERSION;
my $INSTALL_DIR = $FindBin::RealBin;

##
## Command-line option processing
##
my %opts;
Getopt::Long::config( "noignorecase", "bundling_override" );
GetOptions(
    \%opts,
    'version',
    'perlbin=s',
    'trf_prgm=s',
    'rmblast_dir=s',
    'hmmer_dir=s',
    'crossmatch_dir=s',
    'abblast_dir=s',
    'famdb_dir=s',
    'default_search_engine=s',
) or usage();

if ( $opts{version} ) {
    print "RepeatMasker $VERSION\n";
    exit 0;
}

##
## Must be run from the installation directory
##
if ( getcwd() ne $INSTALL_DIR ) {
    die "\n ERROR: configure must be run from inside the RepeatMasker\n"
      . " installation directory:\n\n"
      . " cd $INSTALL_DIR\n"
      . " ./configure\n\n";
}

## Apply command-line overrides to configuration before any validation
RepeatMaskerConfig::resolveConfiguration( \%opts );
my $config = $RepeatMaskerConfig::configuration;

##
## Search engine definitions (must be declared before any phase that references them)
##
##   key     - configuration parameter name holding the engine's bin directory
##   label   - human-readable engine name used in prompts and status lines
##   defname - value stored in DEFAULT_SEARCH_ENGINE when this engine is default
##   probe   - executable searched for on PATH to seed the directory prompt
##   url     - project page shown while configuring the engine
##
my @ENGINES = (
    {
        key     => 'RMBLAST_DIR',
        label   => 'RMBlast',
        defname => 'rmblast',
        probe   => 'rmblastn',
        url     => 'https://www.repeatmasker.org/rmblast/'
    },
    {
        key     => 'HMMER_DIR',
        label   => 'HMMER',
        defname => 'hmmer',
        probe   => 'nhmmer',
        url     => 'http://hmmer.org/'
    },
    {
        key     => 'CROSSMATCH_DIR',
        label   => 'Crossmatch',
        defname => 'crossmatch',
        probe   => 'cross_match',
        url     => 'http://www.phrap.org/'
    },
    {
        key     => 'ABBLAST_DIR',
        label   => 'ABBlast',
        defname => 'abblast',
        probe   => 'blastp',
        url     => 'https://blast.advbiocomp.com/'
    },
);

##
## Phase 1: Dependency checks (non-interactive, run before clearing screen)
##
check_dependencies();

##
## Phase 2: Fix perl shebang lines in all RepeatMasker scripts
##
my $perl = resolve_perl( $opts{perlbin} );
update_shebangs($perl);

##
## Phase 3: Interactive configuration
##
system("clear");
print_banner();
configure_trf();
configure_search_engines();
configure_famdb();

##
## Phase 4: Save and summarise
##
RepeatMaskerConfig::updateConfigFile();
print_summary();
exit 0;

###########################################################################
## Subroutines
###########################################################################

##
## Render this script's POD to STDOUT and exit with status 1.
##
sub usage {
    my $p = Pod::Text->new( sentence => 0, width => 78 );
    $p->output_fh(*STDOUT);
    $p->parse_file($0);
    exit 1;
}

##
## Print the title banner shown at the top of the interactive phase.
##
sub print_banner {
    my $w = 62;
    print "=" x $w . "\n";
    printf " %-*s\n", $w - 2, " RepeatMasker v$VERSION -- Configuration";
    print "=" x $w . "\n\n";
}

##
## Print a section title framed by dashed rules.
##
sub section_header {
    my ($title) = @_;
    print "\n" . "-" x 62 . "\n";
    print " $title\n";
    print "-" x 62 . "\n";
}

## One-line status reporters; each takes the message text to display.
sub status_ok   { printf " [ OK ] %s\n", $_[0] }
sub status_info { printf " [ ] %s\n",    $_[0] }
sub status_warn { printf " [WARN] %s\n", $_[0] }

##
## Read one line from STDIN with the line ending removed.
## Returns undef once STDIN is exhausted so callers can tell "blank answer"
## apart from "no more input".
##
sub _read_answer {
    my $line = <STDIN>;
    return unless defined $line;
    $line =~ s/[\r\n]+$//;
    return $line;
}

##
## Read a line from STDIN, strip newline, return $default if blank.
##
##   $message - prompt text
##   $default - optional value offered in [brackets] and returned for a
##              blank answer
##
## Dies when STDIN is exhausted and there is no non-empty default: every
## caller re-prompts until it gets an acceptable value, so returning "" at
## end of input would loop forever.
##
sub prompt {
    my ( $message, $default ) = @_;
    my $has_default = defined $default && $default ne "";

    if ($has_default) {
        print " $message [$default]: ";
    }
    else {
        print " $message: ";
    }

    my $answer = _read_answer();
    if ( !defined $answer ) {
        die "\n ERROR: Unexpected end of input while waiting for an answer.\n\n"
          unless $has_default;
        return $default;
    }
    return ( $answer eq "" && defined $default ) ? $default : $answer;
}

##
## Prompt for Y/N.  Returns 1 for yes, 0 for no.
##
##   $message - question text
##   $default - "y" or "n" (defaults to "n"); used for a blank answer and
##              when STDIN is exhausted
##
sub prompt_yn {
    my ( $message, $default ) = @_;
    $default //= "n";
    my $hint = ( lc($default) eq "y" ) ? "[Y/n]" : "[y/N]";

    while (1) {
        print " $message $hint: ";
        my $answer = _read_answer();
        my $at_eof = !defined $answer;
        $answer = $default if $at_eof || $answer eq "";

        if ( $answer =~ /^[yn]$/i ) {
            return ( $answer =~ /^y$/i ) ? 1 : 0;
        }

        # Only reachable at EOF when the caller passed an unusable default.
        die "\n ERROR: Unexpected end of input while waiting for an answer.\n\n"
          if $at_eof;
    }
}

##
## Search PATH for an executable.  Returns full path or "".
##
## Empty PATH elements are skipped: concatenating them would probe "/$name"
## in the root directory, which is never what PATH meant.
##
sub find_in_path {
    my ($name) = @_;
    my $sep = $Config{path_sep} // ":";

    for my $dir ( split /\Q$sep\E/, ( $ENV{PATH} // "" ) ) {
        next if $dir eq "";
        my $full = File::Spec->catfile( $dir, $name );
        return $full if -f $full && -x $full;
    }
    return "";
}

##
## Resolve the perl interpreter to use for shebang lines.
##
##   $perlbin - optional -perlbin value: either a perl executable or a
##              directory containing "perl" or "bin/perl"
##
## Returns an absolute path to an executable perl.  Without $perlbin the
## running interpreter ($^X) is used, falling back to $Config{perlpath}
## when $^X is not absolute.  Dies when $perlbin is given but no executable
## can be found through it, or when no absolute executable path results.
##
sub resolve_perl {
    my ($perlbin) = @_;
    my $perl = $^X;

    if ($perlbin) {
        my @candidates =
          ( -d $perlbin )
          ? ( "$perlbin/perl", "$perlbin/bin/perl" )
          : ($perlbin);
        my ($chosen) = grep { -f $_ && -x $_ } @candidates;

        # An explicit request that cannot be honoured is an error; silently
        # substituting another interpreter would write the wrong shebang.
        die "\n ERROR: -perlbin '$perlbin' does not point to a perl executable.\n\n"
          unless defined $chosen;

        # Anchor a relative choice to the current directory so it survives
        # the absolute-path check below instead of being replaced.
        $perl = File::Spec->rel2abs($chosen);
    }

    unless ( File::Spec->file_name_is_absolute($perl) ) {
        $perl = $Config{perlpath} . ( $Config{_exe} // "" );
    }

    die "\n ERROR: Cannot determine an absolute path for the perl interpreter.\n"
      . " Try: ./configure -perlbin /path/to/perl\n\n"
      unless File::Spec->file_name_is_absolute($perl) && -x $perl;

    return $perl;
}

##
## Check required Perl modules and minimum versions.
## Prints one status line per check and exits with status 1 on the first
## failure.
##
sub check_dependencies {
    section_header("Checking Dependencies");

    ## Perl version
    ## NOTE(review): this file uses the defined-or operator, so a perl older
    ## than 5.10 fails to compile it before this check can run.
    if ( $] < 5.008 ) {
        print " [ FAIL ] Perl $] detected -- version 5.8 or higher is required.\n\n";
        exit 1;
    }
    status_ok( "Perl " . sprintf( "%vd", $^V ) );

    ## Required modules
    my @required = qw(
      Tie::File
      Getopt::Long
      POSIX
      File::Copy
      File::Path
      Data::Dumper
      Cwd
      Storable
    );
    my @missing = grep { !eval "require $_; 1" } @required;
    if (@missing) {
        print " [ FAIL ] Missing Perl modules: " . join( ", ", @missing ) . "\n";
        print "\n Install via CPAN or your system package manager, then re-run configure.\n\n";
        exit 1;
    }
    status_ok("Required Perl modules present");

    ## Scalar::Util must be the XS-compiled version
    unless ( eval "use Scalar::Util qw(weaken); 1" ) {
        print " [ FAIL ] Scalar::Util is not compiled with XS support.\n";
        print "\n Please reinstall Scalar::Util from CPAN and re-run configure.\n\n";
        exit 1;
    }
    status_ok("Scalar::Util (XS)");

    ## Storable minimum version.  The VERSION method does a proper version
    ## comparison, so developer releases such as "3.08_01" are handled
    ## without a non-numeric warning.
    require Storable;
    unless ( eval { Storable->VERSION(2.06); 1 } ) {
        print " [ FAIL ] Storable $Storable::VERSION -- version 2.06 or higher required.\n\n";
        exit 1;
    }
    status_ok("Storable $Storable::VERSION");
}

##
## Rewrite the perl shebang line in every RepeatMasker script.
##
##   $perl - absolute path of the interpreter to embed
##
## Script paths are relative to the current directory.  Dies if any script
## is missing or empty, or if a rewrite fails.
##
sub update_shebangs {
    my ($perl) = @_;
    section_header("Perl Interpreter");
    status_ok($perl);

    my @scripts = qw(
      RepeatMasker
      ProcessRepeats
      RepeatProteinMask
      DupMasker
      util/calcDivergenceFromAlign.pl
      util/createRepeatLandscape.pl
      util/maskFile.pl
      util/rmOutToGFF3.pl
      util/buildRMLibFromEMBL.pl
      util/rmToUCSCTables.pl
    );

    # Check the whole list first so a missing file cannot leave the
    # distribution with only some of its scripts rewritten.
    for my $script (@scripts) {
        die "\n ERROR: $script is missing from the RepeatMasker distribution.\n\n"
          unless -s $script;
    }

    # The interpreter path reaches the child through the environment and the
    # command runs in list form, so no shell or regex quoting of the path is
    # needed.  The single-quoted code is interpolated by the child perl.
    local $ENV{RM_CONFIGURE_PERL} = $perl;
    my $rewrite = 's/^#!.*perl.*/#!$ENV{RM_CONFIGURE_PERL}/';

    for my $script (@scripts) {
        system( $perl, '-i', '-0pe', $rewrite, $script ) == 0
          or die "\n ERROR: Failed to update the interpreter line in $script.\n\n";
    }
}

##
## Configure TRF (Tandem Repeat Finder).
##
## A valid -trf_prgm value is accepted without prompting.  Otherwise the
## user is asked for a path until one names an executable file; the prompt
## is seeded from the configured value or a PATH search for "trf".
##
sub configure_trf {
    section_header("TRF - Tandem Repeat Finder");
    print "\n TRF identifies simple tandem repeats and is required for all runs.\n";
    print " https://github.com/Benson-Genomics-Lab/TRF\n\n";

    # Accept CLI value without prompting
    if ( $opts{trf_prgm} ) {
        if ( RepeatMaskerConfig::validateParam('TRF_PRGM') ) {
            status_ok( $config->{'TRF_PRGM'}->{'value'} );
            return;
        }
        status_warn("Specified TRF path is not valid: $opts{trf_prgm}");
        print "\n";
    }

    # Seed the prompt with the current configured value or a PATH search
    my $current = $config->{'TRF_PRGM'}->{'value'};
    $current = find_in_path("trf") if !$current || !-x $current;

    while (1) {
        my $trf = prompt( "Full path to TRF executable", $current );
        if ( -f $trf && -x $trf ) {
            $config->{'TRF_PRGM'}->{'value'} = $trf;
            status_ok($trf);
            return;
        }
        print "\n '$trf' does not exist or is not executable.\n\n";

        # Offer the rejected entry again so it can be corrected.
        $current = $trf;
    }
}

##
## Configure search engines.
##
## If any engine directory was given on the command line the engines are
## validated without prompting and the script exits with status 1 when none
## is valid.  Otherwise an interactive menu is shown until at least one
## engine validates and the user selects "Done".
##
sub configure_search_engines {
    section_header("Search Engines");
    print "\n At least one search engine is required.\n";
    print " RMBlast is recommended for most uses.\n";

    # Non-interactive path: taken when any engine directory is given on the
    # command line.  Each option name is the lower-cased configuration key
    # (RMBLAST_DIR -> rmblast_dir).
    my @from_cli = grep { $opts{ lc $_->{key} } } @ENGINES;
    if (@from_cli) {
        my %is_valid =
          map { $_->{key} => ( RepeatMaskerConfig::validateParam( $_->{key} ) ? 1 : 0 ) }
          @ENGINES;
        my @valid = grep { $is_valid{ $_->{key} } } @ENGINES;

        if ( !@valid ) {
            print "\n ERROR: None of the specified engine directories are valid.\n\n";
            exit 1;
        }
        _auto_set_default( \@valid );
        print "\n";

        # Report every requested engine that was rejected; otherwise a
        # mistyped directory vanishes whenever another engine validates.
        for my $e ( grep { !$is_valid{ $_->{key} } } @from_cli ) {
            status_warn( "Specified $e->{label} directory is not valid: "
                  . $opts{ lc $e->{key} } );
        }
        _print_engine_status();
        return;
    }

    # Interactive menu
    my $done = 0;
    while ( !$done ) {
        print "\n";
        my $default = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );
        my $i       = 1;
        for my $e (@ENGINES) {

            # Validate once per row and reuse the result for both columns.
            my $ok      = RepeatMaskerConfig::validateParam( $e->{key} );
            my $display = "(not configured)";
            my $tag     = "";
            if ($ok) {
                $display = $config->{ $e->{key} }->{'value'} // "";
                $tag = ( $default eq $e->{defname} ) ? " [DEFAULT]" : " [configured]";
            }
            printf " %d. %-12s %s%s\n", $i++, $e->{label}, $display, $tag;
        }
        print " $i. Done\n";

        my $sel = prompt( "\n Select an engine to configure, or $i to finish", "" );
        $sel =~ s/\s+//g;

        if ( $sel !~ /^\d+$/ || $sel < 1 || $sel > $i ) {
            print "\n Invalid selection.\n";
        }
        elsif ( $sel < $i ) {
            _configure_one_engine( $ENGINES[ $sel - 1 ] );
        }
        else {
            # "Done": only allowed once something validates.
            my @valid = grep { RepeatMaskerConfig::validateParam( $_->{key} ) } @ENGINES;
            if ( !@valid ) {
                print "\n At least one search engine must be configured.\n";
                next;
            }
            _ensure_default( \@valid );
            $done = 1;
        }
    }

    print "\n";
    _print_engine_status();
}

##
## Prompt for and validate one engine's directory.
##
##   $e - one entry of @ENGINES
##
## Loops until the entered directory validates.  The engine becomes the
## default automatically when no default is set or no other engine is
## valid; otherwise the user is asked, unless it already is the default.
##
sub _configure_one_engine {
    my ($e) = @_;

    # Seed from current config or PATH
    my $current = $config->{ $e->{key} }->{'value'} // "";
    if ( !$current || !RepeatMaskerConfig::validateParam( $e->{key} ) ) {
        my $found = find_in_path( $e->{probe} );
        $current = dirname($found) if $found;
    }

    print "\n Configuring $e->{label}\n";
    print " $e->{url}\n\n";

    # Tolerate a configuration entry without an expected_binaries list.
    my $expected =
      join( ", ", @{ $config->{ $e->{key} }->{'expected_binaries'} // [] } );

    while (1) {
        my $dir = prompt( "Path to $e->{label} bin directory", $current );
        $config->{ $e->{key} }->{'value'} = $dir;

        if ( RepeatMaskerConfig::validateParam( $e->{key} ) ) {

            # Lower-cased like every other default comparison in this file,
            # so "RMBlast" in the config still counts as rmblast.
            my $cur_default =
              lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );
            my @already_valid = grep {
                $_->{key} ne $e->{key}
                  && RepeatMaskerConfig::validateParam( $_->{key} )
            } @ENGINES;

            # First engine or no default yet: make it the default automatically
            if ( !$cur_default || !@already_valid ) {
                $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $e->{defname};
            }
            elsif ( $cur_default ne $e->{defname} ) {
                if ( prompt_yn( "Make $e->{label} the default search engine?", "n" ) ) {
                    $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $e->{defname};
                }
            }
            return;
        }

        print "\n Could not find required binaries in '$dir'.\n";
        print " Expected: $expected\n\n";
    }
}

##
## If the current default engine is not valid, pick the first valid one.
##
##   $valid_ref - array ref of the @ENGINES entries that currently validate
##                (callers guarantee it is non-empty)
##
sub _auto_set_default {
    my ($valid_ref) = @_;
    my $cur = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );

    my $still_valid = grep { $_->{defname} eq $cur } @$valid_ref;
    return if $still_valid;

    $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $valid_ref->[0]{defname};
}

##
## After the menu, ensure a valid default is set; prompt if ambiguous.
##
##   $valid_ref - array ref of the @ENGINES entries that currently validate
##
## Keeps the current default when it is among the valid engines, picks the
## only valid engine silently, and otherwise asks the user to choose.
##
sub _ensure_default {
    my ($valid_ref) = @_;
    my $cur = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );

    my $still_valid = grep { $_->{defname} eq $cur } @$valid_ref;
    return if $still_valid;

    if ( @$valid_ref == 1 ) {
        $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} = $valid_ref->[0]{defname};
        return;
    }

    print "\n Multiple engines are configured. Select the default:\n\n";
    my $i = 1;
    for my $e (@$valid_ref) {
        printf " %d. %s\n", $i++, $e->{label};
    }

    while (1) {
        my $d = prompt( "Default engine", "1" );
        if ( $d =~ /^\d+$/ && $d >= 1 && $d <= @$valid_ref ) {
            $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} =
              $valid_ref->[ $d - 1 ]{defname};
            return;
        }
        print " Invalid selection.\n";
    }
}

##
## Print one status line per valid engine, tagging the default.
##
sub _print_engine_status {
    my $default = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );
    for my $e (@ENGINES) {
        next unless RepeatMaskerConfig::validateParam( $e->{key} );
        my $tag = ( $e->{defname} eq $default ) ? " [DEFAULT]" : "";
        status_ok( "$e->{label}: " . $config->{ $e->{key} }->{'value'} . $tag );
    }
}

##
## Optionally configure FamDB.
##
## A valid -famdb_dir is accepted without prompting.  Otherwise the user
## may keep a valid existing setting, enter a new directory (re-prompted
## until it validates), or decline, which clears FAMDB_DIR.
##
sub configure_famdb {
    section_header("FamDB (optional, but highly recommended)");
    print "\n FamDB provides access to the Dfam database for taxonomy-based\n";
    print " repeat searches (-species option). Without it, only custom\n";
    print " library searches (-lib) are supported.\n";
    print "\n https://github.com/Dfam-consortium/FamDB\n\n";

    # Non-interactive: -famdb_dir supplied on command line
    if ( $opts{famdb_dir} ) {
        if ( RepeatMaskerConfig::validateParam('FAMDB_DIR') ) {
            status_ok( "FamDB: " . $config->{'FAMDB_DIR'}->{'value'} );
            return;
        }
        status_warn("Specified FamDB directory is not valid: $opts{famdb_dir}");
        print " famdb.py must exist and be executable in that directory.\n\n";

        # Fall through to interactive prompt
    }

    my $current = $config->{'FAMDB_DIR'}->{'value'} // "";
    my $currently_valid = $current && RepeatMaskerConfig::validateParam('FAMDB_DIR');

    if ($currently_valid) {
        print " Currently configured: $current\n\n";
        if ( prompt_yn( "Keep this FamDB configuration?", "y" ) ) {
            status_ok("FamDB: $current");
            return;
        }
        print "\n";
        unless ( prompt_yn( "Configure a different FamDB installation?", "y" ) ) {
            $config->{'FAMDB_DIR'}->{'value'} = "";
            status_info("FamDB not configured. Only -lib searches will be available.");
            return;
        }
    }
    else {
        unless ( prompt_yn( "Configure FamDB now?", "n" ) ) {
            $config->{'FAMDB_DIR'}->{'value'} = "";
            status_info("FamDB not configured. Only -lib searches will be available.");
            return;
        }
    }

    print "\n";
    while (1) {
        my $dir = prompt(
            "Path to FamDB installation (directory containing famdb.py)",
            $current );
        $config->{'FAMDB_DIR'}->{'value'} = $dir;
        if ( RepeatMaskerConfig::validateParam('FAMDB_DIR') ) {
            status_ok("FamDB: $dir");
            return;
        }
        print "\n famdb.py not found or not executable in '$dir'.\n\n";

        # Offer the rejected entry again so it can be corrected.
        $current = $dir;
    }
}

##
## Run "famdb.py info" from $famdb_dir and return its standard output, or
## "" when nothing could be captured.  Standard error is discarded.
##
sub _famdb_info {
    my ($famdb_dir) = @_;

    # Single-quote the path for the shell so spaces and metacharacters in
    # the directory name cannot change the command.
    ( my $quoted = "$famdb_dir/famdb.py" ) =~ s/'/'\\''/g;
    my $info = `'$quoted' info 2>/dev/null`;
    return $info // "";
}

##
## Print a final summary of the installed configuration.
##
sub print_summary {
    my $w = 62;
    print "\n" . "=" x $w . "\n";
    printf " %-*s\n", $w - 2, " Configuration Summary";
    print "=" x $w . "\n\n";

    my $trf     = $config->{'TRF_PRGM'}->{'value'} // "";
    my $default = lc( $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'} // "" );

    printf " %-14s %s\n", "Libraries:", "$INSTALL_DIR/Libraries";
    printf " %-14s %s\n", "TRF:",       $trf;

    for my $e (@ENGINES) {
        next unless RepeatMaskerConfig::validateParam( $e->{key} );
        my $tag = ( $e->{defname} eq $default ) ? " [DEFAULT]" : "";
        printf " %-14s %s%s\n", "$e->{label}:", $config->{ $e->{key} }->{'value'}, $tag;
    }

    if ( RepeatMaskerConfig::validateParam('FAMDB_DIR') ) {
        my $famdb_dir = $config->{'FAMDB_DIR'}->{'value'};
        printf " %-14s %s\n", "FamDB:", $famdb_dir;

        my $info = _famdb_info($famdb_dir);
        if ($info) {
            my ( $db_name, $db_ver, $db_date, $db_count );
            $db_name  = $1 if $info =~ /^Database\s*:\s*(\S.*)/m;
            $db_ver   = $1 if $info =~ /^Version\s*:\s*(\S.*)/m;
            $db_date  = $1 if $info =~ /^Date\s*:\s*(\S.*)/m;
            $db_count = $1
              if $info =~ /^Total consensus sequences present\s*:\s*(\d+)/m;

            # Fall back to the HMM count when no consensus count is reported.
            $db_count = $1
              if !$db_count && $info =~ /^Total HMMs present\s*:\s*(\d+)/m;

            if ($db_name) {

                # Only show the fields that were actually reported, so a
                # missing version or date leaves no stray spaces or "()".
                my $line = join( " ", grep { defined } $db_name, $db_ver );
                $line .= " ($db_date)" if defined $db_date;
                printf " %-14s %s\n", "", $line;
                printf " %-14s %s families\n", "", _commify($db_count) if $db_count;
            }
        }
    }
    else {
        printf " %-14s (not configured -- only -lib searches available)\n", "FamDB:";
    }

    print "\n" . "=" x $w . "\n";
    printf " %-*s\n", $w - 2, " RepeatMasker v$VERSION is configured and ready.";
    print "=" x $w . "\n\n";
}

##
## Insert thousands separators into the integer part of a number.
## An undefined argument is treated as 0.  Digits after a decimal point are
## left untouched.
##
sub _commify {
    my $n = $_[0] // 0;

    # Peel three digits at a time off the end of the leading integer run.
    1 while $n =~ s/^([-+]?\d+)(\d{3})/$1,$2/;
    return $n;
}

1;