import sys import numpy as np import tensorflow as tf import scipy.io.wavfile as wav import time import os os.environ['CUDA_VISIBLE_DEVICES'] = '' import sys import pandas as pd try: import pydub import struct except: print("pydub was not loaded, MP3 compression will not work") sys.path.append("DeepSpeech") import DeepSpeech from tf_logits import get_logits from deepspeech_training.util.flags import create_flags, FLAGS from deepspeech_training.util.config import Config, initialize_globals from ds_ctcdecoder import ctc_beam_search_decoder, Scorer import absl.flags f = absl.flags # define parsing arguments f.DEFINE_string('input', None, 'Input audio .wav file(s), at 16KHz (separated by spaces)') f.DEFINE_string('restore_path', None, 'Path to the DeepSpeech checkpoint (ending in best_dev-1466475)') f.register_validator('input', os.path.isfile, message='The input audio pointed to by --input must exist and be readable.') def classify(): with tf.Session() as sess: if FLAGS.input.split(".")[-1] == 'mp3': raw = pydub.AudioSegment.from_mp3(FLAGS.input) audio = np.array([struct.unpack("