```python  
import numpy as np  
import matplotlib.pyplot as plt  
from scipy.io import wavfile  
from scipy.signal import butter, lfilter

# Load the audio file.
# wavfile.read returns the sampling rate in Hz and the samples as a NumPy array.
# NOTE(review): the 1-D windowing further down assumes a mono recording - confirm for other inputs.
sample_rate, audio_data = wavfile.read('mastermind.wav')

# Function to apply a lowpass filter
def butter_lowpass_filter(data, cutoff, fs, order=5, axis=-1):
    """Low-pass filter ``data`` with a digital Butterworth filter.

    Args:
        data: Array-like of samples.
        cutoff: Cut-off frequency, in the same unit as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order of the Butterworth filter.
        axis: Axis of ``data`` along which to filter. The default (-1) keeps
            the previous behaviour; pass ``axis=0`` for data laid out as
            ``(samples, channels)``.

    Returns:
        The filtered samples as a NumPy array with the same shape as ``data``.

    Raises:
        ValueError: Raised by ``scipy.signal.butter`` when the normalised
            cut-off is not strictly between 0 and 1.
    """
    nyquist = 0.5 * fs
    # butter() expects the cut-off as a fraction of the Nyquist frequency.
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    # lfilter is a causal (single-pass) filter, so the output is delayed
    # relative to the input rather than zero-phase.
    return lfilter(b, a, data, axis=axis)

# Low-pass the recording at 3 kHz before the spectral analysis.
cutoff_frequency = 3000
filtered_audio_data = butter_lowpass_filter(
    data=audio_data,
    cutoff=cutoff_frequency,
    fs=sample_rate,
)

# Function to perform sliding window FFT
def sliding_window_fft(audio, window_size, step_size, sample_rate):
    """Compute a magnitude spectrogram with a Hann-windowed sliding FFT.

    Args:
        audio: 1-D sequence of samples.
        window_size: Number of samples per FFT window.
        step_size: Hop between the starts of consecutive windows, in samples.
        sample_rate: Sampling rate of ``audio``; used only to convert window
            centres from sample indices to seconds.

    Returns:
        A ``(magnitudes, times)`` tuple. ``magnitudes`` has shape
        ``(window_size // 2, num_windows)`` and holds the absolute value of
        the first half of each window's FFT; ``times`` holds the centre of
        each window in seconds. When ``audio`` is shorter than one window the
        arrays have shapes ``(window_size // 2, 0)`` and ``(0,)``.

    Raises:
        ValueError: If ``window_size`` or ``step_size`` is not positive.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive")

    audio = np.asarray(audio)
    half = window_size // 2
    # The taper is identical for every window, so build it once.
    taper = np.hanning(window_size)

    fft_values = []
    times = []
    # "+ 1" makes the last start index that still fits a full window
    # inclusive; without it the final window was silently dropped.
    for start in range(0, len(audio) - window_size + 1, step_size):
        end = start + window_size
        spectrum = np.fft.fft(audio[start:end] * taper)
        # Keep only the first half of the bins (non-negative frequencies).
        fft_values.append(np.abs(spectrum[:half]))
        times.append((start + end) // 2 / sample_rate)

    if not fft_values:
        # Keep the documented 2-D layout even when no window fits.
        return np.empty((half, 0)), np.empty(0)
    return np.array(fft_values).T, np.array(times)

# Spectrogram of the filtered signal: 1024-sample windows, 256-sample hop.
window_size = 1024
step_size = 256
fft_values, times = sliding_window_fft(
    audio=filtered_audio_data,
    window_size=window_size,
    step_size=step_size,
    sample_rate=sample_rate,
)
# Frequency of each retained FFT bin (first half of the bins only).
frequencies = np.fft.fftfreq(window_size, 1/sample_rate)[:window_size//2]

# DTMF keypad: each key is the pair (row tone, column tone) in Hz.
low_frequencies = [697, 770, 852, 941]
high_frequencies = [1209, 1336, 1477, 1633]
# One string per keypad row, one character per column, in the same order as
# the two frequency lists above.
dtmf_mapping = {
    (low, high): key
    for low, row in zip(low_frequencies, ("123A", "456B", "789C", "*0#D"))
    for high, key in zip(high_frequencies, row)
}

# Find the nearest frequency in the given list
def find_nearest_frequency(frequency, frequency_list):
    """Return the element of ``frequency_list`` closest to ``frequency``.

    When two candidates are equally close, the one that appears first in
    ``frequency_list`` is returned.

    Raises:
        ValueError: If ``frequency_list`` is empty.
    """
    return min(frequency_list, key=lambda candidate: abs(candidate - frequency))

# Function to format timestamp
def format_timestamp(time_in_seconds):
    """Format a time in seconds as ``"<seconds>.<milliseconds>"``.

    The value is rounded to the nearest millisecond before it is split, so
    floating-point noise cannot drop a millisecond (1.001 formats as
    ``"1.001"``), and the sign is applied once to the whole value.

    Args:
        time_in_seconds: Time as an int or float (NumPy scalars included).

    Returns:
        The formatted string with exactly three millisecond digits.
    """
    total_milliseconds = int(round(float(time_in_seconds) * 1000))
    sign = "-" if total_milliseconds < 0 else ""
    seconds, milliseconds = divmod(abs(total_milliseconds), 1000)
    return f"{sign}{seconds}.{milliseconds:03d}"
```

```python  
import pandas as pd

# Function to detect DTMF tones with top-2 FFT frequencies and detected characters
def detect_dtmf_tones_with_characters(fft_values, frequencies,
                                      low_frequencies, high_frequencies,
                                      dtmf_mapping, window_times=None):
    """Guess a DTMF key for every spectrogram column.

    For each column the two strongest FFT bins are taken; the lower of the two
    bin frequencies is snapped to the nearest entry of ``low_frequencies`` and
    the higher one to the nearest entry of ``high_frequencies``.

    Args:
        fft_values: 2-D array of magnitudes, one column per time window.
        frequencies: NumPy array giving the frequency of each row of
            ``fft_values``.
        low_frequencies: Candidate row tones.
        high_frequencies: Candidate column tones.
        dtmf_mapping: Dict mapping ``(low, high)`` pairs to characters.
        window_times: Optional sequence with the time of each column. When
            omitted, the module-level ``times`` is used, which is what this
            function always read before the parameter existed.

    Returns:
        A list with one tuple per column:
        ``(time, low_freq, high_freq, [fft_freq1, fft_freq2], intensities,
        character)``. ``fft_freq1 <= fft_freq2``. ``intensities`` holds the
        two bin magnitudes ordered strongest first, so it is not necessarily
        in the same order as the frequency pair. ``character`` is ``None``
        when the pair is missing from ``dtmf_mapping``.
    """
    if window_times is None:
        # Backward-compatible fallback to the module-level array.
        window_times = times

    detected_tones = []
    for i, time_slice in enumerate(fft_values.T):
        # Indices of the two most intense bins, strongest first.
        top_two_indices = np.argsort(time_slice)[-2:][::-1]
        top_two_frequencies = frequencies[top_two_indices]
        top_two_intensities = time_slice[top_two_indices]

        fft_freq1 = min(top_two_frequencies)
        fft_freq2 = max(top_two_frequencies)
        # Snap the lower peak to a row tone and the higher peak to a column tone.
        low_freq = find_nearest_frequency(fft_freq1, low_frequencies)
        high_freq = find_nearest_frequency(fft_freq2, high_frequencies)

        # Map to DTMF character
        detected_character = dtmf_mapping.get((low_freq, high_freq), None)

        # Get the corresponding time in seconds
        time_in_seconds = window_times[i]

        detected_tones.append((
            time_in_seconds,
            low_freq,
            high_freq,
            [fft_freq1, fft_freq2],
            top_two_intensities,
            detected_character,
        ))

    return detected_tones

# Detect DTMF tones with top-2 FFT frequencies and detected characters
detected_tones_with_characters = detect_dtmf_tones_with_characters(
    fft_values, frequencies, low_frequencies, high_frequencies, dtmf_mapping
)

# One flat record per window; the frequency and intensity pairs are split into
# separate columns so they can be filtered on directly.
detected_tones_data_with_characters = [
    {
        'timestamp': timestamp,
        'low_freq': low_freq,
        'high_freq': high_freq,
        'fft_freq1': fft_freqs[0],
        'fft_freq2': fft_freqs[1],
        'intensity1': intensities[0],
        'intensity2': intensities[1],
        'character': character,
    }
    for timestamp, low_freq, high_freq, fft_freqs, intensities, character
    in detected_tones_with_characters
]

# Converting the list of dictionaries into a pandas DataFrame with characters
detected_tones_df_with_characters = pd.DataFrame(detected_tones_data_with_characters)

# Displaying the first 10 rows of the DataFrame with characters
detected_tones_df_with_characters.head(10)

```

```python  
# Shorter working name for the detection table (plain rebinding, so both names refer to the same DataFrame).
tones = detected_tones_df_with_characters
```

```python  
# Keep only windows whose two strongest bins are both loud: the product of
# their magnitudes must exceed 1e12.
tones = tones.loc[tones['intensity1'] * tones['intensity2'] > 1e12]
```

```python  
# Inspect 10 randomly chosen rows of the filtered table.
tones.sample(10)
```

```python  
# Display the filtered table.
tones
```

```python  
# Mismatch score per window: product of the absolute errors between the two
# measured peak frequencies and the DTMF tones they were snapped to.
# assign() returns a new frame instead of writing into the filtered slice,
# which avoids pandas' SettingWithCopyWarning.
tones = tones.assign(
    delta=(tones['fft_freq1'] - tones['low_freq']).abs()
    * (tones['fft_freq2'] - tones['high_freq']).abs()
)
```

/tmp/ipykernel_2009135/824670098.py:1: SettingWithCopyWarning:  
A value is trying to be set on a copy of a slice from a DataFrame.  
Try using .loc[row_indexer,col_indexer] = value instead  
  
See the caveats in the documentation: https://pandas.pydata.org/pandas-
docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy  
tones['delta'] = ((tones['fft_freq1'] - tones['low_freq']).abs() *
(tones['fft_freq2'] - tones['high_freq']).abs())

```python

```

```python  
# Plot the per-window mismatch score against the row index.
tones['delta'].plot()
```

<Axes: >

  
![png](output_8_1.png)  

```python  
# Discard windows whose mismatch score is 10**4 or more.
tones = tones.loc[tones['delta'] < 10_000]
```

```python  
# Character detected in the previous remaining row (NaN for the first row).
# assign() returns a new frame instead of writing into the filtered slice,
# which avoids pandas' SettingWithCopyWarning.
tones = tones.assign(character_prev=tones['character'].shift())
```

/tmp/ipykernel_2009135/3924296832.py:1: SettingWithCopyWarning:  
A value is trying to be set on a copy of a slice from a DataFrame.  
Try using .loc[row_indexer,col_indexer] = value instead  
  
See the caveats in the documentation: https://pandas.pydata.org/pandas-
docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy  
tones['character_prev'] = tones['character'].shift()

```python  
tones_sequence = tones.copy()
# Number the runs of identical characters: the counter advances on every row
# whose character differs from the previous row's (the first row always
# starts a run because its predecessor is missing).
tones_sequence['sequence_index'] = (
    tones['character'] != tones['character_prev']
).cumsum()
```

```python  
# Give every window a weight of 1 so that summing per run yields the run length in windows.
tones_sequence['count'] = 1
```

```python  
# Collapse each run into one row: its length in windows, its character and
# the timestamp of its first window.
tones_sequence = tones_sequence.groupby('sequence_index').agg(
    count=('count', 'sum'),
    character=('character', 'first'),
    timestamp=('timestamp', 'first'),
)
```

```python  
# Convert run lengths from windows to key presses.
# NOTE(review): 28 is presumably the number of windows one key press spans in
# this recording - confirm before reusing on other audio.
tones_sequence['count'] = tones_sequence['count'].div(28).round().astype(int)
```

```python  
# Start a new phrase whenever the gap to the previous run exceeds three units
# of 0.35 s.
gap_in_units = tones_sequence['timestamp'].diff() / 0.35
tones_sequence['phrase_index'] = (gap_in_units > 3).cumsum()
```

```python  
# Per phrase, gather the characters and their repeat counts into parallel lists.
tones_sequence = tones_sequence.groupby('phrase_index').agg(
    character=('character', list),
    count=('count', list),
)
```

```python  
# Expand each (character, repeat count) pair and join them into one digit
# string per phrase.
tones_sequence.apply(
    lambda row: ''.join(
        char * repeats for char, repeats in zip(row['character'], row['count'])
    ),
    axis=1,
)
```

phrase_index  
0 41323036267601217574  
1 36710992825315281347  
2 60924906937541136999  
3 02333  
dtype: object

```python  
# Concatenate the digit strings of all phrases, in phrase order, and read the
# result as one decimal integer.
value = int(''.join(
    tones_sequence.apply(
        lambda row: ''.join(
            char * repeats for char, repeats in zip(row['character'], row['count'])
        ),
        axis=1,
    )
))
```

```python  
# Converting to hexadecimal
hex_representation = hex(value)

hex_string = hex_representation[2:]  # Removing the '0x' prefix
# bytes.fromhex needs whole bytes; hex() drops a leading zero nibble, so pad
# the string back to an even number of digits.
if len(hex_string) % 2:
    hex_string = '0' + hex_string
bytes_representation = bytes.fromhex(hex_string)
bytes_representation.decode()
```

'dsc{m0th3r_1s_m0th3r1ng_ts}'

```python

```  

Original writeup
(https://github.com/dremovd/ctf/blob/main/deconstruct/Mastermind%20(phone%20tones%20recognition).ipynb).