Environment: Ubuntu 14.04 + Python 3.5 + Anaconda + python-midi
I came across a project on GitHub that uses an RNN to generate classical music: biaxial-rnn-music-composition, which is based on Theano. This post generates music with TensorFlow instead; the code logic is largely based on that project. Related links:
https://deeplearning4j.org/restrictedboltzmannmachine.html
https://magenta.tensorflow.org/2016/06/10/recurrent-neural-network-generation-tutorial/
https://deepmind.com/blog/wavenet-generative-model-raw-audio
Google's Magenta project: generating music, drawings, or video
http://www.hexahedria.com/2015/08/03/composing-music-with-recurrent-neural-networks/
Dataset: first, gather some MIDI music; freemidi.org is one place to download it.
Another music dataset worth knowing about: MusicNet.
I downloaded 50-odd MIDI files (admittedly on the small side).
With the MIDI files in hand, we also need a Python library for manipulating MIDI: python-midi.
Installing python-midi:
For Python 2:
git clone https://github.com/vishnubob/python-midi
cd python-midi
# git checkout feature/python3  # if you use Python 3 with this repo, check out this branch
python setup.py install
For Python 3 (a ported fork, no branch checkout needed):
git clone https://github.com/louisabraham/python3-midi
cd python3-midi
python setup.py install
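A quick sanity check that the install worked, assuming you have some MIDI file on hand (test.mid below is a placeholder filename):

import midi

pattern = midi.read_midifile('test.mid')  # placeholder path to any MIDI file
print(pattern.resolution)  # ticks per quarter note
print(len(pattern))        # number of tracks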
TensorFlow while_loop usage:
# Code adapted from Stack Overflow
import tensorflow as tf
import numpy as np

def body(x):
    a = tf.random_uniform(shape=[2, 2], dtype=tf.int32, maxval=100)
    b = tf.constant(np.array([[1, 2], [3, 4]]), dtype=tf.int32)
    c = a + b
    return tf.nn.relu(x + c)

def condition(x):
    return tf.reduce_sum(x) < 100

x = tf.Variable(tf.constant(0, shape=[2, 2]))

with tf.Session():
    tf.global_variables_initializer().run()
    result = tf.while_loop(condition, body, [x])
    print(result.eval())
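tf.while_loop builds the loop into the TensorFlow graph itself: body is applied to the loop variables for as long as condition returns True, and only the final values come back. The gibbs_sample function in the code below uses exactly this construct to run k sampling steps inside the graph.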
Complete TensorFlow code for generating MIDI music (modified):
import tensorflow as tf
import midi
import numpy as np
import os

# Only model pitches in [24, 102): 78 notes covering most of the piano range
lower_bound = 24
upper_bound = 102
span = upper_bound - lower_bound
# Convert a MIDI file into a note state matrix
def midiToNoteStateMatrix(midi_file_path, squash=True, span=span):
    pattern = midi.read_midifile(midi_file_path)
    time_left = [track[0].tick for track in pattern]
    posns = [0 for track in pattern]
    statematrix = []
    time = 0
    # Per-note state is a pair [play, articulate]
    state = [[0, 0] for x in range(span)]
    statematrix.append(state)
    condition = True
    while condition:
        # Sample the state once per sixteenth note (resolution = ticks per quarter note)
        if time % (pattern.resolution // 4) == (pattern.resolution // 8):
            # Crossed a note boundary: carry held notes over, clear articulations
            oldstate = state
            state = [[oldstate[x][0], 0] for x in range(span)]
            statematrix.append(state)
        for i in range(len(time_left)):
            if not condition:
                break
            while time_left[i] == 0:
                track = pattern[i]
                pos = posns[i]
                evt = track[pos]
                if isinstance(evt, midi.NoteEvent):
                    if (evt.pitch < lower_bound) or (evt.pitch >= upper_bound):
                        pass  # ignore notes outside the modeled range
                    else:
                        if isinstance(evt, midi.NoteOffEvent) or evt.velocity == 0:
                            state[evt.pitch - lower_bound] = [0, 0]
                        else:
                            state[evt.pitch - lower_bound] = [1, 1]
                elif isinstance(evt, midi.TimeSignatureEvent):
                    # Bail out early on time signatures the model doesn't handle
                    if evt.numerator not in (2, 4):
                        condition = False
                        break
                try:
                    time_left[i] = track[pos + 1].tick
                    posns[i] += 1
                except IndexError:
                    time_left[i] = None
            if time_left[i] is not None:
                time_left[i] -= 1
        if all(t is None for t in time_left):
            break
        time += 1
    S = np.array(statematrix)
    # Flatten each [play, articulate] pair: the first span columns are "play",
    # the next span columns are "articulate"
    statematrix = np.hstack((S[:, :, 0], S[:, :, 1]))
    return np.asarray(statematrix).tolist()
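As a quick check on the conversion, a minimal sketch (example.mid is a placeholder for any of the downloaded files):

matrix = midiToNoteStateMatrix('example.mid')  # placeholder filename
print(np.array(matrix).shape)
# Expect (T, 156): T sixteenth-note steps, with columns 0-77 the "play" flags
# and columns 78-155 the "articulate" flags for the 78 modeled pitches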
# Convert a note state matrix back into a MIDI file
def noteStateMatrixToMidi(statematrix, filename="output_file", span=span):
    statematrix = np.array(statematrix)
    if not len(statematrix.shape) == 3:
        # Undo the hstack from midiToNoteStateMatrix: recover [play, articulate] pairs
        statematrix = np.dstack((statematrix[:, :span], statematrix[:, span:]))
    statematrix = np.asarray(statematrix)
    pattern = midi.Pattern()
    track = midi.Track()
    pattern.append(track)
    span = upper_bound - lower_bound
    tickscale = 55  # MIDI ticks per timestep in the output file
    lastcmdtime = 0
    prevstate = [[0, 0] for x in range(span)]
    # Append a final all-off state so every sounding note gets a NoteOff event
    for time, state in enumerate(np.vstack((statematrix, [prevstate]))):
        offNotes = []
        onNotes = []
        for i in range(span):
            n = state[i]
            p = prevstate[i]
            if p[0] == 1:
                if n[0] == 0:
                    offNotes.append(i)
                elif n[1] == 1:
                    # Re-articulated: stop the note and strike it again
                    offNotes.append(i)
                    onNotes.append(i)
            elif n[0] == 1:
                onNotes.append(i)
        for note in offNotes:
            track.append(midi.NoteOffEvent(tick=(time - lastcmdtime) * tickscale, pitch=note + lower_bound))
            lastcmdtime = time
        for note in onNotes:
            track.append(midi.NoteOnEvent(tick=(time - lastcmdtime) * tickscale, velocity=40, pitch=note + lower_bound))
            lastcmdtime = time
        prevstate = state
    eot = midi.EndOfTrackEvent(tick=1)
    track.append(eot)
    midi.write_midifile("{}.mid".format(filename), pattern)
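A round trip through both functions is a quick way to verify them before training; a sketch, again with a placeholder filename:

matrix = midiToNoteStateMatrix('example.mid')  # placeholder filename
noteStateMatrixToMidi(matrix, 'roundtrip')     # writes roundtrip.mid

The copy will not be byte-identical, because timing is quantized by tickscale and all velocities are written as 40, but it should be recognizably the same piece.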
# Load the MIDI data
def get_songs(midi_path):
    files = os.listdir(midi_path)
    songs = []
    for f in files:
        f = midi_path + '/' + f
        print('Loading:', f)
        try:
            song = np.array(midiToNoteStateMatrix(f))
            # Keep only songs long enough to be worth training on
            if song.shape[0] > 64:
                songs.append(song)
        except Exception as e:
            print('Invalid data:', e)
    print('Number of valid MIDI files read:', len(songs))
    return songs

# The midi directory holds the downloaded MIDI files
songs = get_songs('midi')
note_range = upper_bound - lower_bound
# Length of each training sample, in sixteenth-note timesteps
n_timesteps = 128
# One visible vector: 2 bits (play/articulate) per note per timestep, flattened
n_input = 2 * note_range * n_timesteps
n_hidden = 64
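With these settings, note_range = 102 - 24 = 78, so each flattened training vector has n_input = 2 × 78 × 128 = 19968 visible units, squeezed through only 64 hidden units.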
X = tf.placeholder(tf.float32, [None, n_input])
# RBM parameters (weights, hidden bias, visible bias), created in neural_network()
W = None
bh = None
bv = None
# Turn a tensor of probabilities into a binary sample: each element is 1 with probability probs
def sample(probs):
    return tf.floor(probs + tf.random_uniform(tf.shape(probs), 0, 1))
# Run k steps of Gibbs sampling, starting the chain from the input X
def gibbs_sample(k):
    def body(count, k, xk):
        # Sample hidden units given visible units, then visible given hidden
        hk = sample(tf.sigmoid(tf.matmul(xk, W) + bh))
        xk = sample(tf.sigmoid(tf.matmul(hk, tf.transpose(W)) + bv))
        return count + 1, k, xk
    def condition(count, k, xk):
        return count < k
    count = tf.constant(0)
    [_, _, x_sample] = tf.while_loop(condition, body, [count, tf.constant(k), X])
    # Don't backpropagate through the sampling chain
    x_sample = tf.stop_gradient(x_sample)
    return x_sample
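Two details worth spelling out: sample() draws Bernoulli values, because floor(p + u) with u uniform on [0, 1) equals 1 exactly when u ≥ 1 − p, which happens with probability p; and body() alternately samples the hidden layer given the visible layer and the visible layer given the hidden layer, which is precisely the block Gibbs chain an RBM calls for.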
# Define the network (a restricted Boltzmann machine)
def neural_network():
    global W
    # Note: the second positional argument of tf.random_normal is the mean,
    # so pass the 0.01 explicitly as the standard deviation
    W = tf.Variable(tf.random_normal([n_input, n_hidden], stddev=0.01))
    global bh
    bh = tf.Variable(tf.zeros([1, n_hidden], tf.float32))
    global bv
    bv = tf.Variable(tf.zeros([1, n_input], tf.float32))
    # One Gibbs step gives the "negative" visible sample (CD-1)
    x_sample = gibbs_sample(1)
    h = sample(tf.sigmoid(tf.matmul(X, W) + bh))
    h_sample = sample(tf.sigmoid(tf.matmul(x_sample, W) + bh))
    learning_rate = tf.constant(0.005, tf.float32)
    size_bt = tf.cast(tf.shape(X)[0], tf.float32)
    W_adder = tf.multiply(learning_rate / size_bt, tf.subtract(tf.matmul(tf.transpose(X), h), tf.matmul(tf.transpose(x_sample), h_sample)))
    bv_adder = tf.multiply(learning_rate / size_bt, tf.reduce_sum(tf.subtract(X, x_sample), 0, True))
    bh_adder = tf.multiply(learning_rate / size_bt, tf.reduce_sum(tf.subtract(h, h_sample), 0, True))
    update = [W.assign_add(W_adder), bv.assign_add(bv_adder), bh.assign_add(bh_adder)]
    return update
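These updates implement contrastive divergence (CD-1). Writing ε for the learning rate and N for the batch size, the code applies W += (ε/N)(Xᵀh − x̃ᵀh̃), bv += (ε/N)Σ(X − x̃), and bh += (ε/N)Σ(h − h̃), where x̃ is the Gibbs sample and h, h̃ are hidden samples driven by X and x̃ respectively. The updates are applied directly with assign_add rather than through a TensorFlow optimizer, which is why gibbs_sample guards the chain with tf.stop_gradient.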
# Train the network
def train_neural_network():
    update = neural_network()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        epochs = 256
        batch_size = 64
        for epoch in range(epochs):
            for song in songs:
                song = np.array(song)
                # Trim each song to a multiple of n_timesteps, then flatten every
                # n_timesteps window into one training vector
                song = song[:song.shape[0] // n_timesteps * n_timesteps]
                song = np.reshape(song, [song.shape[0] // n_timesteps, song.shape[1] * n_timesteps])
                for i in range(0, len(song), batch_size):
                    train_x = song[i:i + batch_size]
                    sess.run(update, feed_dict={X: train_x})
            print(epoch)
            # Save the model after the final epoch
            if epoch == epochs - 1:
                saver.save(sess, 'midi.module')
        # Generate MIDI: run one Gibbs step starting from an all-zero visible vector
        generated = gibbs_sample(1).eval(session=sess, feed_dict={X: np.zeros((1, n_input))})
        S = np.reshape(generated[0, :], (n_timesteps, 2 * note_range))
        noteStateMatrixToMidi(S, "auto_gen_music")
        print('Wrote auto_gen_music.mid')

train_neural_network()
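To listen to the result, any MIDI player will do; on Ubuntu one option (assuming you are willing to install the timidity package) is:

sudo apt-get install timidity
timidity auto_gen_music.mid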
The generated MIDI music: auto_gen_music.mid
Music classification: https://github.com/despoisj/DeepAudioClassification
Original post (Chinese): http://blog.topspeedsnail.com/archives/10508