diff --git a/inference_sameRes_multiplespeakers.py b/inference_sameRes_multiplespeakers.py
new file mode 100644
index 0000000000000000000000000000000000000000..bab35b9f69a2d37da2d0e064dac090772881af23
--- /dev/null
+++ b/inference_sameRes_multiplespeakers.py
@@ -0,0 +1,153 @@
+###############################################################################
+#
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+###############################################################################
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pylab as plt
+
+import os
+import argparse
+import json
+import sys
+import numpy as np
+import torch
+
+from flowtron import Flowtron
+from data import Data
+from train import update_params
+
+sys.path.insert(0, "tacotron2")
+sys.path.insert(0, "tacotron2/waveglow")
+from glow import WaveGlow
+from scipy.io.wavfile import write
+from denoiser import Denoiser
+
+
+def infer(flowtron_path, waveglow_path, output_dir, text, speaker_id, n_frames,
+          sigma, gate_threshold, seed):
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+
+    # load waveglow in half precision, keeping the invertible 1x1
+    # convolutions in float32 for numerical stability
+    waveglow = torch.load(waveglow_path)['model'].cuda().half()
+    for k in waveglow.convinv:
+        k.float()
+    waveglow.eval()
+    denoiser = Denoiser(waveglow)
+
+    # load flowtron; a checkpoint may hold the full model under the 'model'
+    # key or a bare 'state_dict'
+    model = Flowtron(**model_config).cuda()
+    pretrained_dict = torch.load(flowtron_path, map_location='cpu')
+    if 'model' in pretrained_dict:
+        state_dict = pretrained_dict['model'].state_dict()
+    else:
+        state_dict = pretrained_dict['state_dict']
+    model.load_state_dict(state_dict)
+    model.eval()
+    print("Loaded checkpoint '{}'".format(flowtron_path))
+
+    ignore_keys = ['training_files', 'validation_files']
+    trainset = Data(
+        data_config['training_files'],
+        **dict((k, v) for k, v in data_config.items() if k not in ignore_keys))
+
+    text = trainset.get_text(text).cuda()
+    text = text[None]
+
+    # use the same constant residual for every speaker so that only the
+    # speaker embedding changes between outputs; for the stochastic default
+    # use: residual = torch.cuda.FloatTensor(1, 80, n_frames).normal_() * sigma
+    residual = (torch.zeros(1, 80, n_frames) + 0.1).cuda()
+
+    # NOTE: the hard-coded speaker ids below override the --id argument
+    for speaker_id in [14, 34]:
+        speaker_vecs = trainset.get_speaker_id(speaker_id).cuda()
+        print(speaker_vecs.cpu())
+        speaker_vecs = speaker_vecs[None]
+        with torch.no_grad():
+            mels, attentions = model.infer(
+                residual, speaker_vecs, text, gate_threshold=gate_threshold)
+            audio = waveglow.infer(mels.half(), sigma=0.8).float()
+            audio_denoised = denoiser(audio, strength=0.1)[:, 0]
+
+        for k in range(len(attentions)):
+            attention = torch.cat(attentions[k]).cpu().numpy()
+            fig, axes = plt.subplots(1, 2, figsize=(16, 4))
+            axes[0].imshow(mels[0].cpu().numpy(), origin='lower', aspect='auto')
+            axes[1].imshow(attention[:, 0].transpose(), origin='lower', aspect='auto')
+            fig.savefig(os.path.join(
+                output_dir,
+                'sid{}_sigma{}_attnlayer{}.png'.format(str(speaker_id).zfill(2), sigma, k)))
+            plt.close("all")
+
+        print("audio length: {} samples".format(audio.shape[1]))
+
+        audio_denoised = audio_denoised.cpu().numpy()[0]
+        # normalize audio for now
+        audio_denoised = audio_denoised / np.abs(audio_denoised).max()
+        write(os.path.join(output_dir,
+                           'sid{}_sigma{}_denoised.wav'.format(str(speaker_id).zfill(2), sigma)),
+              data_config['sampling_rate'], audio_denoised)
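+
+# Example invocation (paths and the config name are illustrative; note that
+# the script currently synthesizes the hard-coded speaker ids in the loop
+# above rather than the value passed via --id):
+#
+#   python inference_sameRes_multiplespeakers.py -c config.json \
+#       -f models/flowtron.pt -w models/waveglow.pt \
+#       -t "Text to synthesize" -i 0 -o results/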
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-c', '--config', type=str,
+                        help='JSON file for configuration')
+    parser.add_argument('-p', '--params', nargs='+', default=[])
+    parser.add_argument('-f', '--flowtron_path',
+                        help='Path to flowtron state dict', type=str)
+    parser.add_argument('-w', '--waveglow_path',
+                        help='Path to waveglow state dict', type=str)
+    parser.add_argument('-t', '--text', help='Text to synthesize', type=str)
+    parser.add_argument('-i', '--id', help='Speaker id', type=int)
+    parser.add_argument('-n', '--n_frames', help='Number of frames',
+                        default=400, type=int)
+    parser.add_argument('-o', "--output_dir", default="results/")
+    parser.add_argument("-s", "--sigma", default=0.5, type=float)
+    parser.add_argument("-g", "--gate", default=0.5, type=float)
+    parser.add_argument("--seed", default=1234, type=int)
+    args = parser.parse_args()
+
+    # Parse configs. Globals nicer in this case
+    with open(args.config) as f:
+        data = f.read()
+
+    global config
+    config = json.loads(data)
+    update_params(config, args.params)
+
+    data_config = config["data_config"]
+    global model_config
+    model_config = config["model_config"]
+
+    # Make directory if it doesn't exist
+    if not os.path.isdir(args.output_dir):
+        os.makedirs(args.output_dir)
+        os.chmod(args.output_dir, 0o775)
+
+    torch.backends.cudnn.enabled = True
+    torch.backends.cudnn.benchmark = False
+    infer(args.flowtron_path, args.waveglow_path, args.output_dir, args.text,
+          args.id, args.n_frames, args.sigma, args.gate, args.seed)
diff --git a/inference_style_transfer.py b/inference_style_transfer.py
new file mode 100644
index 0000000000000000000000000000000000000000..861ce74569aea985b5574b640c29d3834b003c29
--- /dev/null
+++ b/inference_style_transfer.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# ## Flowtron Style Transfer Demo
+
+# #### Import libraries and setup matplotlib
+
+# In[1]:
+
+
+#get_ipython().run_line_magic('matplotlib', 'inline')
+import matplotlib
+import matplotlib.pylab as plt
+import numpy as np
+import json
+import sys
+import os
+import torch
+from torch.distributions import Normal
+
+from scipy.io.wavfile import write
+from flowtron import Flowtron
+from data import Data
+from train import update_params
+sys.path.insert(0, "tacotron2")
+sys.path.insert(0, "tacotron2/waveglow")
+from denoiser import Denoiser
+
+
+# #### Load Flowtron
+
+# In[2]:
+
+
+config_path = "config_SWARA_ALL_noSIL_noSPK.json"
+params = ["model_config.dummy_speaker_embedding=0",
+          "data_config.p_arpabet=1.0"]
+
+with open(config_path) as f:
+    data = f.read()
+
+config = json.loads(data)
+update_params(config, params)
+
+data_config = config["data_config"]
+model_config = config["model_config"]
+
+
+# In[3]:
+
+
+model_path = "outdir_Flowtron2021_SWARA_ALL_fromMaraTacotron2_noSPK/model_5900000"
+
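+# Checkpoints saved during training typically hold the full model under the
+# 'model' key, while exported checkpoints hold a bare 'state_dict'; the cell
+# below handles both layouts.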
+pretrained_dict = torch.load(model_path, map_location='cpu')
+if 'model' in pretrained_dict:
+    state_dict = pretrained_dict['model'].state_dict()
+else:
+    state_dict = pretrained_dict['state_dict']
+
+model = Flowtron(**model_config)
+model.load_state_dict(state_dict)
+_ = model.eval().cuda()
+
+
+# #### Load WaveGlow
+
+# In[4]:
+
+
+waveglow_path = '../../NVIDIA/0_MODELS/EN/waveglow-models/waveglow_256channels_v4.pt'
+waveglow = torch.load(waveglow_path)['model']
+_ = waveglow.eval().cuda()
+denoiser = Denoiser(waveglow).cuda().eval()
+
+
+# #### Download samples with surprised style and unzip them in the 'data' folder
+# [Surprised samples](https://drive.google.com/file/d/100YJu80Y-k5katrwzzE6rFoEHJ2rLmkc/view?usp=sharing)
+
+# #### Prepare the dataloader
+
+# In[5]:
+
+
+dataset_path = 'filelists/eme_data.txt'
+dataset = Data(
+    dataset_path,
+    **dict((k, v) for k, v in data_config.items()
+           if k not in ['training_files', 'validation_files']))
+
+
+# #### Collect z values
+
+# In[6]:
+
+
+z_values = []
+force_speaker_id = -1
+for i in range(len(dataset)):
+    mel, sid, text, attn_prior = dataset[i]
+    mel, sid, text = mel[None].cuda(), sid.cuda(), text[None].cuda()
+    if force_speaker_id > -1:
+        sid = sid * 0 + force_speaker_id
+    in_lens = torch.LongTensor([text.shape[1]]).cuda()
+    out_lens = torch.LongTensor([mel.shape[2]]).cuda()
+    with torch.no_grad():
+        # the forward pass maps each reference mel to its latent z
+        z = model(mel, sid, text, in_lens, out_lens)[0]
+    z_values.append(z.permute(1, 2, 0))
+
+
+# #### Compute the posterior distribution
+
+# In[7]:
+
+
+lambd = 0.0001
+sigma = 0.5
+n_frames = 300
+aggregation_type = 'batch'
+
+# the posterior mean interpolates between the mean of the collected z values
+# and the zero-mean prior; lambd sets the prior weight relative to the
+# number of reference utterances
+if aggregation_type == 'time_and_batch':
+    z_mean = torch.cat([z.mean(dim=2) for z in z_values])
+    z_mean = torch.mean(z_mean, dim=0)[:, None]
+    ratio = len(z_values) / lambd
+    mu_posterior = (ratio * z_mean / (ratio + 1))
+elif aggregation_type == 'batch':
+    # tile each z along time until it covers n_frames, then truncate
+    for k in range(len(z_values)):
+        expand = z_values[k]
+        while expand.size(2) < n_frames:
+            expand = torch.cat((expand, z_values[k]), 2)
+        z_values[k] = expand[:, :, :n_frames]
+
+    z_mean = torch.mean(torch.cat(z_values, dim=0), dim=0)[None]
+    z_mean = z_mean.flatten()
+    ratio = len(z_values) / float(lambd)
+    mu_posterior = (ratio * z_mean / (ratio + 1)).flatten()
+    mu_posterior = mu_posterior.view(80, -1)
+
+print("n / lambd ratio:", ratio)
+dist = Normal(mu_posterior.cpu(), sigma)
+
+
+# In[8]:
+
+
+z_baseline = torch.FloatTensor(1, 80, n_frames).cuda().normal_() * sigma
+if aggregation_type == 'time_and_batch':
+    z_posterior = dist.sample([n_frames]).permute(2, 1, 0).cuda()
+elif aggregation_type == 'batch':
+    z_posterior = dist.sample().view(1, 80, -1)[..., :n_frames].cuda()
+
+
+# In[9]:
+
+
+text = "De ce e mai râioasă capra, de aia stă cu coada mai sus."
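+# The sentence above is Romanian (a proverb, roughly: "The mangier the goat,
+# the higher it holds its tail"), consistent with the Romanian SWARA data the
+# model above appears to have been trained on.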
+text_encoded = dataset.get_text(text).cuda()[None]
+
+
+# #### Perform inference sampling the posterior and a standard gaussian baseline
+
+# In[10]:
+
+
+speaker = 0
+speaker_id = torch.LongTensor([speaker]).cuda()
+with torch.no_grad():
+    mel_posterior = model.infer(z_posterior, speaker_id, text_encoded)[0]
+    mel_baseline = model.infer(z_baseline, speaker_id, text_encoded)[0]
+
+
+# In[11]:
+
+
+fig, axes = plt.subplots(2, 2, figsize=(16, 6))
+axes[0, 0].imshow(mel_posterior[0].cpu(), aspect='auto', origin='lower', interpolation='none')
+im = axes[0, 1].imshow(z_posterior[0].cpu(), aspect='auto', origin='lower', interpolation='none')
+plt.colorbar(im, ax=axes[0, 1])
+axes[1, 0].imshow(mel_baseline[0].cpu(), aspect='auto', origin='lower', interpolation='none')
+im = axes[1, 1].imshow(z_baseline[0].cpu(), aspect='auto', origin='lower', interpolation='none')
+plt.colorbar(im, ax=axes[1, 1])
+
+
+output_dir = 'results/'
+os.makedirs(output_dir, exist_ok=True)
+# when running outside a notebook, persist the diagnostic figure instead of
+# relying on inline display
+fig.savefig(os.path.join(output_dir, 'style_transfer_mels_and_z.png'))
+
+# #### Posterior sample
+
+with torch.no_grad():
+    #audio = denoiser(waveglow.infer(mel_posterior, sigma=0.8), 0.001)
+    audio = waveglow.infer(mel_posterior, sigma=0.8)
+audio = audio.cpu().numpy()[0]
+# normalize audio for now
+audio = audio / np.abs(audio).max()
+print(audio.shape)
+
+write(os.path.join(output_dir,
+                   'sid{}_sigma{}-posterior.wav'.format(str(speaker).zfill(2), sigma)),
+      data_config['sampling_rate'], audio)
+
+# #### Baseline sample
+
+with torch.no_grad():
+    #audio = denoiser(waveglow.infer(mel_baseline, sigma=0.8), 0.001)
+    audio = waveglow.infer(mel_baseline, sigma=0.8)
+audio = audio.cpu().numpy()[0]
+# normalize audio for now
+audio = audio / np.abs(audio).max()
+print(audio.shape)
+write(os.path.join(output_dir,
+                   'sid{}_sigma{}-baseline.wav'.format(str(speaker).zfill(2), sigma)),
+      data_config['sampling_rate'], audio)
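+
+# Optional, mirroring the commented-out calls above: the WaveGlow denoiser
+# can be applied to either sample to reduce vocoder bias noise. A minimal
+# sketch, reusing the Denoiser instance created earlier (the strength value
+# here is a tunable assumption, not a recommended setting):
+#
+#   audio = denoiser(waveglow.infer(mel_posterior, sigma=0.8), 0.01)[:, 0]
+#   audio = audio.cpu().numpy()[0]
+#   audio = audio / np.abs(audio).max()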