Merged

48 commits
ac6765e
add text matching compression & delete regression
Apr 30, 2019
5cba830
modify the Tutorial.md
Apr 30, 2019
7876d64
modify the Tutorial.md
Apr 30, 2019
2e7982c
add teacher model name
Apr 30, 2019
3be3052
modify tutorial.md
Apr 30, 2019
3a3ff7c
Merge branch 'master' into dev/quanjia
May 5, 2019
fcb20ac
fix transform params2tensors problem
May 7, 2019
efb2270
Merge branch 'master' into dev/quanjia
May 7, 2019
3778da7
add softmax output layer for slot tagging
May 7, 2019
1da9927
add slot_tagging metrics
May 15, 2019
b42c747
Merge branch 'master' into dev/quanjia
May 15, 2019
44bfb43
Merge branch 'master' into dev/quanjia
May 15, 2019
53e7233
modify make word emb matrix
May 16, 2019
309921b
Delete dev.tsv
adolphk-yk May 16, 2019
cfad91e
Delete test.tsv
adolphk-yk May 16, 2019
de9b3b3
Delete train.tsv
adolphk-yk May 16, 2019
b8a34a1
delete conll data
May 16, 2019
385ec7a
Merge branch 'dev/quanjia' of https://github.com/Microsoft/NeuronBloc…
May 16, 2019
cf57b98
Update Contributing.md
boshining May 28, 2019
b18fe12
Update tools
ShiningBo May 28, 2019
0957603
Merge branch 'dev/shining' of github.com:microsoft/NeuronBlocks into …
ShiningBo May 28, 2019
3d9b3a0
Update README.md
boshining May 28, 2019
2897020
Update Contributing.md
boshining May 28, 2019
4fdcf3e
Update README.md
boshining May 28, 2019
861e6bd
Update autotest.sh
ShiningBo May 28, 2019
4d3c70f
update get_results.py
ShiningBo May 28, 2019
5c5f841
fix sequence tagging workflow
May 29, 2019
f7e122f
Merge branch 'master' into dev/quanjia
May 29, 2019
f1daf76
add model type judgement for optimizer
May 29, 2019
a0fd463
delete full ATIS dataset and unused config file
May 29, 2019
a085554
add slot_tagging sample data
May 29, 2019
5579545
fix load embedding slow problem
May 29, 2019
9f98dd3
fix Char embedding CNN problem
May 30, 2019
a4e2644
Merge branch 'dev/quanjia' into dev/shining
ShiningBo May 30, 2019
9acd52d
add lower token when load embedding matrix
May 31, 2019
d22ed5b
Merge branch 'master' into dev/quanjia
Jun 5, 2019
31bca9e
add word level length for char emb
Jun 6, 2019
3143519
Update Conv
ShiningBo Jun 6, 2019
b430e9f
Merge branch 'dev/quanjia' into dev/shining
ShiningBo Jun 6, 2019
92cd783
merge quanjia
ShiningBo Jun 21, 2019
992070b
Merge branch 'master' into dev/shining
Jun 25, 2019
b715945
Add ARCI & ARCII module and Modify Conv block
Jul 11, 2019
10c8d8f
Merge branch 'master' into dev/shining
Jul 11, 2019
8ce3e7e
Update to the same as master
Jul 15, 2019
30ce22e
update Linear layer
Jul 15, 2019
6380f32
Add block - Calculate Distance of Two Vectors
Jul 18, 2019
c8e8d47
Merge branch 'master' into dev/shining
Jul 18, 2019
883785c
update tutorial_zh_CN
Jul 18, 2019
2 changes: 2 additions & 0 deletions Tutorial_zh_CN.md
@@ -289,6 +289,8 @@ Question answer matching is a crucial subtask of the question answering problem,
CNN (NeuronBlocks) | 0.747
BiLSTM (NeuronBlocks) | 0.767
BiLSTM+Attn (NeuronBlocks) | 0.754
[ARC-I](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7508
[ARC-II](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7612
[MatchPyramid](https://arxiv.org/abs/1602.06359) (NeuronBlocks) | 0.763
BiLSTM+Match Attention (NeuronBlocks) | 0.786

5 changes: 4 additions & 1 deletion block_zoo/Conv.py
@@ -52,7 +52,10 @@ def inference(self):

self.output_dim = [-1]
if self.input_dims[0][1] != -1:
self.output_dim.append((self.input_dims[0][1] - self.window_size) // self.stride + 1)
if self.padding_type == 'SAME':
self.output_dim.append(self.input_dims[0][1])
else:
self.output_dim.append((self.input_dims[0][1] - self.window_size) // self.stride + 1)
else:
self.output_dim.append(-1)
self.output_dim.append(self.output_channel_num)
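For reference, a minimal stand-alone sketch (not part of the diff; conv_output_length is a hypothetical helper) of how the new padding branch changes the inferred sequence length:

def conv_output_length(seq_len, window_size, stride, padding_type):
    # Mirrors the inference logic above: 'SAME' keeps the input length
    # (as in the diff, regardless of stride); otherwise the usual
    # valid-convolution formula applies.
    if padding_type == 'SAME':
        return seq_len
    return (seq_len - window_size) // stride + 1

print(conv_output_length(10, 3, 1, 'SAME'))   # 10
print(conv_output_length(10, 3, 1, 'VALID'))  # (10 - 3) // 1 + 1 = 8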
14 changes: 12 additions & 2 deletions block_zoo/Linear.py
@@ -32,6 +32,7 @@ def default(self):
self.activation = 'PReLU'
self.last_hidden_activation = True
self.last_hidden_softmax = False
self.keep_dim = True # for example, if the output shape is [?, len, 1] and you want to squeeze it, set keep_dim=False; then the output shape is [?, len]

@DocInherit
def declare(self):
@@ -42,10 +43,16 @@ def declare(self):
def inference(self):
if isinstance(self.hidden_dim, int):
self.output_dim = copy.deepcopy(self.input_dims[0])
self.output_dim[-1] = self.hidden_dim
if not self.keep_dim and self.hidden_dim == 1:
self.output_dim.pop()
else:
self.output_dim[-1] = self.hidden_dim
elif isinstance(self.hidden_dim, list):
self.output_dim = copy.deepcopy(self.input_dims[0])
self.output_dim[-1] = self.hidden_dim[-1]
if not self.keep_dim and self.hidden_dim[-1] == 1:
self.output_dim.pop()
else:
self.output_dim[-1] = self.hidden_dim[-1]

super(LinearConf, self).inference() # PUT THIS LINE AT THE END OF inference()

@@ -87,6 +94,7 @@ class Linear(BaseLayer):
def __init__(self, layer_conf):

super(Linear, self).__init__(layer_conf)
self.layer_conf = layer_conf

if layer_conf.input_ranks[0] == 3 and layer_conf.batch_norm is True:
layer_conf.batch_norm = False
@@ -139,6 +147,8 @@ def forward(self, string, string_len=None):
masks = masks.to(device)
string = string * masks
string_out = self.linear(string.float())
if not self.layer_conf.keep_dim:
string_out = torch.squeeze(string_out, -1)
return string_out, string_len


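For reference, a stand-alone sketch (not part of the diff) of what the new keep_dim flag does when hidden_dim == 1:

import torch

linear = torch.nn.Linear(8, 1)
x = torch.randn(4, 6, 8)           # [batch_size, len, feature_dim]
out = linear(x)                    # shape [4, 6, 1], i.e. keep_dim=True
squeezed = torch.squeeze(out, -1)  # shape [4, 6],   i.e. keep_dim=False
print(out.shape, squeezed.shape)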
104 changes: 104 additions & 0 deletions block_zoo/Pooling1D.py
@@ -0,0 +1,104 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

from block_zoo.BaseLayer import BaseLayer, BaseConf
from utils.DocInherit import DocInherit


class Pooling1DConf(BaseConf):
"""

Args:
pool_type (str): 'max' or 'mean', default is 'max'.
stride (int): stride of the pooling window, default is 1.
padding (int): implicit zero padding added on both sides of the input. Default: 0
window_size (int): the size of the pooling window

"""

def __init__(self, **kwargs):
super(Pooling1DConf, self).__init__(**kwargs)

@DocInherit
def default(self):
self.pool_type = 'max' # Supported: ['max', 'mean']
self.stride = 1
self.padding = 0
self.window_size = 3

@DocInherit
def declare(self):
self.num_of_inputs = 1
self.input_ranks = [3]


@DocInherit
def inference(self):

self.output_dim = [self.input_dims[0][0]]
if self.input_dims[0][1] != -1:
self.output_dim.append((self.input_dims[0][1] + 2 * self.padding - self.window_size) // self.stride + 1)
else:
self.output_dim.append(-1)

self.output_dim.append(self.input_dims[0][-1])
# DON'T MODIFY THIS
self.output_rank = len(self.output_dim)

@DocInherit
def verify(self):
super(Pooling1DConf, self).verify()

necessary_attrs_for_user = ['pool_type']
for attr in necessary_attrs_for_user:
self.add_attr_exist_assertion_for_user(attr)

self.add_attr_value_assertion('pool_type', ['max', 'mean'])

assert self.output_dim[-1] != -1, "The shape of input is %s, and the input channel number of pooling should not be -1." % str(self.input_dims[0])


class Pooling1D(BaseLayer):
""" Pooling layer

Args:
layer_conf (PoolingConf): configuration of a layer
"""

def __init__(self, layer_conf):
super(Pooling1D, self).__init__(layer_conf)
self.pool = None
if layer_conf.pool_type == "max":
self.pool = nn.MaxPool1d(kernel_size=layer_conf.window_size, stride=layer_conf.stride,
padding=layer_conf.padding)
elif layer_conf.pool_type == "mean":
self.pool = nn.AvgPool1d(kernel_size=layer_conf.window_size, stride=layer_conf.stride,
padding=layer_conf.padding)

def forward(self, string, string_len=None):
""" process inputs

Args:
string (Tensor): tensor with shape: [batch_size, length, feature_dim]
string_len (Tensor): [batch_size], default is None.

Returns:
Tensor: Pooling result of string

"""

string = string.permute([0, 2, 1]).contiguous()
string = self.pool(string)
string = string.permute([0, 2, 1]).contiguous()
return string, string_len


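For reference, a stand-alone sketch (not part of the diff) of the permute-pool-permute pattern this layer wraps, with the output length following (L + 2*padding - window_size) // stride + 1:

import torch
import torch.nn as nn

string = torch.randn(2, 7, 16)  # [batch_size, length, feature_dim]
pool = nn.MaxPool1d(kernel_size=3, stride=1, padding=0)
out = pool(string.permute(0, 2, 1)).permute(0, 2, 1)
print(out.shape)  # torch.Size([2, 5, 16]); (7 + 2*0 - 3) // 1 + 1 = 5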
1 change: 0 additions & 1 deletion block_zoo/Pooling2D.py
@@ -19,7 +19,6 @@ class Pooling2DConf(BaseConf):
stride (int): stride of the pooling window, default is 1.
padding (int): implicit zero paddings on both sides of the input. Can be a single number or a tuple (padH, padW). Default: 0
window_size (int): the size of the pooling
activation (string): activation functions, e.g. ReLU

"""
def __init__(self, **kwargs):
7 changes: 6 additions & 1 deletion block_zoo/__init__.py
@@ -16,9 +16,11 @@
from .Dropout import Dropout, DropoutConf

from .Conv2D import Conv2D, Conv2DConf
from .Pooling1D import Pooling1D, Pooling1DConf
from .Pooling2D import Pooling2D, Pooling2DConf

from .embedding import CNNCharEmbedding, CNNCharEmbeddingConf
from .embedding import LSTMCharEmbedding, LSTMCharEmbeddingConf

from .CRF import CRFConf, CRF

@@ -51,4 +53,7 @@

from .normalizations import LayerNorm, LayerNormConf

from .HighwayLinear import HighwayLinear, HighwayLinearConf
from .HighwayLinear import HighwayLinear, HighwayLinearConf

from .Gating import Gating, GatingConf
Contributor comment:
What about this?

from .HistogramMapping import HistogramMapping, HistogramMappingConf
3 changes: 1 addition & 2 deletions block_zoo/embedding/LSTMCharEmbedding.py
@@ -118,7 +118,7 @@ def forward(self, string):
'input_ranks': [3],
'use_gpu': True
}
layer_conf = CNNCharEmbeddingConf(**conf)
layer_conf = LSTMCharEmbeddingConf(**conf)

# make a fake input: [bs, seq_len, char num in words]
# assume in this batch, the padded sentence length is 3 and each word has 5 chars, including padding 0.
@@ -135,4 +135,3 @@
print(output)



97 changes: 97 additions & 0 deletions block_zoo/op/CalculateDistance.py
@@ -0,0 +1,97 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import torch
import torch.nn as nn
import torch.nn.functional as F
import logging

from ..BaseLayer import BaseConf, BaseLayer
from utils.DocInherit import DocInherit
from utils.exceptions import ConfigurationError
import copy


class CalculateDistanceConf(BaseConf):
""" Configuration of CalculateDistance Layer

Args:
operations (list): a subset of ["cos", "euclidean", "manhattan", "chebyshev"].
"""

# init the args
def __init__(self, **kwargs):
super(CalculateDistanceConf, self).__init__(**kwargs)

# set default params
@DocInherit
def default(self):
self.operations = ["cos", "euclidean", "manhattan", "chebyshev"]

@DocInherit
def declare(self):
self.num_of_inputs = 2
self.input_ranks = [2]

@DocInherit
def inference(self):
self.output_dim = copy.deepcopy(self.input_dims[0])
self.output_dim[-1] = 1

super(CalculateDistanceConf, self).inference()

@DocInherit
def verify(self):
super(CalculateDistanceConf, self).verify()

assert len(self.input_dims) == 2, "CalculateDistance requires exactly two inputs"

# check that every input has rank 2
rank_equal_flag = True
for i in range(len(self.input_ranks)):
if self.input_ranks[i] != 2:
rank_equal_flag = False
break
if not rank_equal_flag:
raise ConfigurationError("For layer CalculateDistance, the rank of each input should be 2!")


class CalculateDistance(BaseLayer):
""" CalculateDistance layer to calculate the distance of sequences(2D representation)

Args:
layer_conf (CalculateDistanceConf): configuration of a layer
"""

def __init__(self, layer_conf):
super(CalculateDistance, self).__init__(layer_conf)
self.layer_conf = layer_conf


def forward(self, x, x_len, y, y_len):
"""

Args:
x: [batch_size, dim]
x_len: [batch_size]
y: [batch_size, dim]
y_len: [batch_size]
Returns:
Tensor: [batch_size, 1], None

"""

batch_size = x.size()[0]
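# Note: the elif chain below is exclusive, so with the default config
# (all four operations listed) only the cosine branch ever runs.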
if "cos" in self.layer_conf.operations:
result = F.cosine_similarity(x, y)
elif "euclidean" in self.layer_conf.operations:
result = torch.sqrt(torch.sum((x - y) ** 2, dim=1))
elif "manhattan" in self.layer_conf.operations:
result = torch.sum(torch.abs(x - y), dim=1)
elif "chebyshev" in self.layer_conf.operations:
result = torch.abs(x - y).max(dim=1)[0]  # .max(dim=1) returns (values, indices); keep the values
else:
raise ConfigurationError("This operation is not supported!")

result = result.view(batch_size, 1)
return result, None
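For reference, a stand-alone sketch (not part of the diff) of the four supported distances on [batch_size, dim] inputs:

import torch
import torch.nn.functional as F

x = torch.randn(4, 8)
y = torch.randn(4, 8)
cos = F.cosine_similarity(x, y)             # [4]
euclidean = torch.sqrt(torch.sum((x - y) ** 2, dim=1))
manhattan = torch.sum(torch.abs(x - y), dim=1)
chebyshev = torch.abs(x - y).max(dim=1)[0]  # .max(dim=1) returns (values, indices)
print(cos.shape, euclidean.shape, manhattan.shape, chebyshev.shape)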
1 change: 0 additions & 1 deletion block_zoo/op/Combination.py
@@ -47,7 +47,6 @@ def inference(self):
self.output_dim[-1] += int(np.mean([input_dim[-1] for input_dim in self.input_dims])) # the difference operation requires that all inputs have the same last dimension
if "dot_multiply" in self.operations:
self.output_dim[-1] += int(np.mean([input_dim[-1] for input_dim in self.input_dims])) # the dot_multiply operation requires that all inputs have the same last dimension

super(CombinationConf, self).inference()

@DocInherit
76 changes: 76 additions & 0 deletions block_zoo/op/Expand_plus.py
@@ -0,0 +1,76 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

# Reference: http://www.hangli-hl.com/uploads/3/1/6/8/3168008/hu-etal-nips2014.pdf [ARC-II]

import torch
import torch.nn as nn
import copy

from block_zoo.BaseLayer import BaseLayer, BaseConf
from utils.DocInherit import DocInherit
from utils.exceptions import ConfigurationError

class Expand_plusConf(BaseConf):
"""Configuration for Expand_plus layer

"""
def __init__(self, **kwargs):
super(Expand_plusConf, self).__init__(**kwargs)

@DocInherit
def default(self):
self.operation = 'Plus'

@DocInherit
def declare(self):
self.num_of_inputs = 2
self.input_ranks = [3, 3]

@DocInherit
def inference(self):
self.output_dim = copy.deepcopy(self.input_dims[0])
if self.input_dims[0][1] == -1 or self.input_dims[1][1] == -1:
raise ConfigurationError("For Expand_plus layer, the sequence length should be fixed")
self.output_dim.insert(2, self.input_dims[1][1]) # y_len
super(Expand_plusConf, self).inference() # PUT THIS LINE AT THE END OF inference()

@DocInherit
def verify(self):
super(Expand_plusConf, self).verify()


class Expand_plus(BaseLayer):
""" Expand_plus layer
Given sequences X and Y, expand the dimensions of X and Y, then add them element-wise.

Args:
layer_conf (Expand_plusConf): configuration of a layer

"""
def __init__(self, layer_conf):

super(Expand_plus, self).__init__(layer_conf)
assert layer_conf.input_dims[0][-1] == layer_conf.input_dims[1][-1]


def forward(self, x, x_len, y, y_len):
"""

Args:
x: [batch_size, x_max_len, dim].
x_len: [batch_size], default is None.
y: [batch_size, y_max_len, dim].
y_len: [batch_size], default is None.

Returns:
output: [batch_size, x_max_len, y_max_len, dim].

"""

x_new = torch.stack([x]*y.size()[1], 2) # [batch_size, x_max_len, y_max_len, dim]
y_new = torch.stack([y]*x.size()[1], 1) # [batch_size, x_max_len, y_max_len, dim]

return x_new + y_new, None


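For reference, a stand-alone sketch (not part of the diff) of the Expand_plus broadcast; the stack-based version in the diff is equivalent to an unsqueeze-and-broadcast sum:

import torch

x = torch.randn(2, 3, 5)  # [batch_size, x_max_len, dim]
y = torch.randn(2, 4, 5)  # [batch_size, y_max_len, dim]
out = torch.stack([x] * y.size(1), 2) + torch.stack([y] * x.size(1), 1)
out2 = x.unsqueeze(2) + y.unsqueeze(1)  # equivalent, without materializing the copies
print(out.shape, torch.equal(out, out2))  # torch.Size([2, 3, 4, 5]) True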