Merge pull request #90 from dmmiller612/bert-last-4
Added 2 new aggregation algorithms ('concat_last_4' and 'reduce_last_4')
dmmiller612 authored Dec 23, 2020
2 parents c5777c6 + feab2df commit 78311a9
Showing 2 changed files with 15 additions and 6 deletions.
setup.py: 4 changes (2 additions, 2 deletions)
@@ -2,13 +2,13 @@
 from setuptools import find_packages
 
 setup(name='bert-extractive-summarizer',
-      version='0.5.1',
+      version='0.5.2',
       description='Extractive Text Summarization with BERT',
       keywords = ['bert', 'pytorch', 'machine learning', 'deep learning', 'extractive summarization', 'summary'],
       long_description=open("README.md", "r", encoding='utf-8').read(),
       long_description_content_type="text/markdown",
       url='https://github.com/dmmiller612/bert-extractive-summarizer',
-      download_url='https://github.com/dmmiller612/bert-extractive-summarizer/archive/0.5.1.tar.gz',
+      download_url='https://github.com/dmmiller612/bert-extractive-summarizer/archive/0.5.2.tar.gz',
       author='Derek Miller',
       author_email='[email protected]',
       install_requires=['transformers', 'scikit-learn', 'spacy'],
summarizer/bert_parent.py: 17 changes (13 additions, 4 deletions)
@@ -75,7 +75,7 @@ def extract_embeddings(
         :param hidden: The hidden layer to use for a readout handler
         :param squeeze: If we should squeeze the outputs (required for some layers)
         :param reduce_option: How we should reduce the items.
-        :return: A numpy array.
+        :return: A torch vector.
         """
 
         tokens_tensor = self.tokenize_input(text)
@@ -84,13 +84,22 @@
         if -1 > hidden > -12:
 
             if reduce_option == 'max':
-                pooled = hidden_states[hidden].max(dim=1)[0]
+                pooled = hidden_states[hidden].max(dim=1)[0].squeeze()
 
             elif reduce_option == 'median':
-                pooled = hidden_states[hidden].median(dim=1)[0]
+                pooled = hidden_states[hidden].median(dim=1)[0].squeeze()
 
+            elif reduce_option == 'concat_last_4':
+                last_4 = [hidden_states[i] for i in (-1, -2, -3, -4)]
+                cat_hidden_states = torch.cat(tuple(last_4), dim=-1)
+                pooled = torch.mean(cat_hidden_states, dim=1).squeeze()
+
+            elif reduce_option == 'reduce_last_4':
+                last_4 = [hidden_states[i] for i in (-1, -2, -3, -4)]
+                pooled = torch.cat(tuple(last_4), dim=1).mean(axis=1).squeeze()
+
             else:
-                pooled = hidden_states[hidden].mean(dim=1)
+                pooled = hidden_states[hidden].mean(dim=1).squeeze()
 
         return pooled

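For context, here is a minimal standalone sketch (not part of the commit) of what the two new reduce_option values compute. The tensor shapes are assumptions for illustration (BERT-base style: 13 layers of shape batch x tokens x 768). 'concat_last_4' concatenates the last four hidden layers along the hidden dimension and then mean-pools over tokens, giving a 4x-wider vector, while 'reduce_last_4' concatenates them along the token dimension before mean-pooling, keeping the original hidden size.

import torch

# Hypothetical hidden states: 13 layers, each of shape (batch=1, tokens=8, hidden=768).
hidden_states = [torch.randn(1, 8, 768) for _ in range(13)]
last_4 = [hidden_states[i] for i in (-1, -2, -3, -4)]

# 'concat_last_4': concatenate along the hidden dimension, then mean over tokens.
pooled_concat = torch.mean(torch.cat(tuple(last_4), dim=-1), dim=1).squeeze()  # shape (3072,)

# 'reduce_last_4': concatenate along the token dimension, then mean over tokens.
pooled_reduce = torch.cat(tuple(last_4), dim=1).mean(dim=1).squeeze()  # shape (768,)

print(pooled_concat.shape, pooled_reduce.shape)  # torch.Size([3072]) torch.Size([768])

Per the diff, either behaviour is selected by passing reduce_option='concat_last_4' or reduce_option='reduce_last_4' to extract_embeddings, alongside the existing 'max', 'median', and default mean options.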
