From 987b70317af3b8983eeab3b3febe1b782cd52e57 Mon Sep 17 00:00:00 2001 From: calebchiam Date: Sat, 30 May 2020 12:34:10 +0800 Subject: [PATCH] support for compressed and uncompressed arrays --- convokit/paired_prediction/util.py | 9 +++++++-- convokit/text_processing/textProcessor.py | 8 ++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/convokit/paired_prediction/util.py b/convokit/paired_prediction/util.py index e2ff6c77..3442924f 100644 --- a/convokit/paired_prediction/util.py +++ b/convokit/paired_prediction/util.py @@ -1,7 +1,7 @@ from random import choice, shuffle from pandas import DataFrame import numpy as np -from scipy.sparse import csr_matrix, vstack +from scipy.sparse import csr_matrix, vstack, issparse from convokit.classifier.util import extract_feats_from_obj @@ -36,7 +36,12 @@ def generate_bow_paired_X_y(pair_orientation_feat_name, pair_id_to_objs, vector_ X.append(diff) - return vstack(X), np.array(y) + if issparse(X[0]): # for csr_matrix + X = vstack(X) + else: # for non-compressed numpy arrays + X = np.vstack(X) + + return X, np.array(y) def generate_paired_X_y(pred_feats, pair_orientation_feat_name, pair_id_to_objs): diff --git a/convokit/text_processing/textProcessor.py b/convokit/text_processing/textProcessor.py index 34530eb4..6581bf16 100644 --- a/convokit/text_processing/textProcessor.py +++ b/convokit/text_processing/textProcessor.py @@ -66,11 +66,11 @@ def transform(self, corpus: Corpus) -> Corpus: def transform_utterance(self, utt, override_input_filter=False): """ - Computes per-utterance attributes of an individual utterance or string. For utterances which do not contain all of the `input_field` attributes as specified in the constructor, or for utterances which return `False` on `input_filter`, this call will not annotate the utterance. For strings, will convert the string to an utterance and return the utterance, annotating it if `input_field` is not set to `None` at initialization. + Computes per-utterance attributes of an individual utterance or string. For utterances which do not contain all of the `input_field` attributes as specified in the constructor, or for utterances which return `False` on `input_filter`, this call will not annotate the utterance. For strings, will convert the string to an utterance and return the utterance, annotating it if `input_field` is not set to `None` at initialization. - :param utt: utterance or a string - :param override_input_filter: ignore `input_filter` and compute attribute for all utterances - :return: the utterance + :param utt: utterance or a string + :param override_input_filter: ignore `input_filter` and compute attribute for all utterances + :return: the utterance """ if isinstance(utt, str):