Fix stdout noise on auth breaking ls json output (#942)

* fix `ls --json` being corrupted by `Using https://...` msg * refactor ls json streaming * remove leftovers after ls json streaming lefactor
Backblaze · Nov 10, 2023 · 357b626 · 357b626
1 parent e46ece6
commit 357b626
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 58 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+* Emit `Using https://api.backblazeb2.com` message to stderr instead of stdout, therefor prevent JSON output corruption
+
+### Changed
+* Stream `ls --json` JSON output instead of dumping it only after all objects have been fetched
+
 ## [3.12.0] - 2023-10-28
 
 ### Added

diff --git a/b2/console_tool.py b/b2/console_tool.py
@@ -685,23 +685,50 @@ def _parse_file_infos(cls, args_info):
             file_infos[parts[0]] = parts[1]
         return file_infos
 
-    def _print_json(self, data):
-        self._print(
+    def _print_json(self, data) -> None:
+        return self._print(
             json.dumps(data, indent=4, sort_keys=True, cls=B2CliJsonEncoder), enforce_output=True
         )
 
-    def _print(self, *args, enforce_output=False):
-        self._print_standard_descriptor(self.stdout, 'stdout', *args, enforce_output=enforce_output)
+    def _print(self, *args, enforce_output: bool = False, end: Optional[str] = None) -> None:
+        return self._print_standard_descriptor(
+            self.stdout, "stdout", *args, enforce_output=enforce_output, end=end
+        )
 
-    def _print_stderr(self, *args, **kwargs):
-        self._print_standard_descriptor(self.stderr, 'stderr', *args, enforce_output=True)
+    def _print_stderr(self, *args, end: Optional[str] = None) -> None:
+        return self._print_standard_descriptor(
+            self.stderr, "stderr", *args, enforce_output=True, end=end
+        )
 
-    def _print_standard_descriptor(self, descriptor, descriptor_name, *args, enforce_output=False):
+    def _print_standard_descriptor(
+        self,
+        descriptor,
+        descriptor_name: str,
+        *args,
+        enforce_output: bool = False,
+        end: Optional[str] = None,
+    ) -> None:
+        """
+        Prints to fd, unless quiet is set.
+
+        :param descriptor: file descriptor to print to
+        :param descriptor_name: name of the descriptor, used for error reporting
+        :param args: object to be printed
+        :param enforce_output: overrides quiet setting; Should not be used for anything other than data
+        :param end: end of the line characters; None for default newline
+        """
         if not self.quiet or enforce_output:
-            self._print_helper(descriptor, descriptor.encoding, descriptor_name, *args)
+            self._print_helper(descriptor, descriptor.encoding, descriptor_name, *args, end=end)
 
     @classmethod
-    def _print_helper(cls, descriptor, descriptor_encoding, descriptor_name, *args):
+    def _print_helper(
+        cls,
+        descriptor,
+        descriptor_encoding: str,
+        descriptor_name: str,
+        *args,
+        end: Optional[str] = None
+    ):
         try:
             descriptor.write(' '.join(args))
         except UnicodeEncodeError:
@@ -714,7 +741,7 @@ def _print_helper(cls, descriptor, descriptor_encoding, descriptor_name, *args):
             args = [arg.encode('ascii', 'backslashreplace').decode() for arg in args]
             sys.stderr.write("Trying to print: %s\n" % args)
             descriptor.write(' '.join(args))
-        descriptor.write('\n')
+        descriptor.write("\n" if end is None else end)
 
     def __str__(self):
         return f'{self.__class__.__module__}.{self.__class__.__name__}'
@@ -861,7 +888,7 @@ def authorize(self, application_key_id, application_key, realm):
         :return: exit status
         """
         url = REALM_URLS.get(realm, realm)
-        self._print('Using %s' % url)
+        self._print_stderr(f"Using {url}")
         try:
             self.api.authorize_account(realm, application_key_id, application_key)
 
@@ -1889,15 +1916,12 @@ def _setup_parser(cls, parser):
         parser.add_argument('folderName', nargs='?').completer = file_name_completer
         super()._setup_parser(parser)
 
-    def run(self, args):
-        super().run(args)
+    def _print_files(self, args):
         generator = self._get_ls_generator(args)
 
         for file_version, folder_name in generator:
             self._print_file_version(args, file_version, folder_name)
 
-        return 0
-
     def _print_file_version(
         self,
         args,
@@ -1991,17 +2015,19 @@ def _setup_parser(cls, parser):
         super()._setup_parser(parser)
 
     def run(self, args):
+        super().run(args)
         if args.json:
-            # TODO: Make this work for an infinite generation.
-            #   Use `_print_file_version` to print a single `file_version` and manage the external JSON list
-            #   e.g. manually. However, to do it right, some sort of state needs to be kept e.g. info whether
-            #   at least one element was written to the stream, so we can add a `,` on the start of another.
-            #   That would sadly lead to an ugly formatting, so `_print` needs to be expanded with an ability
-            #   to not print end-line character(s).
-            self._print_json([file_version for file_version, _ in self._get_ls_generator(args)])
-            return 0
-
-        return super().run(args)
+            i = -1
+            for i, (file_version, _) in enumerate(self._get_ls_generator(args)):
+                if i:
+                    self._print(',', end='')
+                else:
+                    self._print('[')
+                self._print_json(file_version)
+            self._print(']' if i >= 0 else '[]')
+        else:
+            self._print_files(args)
+        return 0
 
     def _print_file_version(
         self,
@@ -2215,9 +2241,10 @@ def _setup_parser(cls, parser):
         super()._setup_parser(parser)
 
     def run(self, args):
-        if args.dryRun:
-            return super().run(args)
         super().run(args)
+        if args.dryRun:
+            self._print_files(args)
+            return 0
         failed_on_any_file = False
         messages_queue = queue.Queue()
 

diff --git a/test/integration/helpers.py b/test/integration/helpers.py
@@ -337,6 +337,7 @@ def should_equal(expected, actual):
 class CommandLine:
 
     EXPECTED_STDERR_PATTERNS = [
+        re.compile(r'^Using https?://[\w.]+$'),  # account auth
         re.compile(r'.*B/s]$', re.DOTALL),  # progress bar
         re.compile(r'^\r?$'),  # empty line
         re.compile(

diff --git a/test/unit/test_console_tool.py b/test/unit/test_console_tool.py
@@ -84,7 +84,7 @@ def _run_command_ignore_output(self, argv):
             print('ACTUAL STDERR:  ', repr(actual_stderr))
             print(actual_stderr)
 
-        self.assertEqual('', actual_stderr, 'stderr')
+        assert re.match(r'^(|Using https?://[\w.]+)$', actual_stderr), f"stderr: {actual_stderr!r}"
         self.assertEqual(0, actual_status, 'exit status code')
 
     def _trim_leading_spaces(self, s):
@@ -253,11 +253,9 @@ def _upload_multiple_files(cls, bucket):
 
 class TestConsoleTool(BaseConsoleToolTest):
     def test_authorize_with_bad_key(self):
-        expected_stdout = '''
-        Using http://production.example.com
-        '''
-
+        expected_stdout = ''
         expected_stderr = '''
+        Using http://production.example.com
         ERROR: unable to authorize account: Invalid authorization token. Server said: secret key is wrong (unauthorized)
         '''
 
@@ -271,12 +269,12 @@ def test_authorize_with_good_key_using_hyphen(self):
         assert self.account_info.get_account_auth_token() is None
 
         # Authorize an account with a good api key.
-        expected_stdout = """
+        expected_stderr = """
         Using http://production.example.com
         """
 
         self._run_command(
-            ['authorize-account', self.account_id, self.master_key], expected_stdout, '', 0
+            ['authorize-account', self.account_id, self.master_key], '', expected_stderr, 0
         )
 
         # Auth token should be in account info now
@@ -287,12 +285,11 @@ def test_authorize_with_good_key_using_underscore(self):
         assert self.account_info.get_account_auth_token() is None
 
         # Authorize an account with a good api key.
-        expected_stdout = """
+        expected_stderr = """
         Using http://production.example.com
         """
-
         self._run_command(
-            ['authorize_account', self.account_id, self.master_key], expected_stdout, '', 0
+            ['authorize_account', self.account_id, self.master_key], '', expected_stderr, 0
         )
 
         # Auth token should be in account info now
@@ -303,7 +300,7 @@ def test_authorize_using_env_variables(self):
         assert self.account_info.get_account_auth_token() is None
 
         # Authorize an account with a good api key.
-        expected_stdout = """
+        expected_stderr = """
         Using http://production.example.com
         """
 
@@ -317,7 +314,7 @@ def test_authorize_using_env_variables(self):
             assert B2_APPLICATION_KEY_ID_ENV_VAR in os.environ
             assert B2_APPLICATION_KEY_ENV_VAR in os.environ
 
-            self._run_command(['authorize-account'], expected_stdout, '', 0)
+            self._run_command(['authorize-account'], '', expected_stderr, 0)
 
         # Auth token should be in account info now
         assert self.account_info.get_account_auth_token() is not None
@@ -327,7 +324,7 @@ def test_authorize_towards_custom_realm(self):
         assert self.account_info.get_account_auth_token() is None
 
         # Authorize an account with a good api key.
-        expected_stdout = """
+        expected_stderr = """
         Using http://custom.example.com
         """
 
@@ -336,13 +333,13 @@ def test_authorize_towards_custom_realm(self):
             [
                 'authorize-account', '--environment', 'http://custom.example.com', self.account_id,
                 self.master_key
-            ], expected_stdout, '', 0
+            ], '', expected_stderr, 0
         )
 
         # Auth token should be in account info now
         assert self.account_info.get_account_auth_token() is not None
 
-        expected_stdout = """
+        expected_stderr = """
         Using http://custom2.example.com
         """
         # realm provided with env var
@@ -352,7 +349,7 @@ def test_authorize_towards_custom_realm(self):
             }
         ):
             self._run_command(
-                ['authorize-account', self.account_id, self.master_key], expected_stdout, '', 0
+                ['authorize-account', self.account_id, self.master_key], '', expected_stderr, 0
             )
 
     def test_create_key_and_authorize_with_it(self):
@@ -370,8 +367,8 @@ def test_create_key_and_authorize_with_it(self):
         # Authorize with the key
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
             '',
+            'Using http://production.example.com\n',
             0,
         )
 
@@ -394,9 +391,8 @@ def test_create_key_with_authorization_from_env_vars(self):
             # The first time we're running on this cache there will be output from the implicit "authorize-account" call
             self._run_command(
                 ['create-key', 'key1', 'listBuckets,listKeys'],
-                'Using http://production.example.com\n'
                 'appKeyId0 appKey0\n',
-                '',
+                'Using http://production.example.com\n',
                 0,
             )
 
@@ -417,9 +413,8 @@ def test_create_key_with_authorization_from_env_vars(self):
                 # "authorize-account" is called when the key changes
                 self._run_command(
                     ['create-key', 'key1', 'listBuckets,listKeys'],
-                    'Using http://production.example.com\n'
                     'appKeyId2 appKey2\n',
-                    '',
+                    'Using http://production.example.com\n',
                     0,
                 )
 
@@ -431,9 +426,8 @@ def test_create_key_with_authorization_from_env_vars(self):
                 ):
                     self._run_command(
                         ['create-key', 'key1', 'listBuckets,listKeys'],
-                        'Using http://custom.example.com\n'
                         'appKeyId3 appKey3\n',
-                        '',
+                        'Using http://custom.example.com\n',
                         0,
                     )
 
@@ -446,7 +440,8 @@ def test_authorize_key_without_list_buckets(self):
         # Authorize with the key
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
+            '',
+            'Using http://production.example.com\n'
             'ERROR: application key has no listBuckets capability, which is required for the b2 command-line tool\n',
             1,
         )
@@ -519,8 +514,8 @@ def test_create_bucket_key_and_authorize_with_it(self):
         # Authorize with the key
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
             '',
+            'Using http://production.example.com\n',
             0,
         )
 
@@ -772,8 +767,8 @@ def test_keys(self):
 
         # authorize and make calls using application key with no restrictions
         self._run_command(
-            ['authorize-account', 'appKeyId0', 'appKey0'], 'Using http://production.example.com\n',
-            '', 0
+            ['authorize-account', 'appKeyId0', 'appKey0'], '',
+            'Using http://production.example.com\n', 0
         )
         self._run_command(
             ['list-buckets'],
@@ -798,8 +793,8 @@ def test_keys(self):
 
         # authorize and make calls using an application key with bucket restrictions
         self._run_command(
-            ['authorize-account', 'appKeyId1', 'appKey1'], 'Using http://production.example.com\n',
-            '', 0
+            ['authorize-account', 'appKeyId1', 'appKey1'], '',
+            'Using http://production.example.com\n', 0
         )
 
         self._run_command(
@@ -2351,7 +2346,8 @@ def test_list_buckets_not_allowed_for_app_key(self):
         # Authorize with the key, which should result in an error.
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
+            '',
+            'Using http://production.example.com\n'
             'ERROR: application key has no listBuckets capability, which is required for the b2 command-line tool\n',
             1,
         )
@@ -2379,7 +2375,8 @@ def test_bucket_missing_for_bucket_key(self):
         # to be able to look up the name of the bucket.
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
+            '',
+            "Using http://production.example.com\n"
             "ERROR: unable to authorize account: Application key is restricted to a bucket that doesn't exist\n",
             1,
         )
@@ -2739,6 +2736,8 @@ def test_rm_skipping_over_errors(self):
         '''
         self._run_command(['ls', '--recursive', 'my-bucket'], expected_stdout)
 
+
+class TestVersionConsoleTool(BaseConsoleToolTest):
     def test_version(self):
         self._run_command(['version', '--short'], expected_stdout=f'{VERSION}\n')
         self._run_command(['version'], expected_stdout=f'b2 command line tool, version {VERSION}\n')