diff --git a/backend/helpers/provision.py b/backend/helpers/provision.py
index 9575138e6894f1206651bed144cb3bf9ef200e89..001b75f7787ea17ae1a9a7f598759c83f922ad70 100644
--- a/backend/helpers/provision.py
+++ b/backend/helpers/provision.py
@@ -276,81 +276,117 @@ class Provision:
         db.session.commit()
         return
 
-    def _get_existing_users(self, app):
+    def _scim_list_users(self, app):
+        """Return every user that `app` lists at its SCIM `Users` endpoint.
+
+        The listing is requested page by page until the application has
+        handed over `totalResults` users or returns an empty page.
+
+        Raises ProvisionError when a page comes back with an HTTP error
+        status or with a body that is not JSON.
+        """
         logging.info(f"Info: Getting list of current users from {app.slug} via SCIM.")
-        url = f"{app.scim_url}/Users"
-        scim_headers = {
-            'Authorization': 'Bearer ' + app.scim_token
-        }
-        response = requests.get(url, headers=scim_headers)
-        logging.info(f"SCIM http status: {response.status_code}")
-        try:
-            response_json = response.json()
-        except json.decoder.JSONDecodeError as e:
-            logging.info("SCIM result was not json")
-            logging.info(response.content)
-            raise ProvisionError(f"Failed to get existing users from {app.slug}")
-        logging.info(f"All existing users for {app.slug}: {response_json}")
+        # SCIM prescribes a 1-based index.
+        start_index = 1
+        # Get this many items per request. The application might further reduce
+        # this number so we can't count on it.
+        count = 100
+        users = []
+        # The headers are identical for every page, so build them once.
+        scim_headers = {
+            'Authorization': 'Bearer ' + app.scim_token
+        }
+        while True:
+            # NOTE(review): the code this replaces put a `/` before `Users`, so
+            # this assumes `app.scim_url` now ends in a slash -- confirm.
+            url = f"{app.scim_url}Users?count={count}&startIndex={start_index}"
+            response = requests.get(url, headers=scim_headers)
+            logging.info(f"SCIM http status: {response.status_code}")
+            if not response.ok:
+                # An error response may still carry a JSON body. Reject it here so
+                # it surfaces as a ProvisionError instead of being read as a page
+                # of users.
+                logging.info(response.content)
+                raise ProvisionError(f"Failed to get existing users from {app.slug}")
+            try:
+                response_json = response.json()
+            except json.decoder.JSONDecodeError as e:
+                logging.info("SCIM result was not json")
+                logging.info(response.content)
+                raise ProvisionError(f"Failed to get existing users from {app.slug}") from e
+            # `Resources` is only required when there are results (RFC 7644,
+            # section 3.4.2), so a missing key means an empty page.
+            new_users = response_json.get('Resources', [])
+            users.extend(new_users)
+            # Also stop on an empty page, so that a `totalResults` we never reach
+            # cannot keep us requesting pages forever.
+            if not new_users or len(users) >= response_json['totalResults']:
+                # We've got them all.
+                logging.info(f"All existing users for {app.slug}: {users}")
+                return users
+            start_index += len(new_users)
+
+    def _get_existing_users(self, app):
+        """Return the SCIM users of `app` as a dict keyed by Kratos user ID.
+
+        Users without an `externalId` are matched through the SCIM ID stored
+        in the database and, failing that, by email address. Users that match
+        neither are left out of the result.
+        """
+        scim_users = self._scim_list_users(app)
         # Make a dictionary of the users, using their externalId as key, which
         # is the kratos user ID.
         users = {}
-        for u in response_json['Resources']:
+        for u in scim_users:
             kratos_id = u.get('externalId')
             if not kratos_id:
                 logging.info(f"Got user without externalId: {u}")
-                # Users that were created in Nextcloud by SSO, before SCIM was
-                # introduced in Stackspin, will not have `externalId` set, so
-                # we get the Kratos ID from the `id` attribute instead.
-                if app.slug == 'nextcloud' and u['id'].startswith('stackspin-'):
-                    kratos_id = u['id'][len('stackspin-'):]
-                else:
-                    # Attempt to look up the user from our Stackspin database,
-                    # based on the app slug and SCIM ID.
+                # Users that were created just-in-time when logging in to the
+                # app will not have `externalId` set, so we attempt to look up
+                # the user from our Stackspin database based on the app ID and
+                # SCIM ID.
+                app_role = db.session.query(AppRole).filter_by(
+                    app_id=app.id,
+                    scim_id=u['id']
+                ).first()
+                if app_role is None:
+                    logging.info(f" SCIM ID {u['id']} not listed in database.")
+                    # We can't find this app user in our Stackspin database, at
+                    # least based on the SCIM ID. It could be that it was
+                    # created before the introduction of SCIM, or was created
+                    # on-the-fly on login by the app before SCIM got a chance
+                    # to create it. To cover that case, we try to find the
+                    # matching Stackspin user by email address.
+                    try:
+                        if app.slug == 'zulip':
+                            email_address = u['userName']
+                        else:
+                            email_address = u['emails'][0]['value']
+                        kratos_user = KratosUser.find_by_email(self.kratos_identity_api, email_address)
+                    except (KeyError, IndexError):
+                        # The email field is not set, or the list of email
+                        # addresses is empty, so give up.
+                        kratos_user = None
+                    if kratos_user is None:
+                        # This user is not known at all by Stackspin, so
+                        # we'll ignore it.
+                        logging.info(" SCIM user unknown, ignoring.")
+                        continue
+                    # We found the user based on email address. We'll
+                    # store the SCIM ID for this user in the Stackspin
+                    # database so we don't need to do this email
+                    # address matching again next time.
                     app_role = db.session.query(AppRole).filter_by(
                         app_id=app.id,
-                        scim_id=u['id']
+                        user_id=kratos_user.uuid
                     ).first()
-                    if app_role is None:
-                        logging.info(f" SCIM ID {u['id']} not listed in database.")
-                        # We can't find this app user in our Stackspin
-                        # database, at least based on the SCIM ID. It could be
-                        # that it was created before the introduction of SCIM,
-                        # or was created on-the-fly on login by the app before
-                        # SCIM got a chance to create it. To cover that case,
-                        # we try to find the matching Stackspin user by email
-                        # address.
-                        try:
-                            if app.slug == 'zulip':
-                                email_address = u['userName']
-                            else:
-                                email_address = u['emails'][0]['value']
-                            kratos_user = KratosUser.find_by_email(self.kratos_identity_api, email_address)
-                        except KeyError:
-                            # The `emails` field is not set, so give up.
-                            kratos_user = None
-                        except IndexError:
-                            # The list of email addresses is empty, so give up.
-                            kratos_user = None
-                        if kratos_user is None:
-                            # This user is not known at all by Stackspin, so
-                            # we'll ignore it.
-                            logging.info(f" SCIM user unknown, ignoring.")
-                            continue
-                        # We found the user based on email address. We'll
-                        # store the SCIM ID for this user in the Stackspin
-                        # database so we don't need to do this email
-                        # address matching again next time.
-                        app_role = db.session.query(AppRole).filter_by(
-                            app_id=app.id,
-                            user_id=kratos_user.uuid
-                        ).first()
-                        if app_role is not None:
-                            app_role.scim_id = u['id']
-                            db.session.commit()
-                            logging.info(f" Stored SCIM ID {u['id']} for user {kratos_user.uuid} for app {app.slug}")
-                        kratos_id = kratos_user.uuid
-                    else:
-                        kratos_id = app_role.user_id
+                    if app_role is not None:
+                        app_role.scim_id = u['id']
+                        db.session.commit()
+                        logging.info(f" Stored SCIM ID {u['id']} for user {kratos_user.uuid} for app {app.slug}")
+                    kratos_id = kratos_user.uuid
+                else:
+                    kratos_id = app_role.user_id
             users[kratos_id] = User(kratos_id, u['id'], u['displayName'])
         return users
 