diff --git a/CHANGELOG.md b/CHANGELOG.md index 7433f87..936ba21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [Version 1.1.0](https://github.com/dataiku/dss-plugin-sharepoint-online/releases/tag/v1.1.0) - Feature release - 2023-05-17 + +- Adding dataset for documents metadata retrieval + ## [Version 1.0.14](https://github.com/dataiku/dss-plugin-sharepoint-online/releases/tag/v1.0.14) - Bugfix release - 2023-04-18 - Updated code-env descriptor for DSS 12 diff --git a/plugin.json b/plugin.json index 321795d..86d2771 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id": "sharepoint-online", - "version": "1.0.14", + "version": "1.1.0", "meta": { "label": "SharePoint Online", "description": "Read and write data from/to your SharePoint Online account", diff --git a/python-connectors/sharepoint-online_documents-metadata/connector.json b/python-connectors/sharepoint-online_documents-metadata/connector.json new file mode 100644 index 0000000..ea2b4d1 --- /dev/null +++ b/python-connectors/sharepoint-online_documents-metadata/connector.json @@ -0,0 +1,78 @@ +{ + "meta" : { + "label": "Documents' metadata", + "description": "Retrieve metadata for all documents stored on your SharePoint server", + "icon": "icon-cloud" + }, + "readable": true, + "writable": false, + "params": [ + { + "name": "auth_type", + "label": "Type of authentication", + "type": "SELECT", + "selectChoices": [ + { + "value": "login", + "label": "User name / password" + }, + { + "value": "oauth", + "label": "Azure Single Sign On" + }, + { + "value": "site-app-permissions", + "label": "Site App Permissions" + } + ] + }, + { + "name": "sharepoint_oauth", + "label": "Azure preset", + "type": "PRESET", + "parameterSetId": "oauth-login", + "visibilityCondition": "model.auth_type == 'oauth'" + }, + { + "name": "sharepoint_sharepy", + "label": "SharePoint preset", + "type": "PRESET", + "parameterSetId": "sharepoint-login", + "visibilityCondition": "model.auth_type == 
class SharePointDocumentsMetadataConnector(Connector):
    """Read-only dataset connector exposing SharePoint documents' metadata.

    Rows are taken as-is from ``SharePointClient.get_documents_medatada``.
    The connector declares no schema, offers no partitions and cannot write.
    """

    def __init__(self, config, plugin_config):
        """Create the SharePoint client and store the optional folder filter.

        :param config: dataset settings; handed unchanged to
            ``SharePointClient`` and read for the optional ``search_path`` key.
        :param plugin_config: plugin-level settings, forwarded to ``Connector``.
        """
        Connector.__init__(self, config, plugin_config)
        banner = 'SharePoint Online plugin metadata dataset v{}'.format(DSSConstants.PLUGIN_VERSION)
        logger.info(banner)
        self.client = SharePointClient(config)
        # Missing key yields None, which the client treats as "no filter".
        self.search_path = config.get("search_path")

    def get_read_schema(self):
        """Return ``None``: this connector does not declare a fixed schema."""
        return None

    def generate_rows(self, dataset_schema=None, dataset_partitioning=None,
                      partition_id=None, records_limit=-1):
        """Yield the metadata rows produced by the SharePoint client.

        ``dataset_schema``, ``dataset_partitioning`` and ``partition_id`` are
        accepted but not used. ``records_limit`` is handed to ``ItemsLimit``,
        which is polled once after every yielded row; the generator ends as
        soon as it reports that the limit is reached.
        """
        items_limit = ItemsLimit(records_limit)
        documents = self.client.get_documents_medatada(search_path=self.search_path)
        for document in documents:
            yield document
            if items_limit.is_reached():
                return

    def get_writer(self, dataset_schema=None, dataset_partitioning=None,
                   partition_id=None):
        """Writing is not supported by this connector."""
        raise NotImplementedError

    def get_partitioning(self):
        """Partitioning is not supported by this connector."""
        raise NotImplementedError

    def list_partitions(self, partitioning):
        """Return an empty list: the dataset exposes no partitions."""
        return []

    def partition_exists(self, partitioning, partition_id):
        """Partitioning is not supported by this connector."""
        raise NotImplementedError

    def get_records_count(self, partitioning=None, partition_id=None):
        """Record counting is not supported by this connector."""
        raise NotImplementedError


class CustomDatasetWriter(object):
    """Placeholder writer; ``write_row`` is not implemented."""

    def __init__(self):
        """Nothing to set up."""

    def write_row(self, row):
        """Always raises ``NotImplementedError``."""
        raise NotImplementedError

    def close(self):
        """Nothing to release."""
        return None
def get_documents_medatada(self, search_path=None):
    """Yield the metadata record of every document listed by ``listdata.svc/Documents``.

    The first request targets the site's ``_vti_bin/listdata.svc/Documents``
    endpoint; each following request uses the next-page URL found in the
    previous response, until no such URL is returned.

    :param search_path: optional folder path, appended to the configured site
        and root. When set, an OData ``$filter`` on ``Path`` is sent with the
        first request; leading and trailing ``/`` are ignored.
    :return: generator over the items found under the ``results`` key of each
        page.
    :raises: whatever ``assert_response_ok`` raises for a response it rejects
        (presumably any non-OK HTTP status - confirm against that helper).
    """
    headers = DSSConstants.JSON_HEADERS
    next_page_url = "{}/{}/_vti_bin/listdata.svc/Documents".format(self.sharepoint_origin, self.sharepoint_site)
    params = {"Query": "*"}
    if search_path:
        folder_path = "/{}/{}/{}".format(self.sharepoint_site, self.sharepoint_root, search_path.strip("/"))
        # Inside an OData string literal a single quote is escaped by doubling
        # it; without this a folder such as "John's files" ends the literal
        # early and the request is rejected (or filters on something else).
        params["$filter"] = "Path eq '{}'".format(folder_path.replace("'", "''"))
    while next_page_url:
        response = self.session.get(
            url=next_page_url,
            headers=headers,
            params=params
        )
        # Query parameters only accompany the first request: later requests
        # use the server-provided next-page URL exactly as returned.
        params = None
        self.assert_response_ok(response, calling_method="get_documents_medatada")
        json_response = response.json()
        next_page_url = get_value_from_path(json_response, [SharePointConstants.RESULTS_CONTAINER_V2, SharePointConstants.NEXT_PAGE])
        rows = get_value_from_path(json_response, [SharePointConstants.RESULTS_CONTAINER_V2, "results"])
        # The loop condition above already relies on a falsy value when a key
        # is absent; guard the iteration the same way so a page without
        # "results" ends quietly instead of raising TypeError.
        for row in rows or []:
            yield row