abacusai.api_client_utils
=========================

.. py:module:: abacusai.api_client_utils


Attributes
----------

.. autoapisummary::

   abacusai.api_client_utils.INVALID_PANDAS_COLUMN_NAME_CHARACTERS


Classes
-------

.. autoapisummary::

   abacusai.api_client_utils.StreamingHandler
   abacusai.api_client_utils.StreamType
   abacusai.api_client_utils.DocstoreUtils


Functions
---------

.. autoapisummary::

   abacusai.api_client_utils.clean_column_name
   abacusai.api_client_utils.avro_to_pandas_dtype
   abacusai.api_client_utils._get_spark_incompatible_columns
   abacusai.api_client_utils.get_non_nullable_type
   abacusai.api_client_utils.get_object_from_context
   abacusai.api_client_utils.load_as_pandas_from_avro_fd
   abacusai.api_client_utils.load_as_pandas_from_avro_files
   abacusai.api_client_utils.validate_workflow_node_inputs
   abacusai.api_client_utils.run
   abacusai.api_client_utils.evaluate_edge_condition
   abacusai.api_client_utils.execute_python_source
   abacusai.api_client_utils.process_node_response
   abacusai.api_client_utils.try_abacus_internal_copy


Module Contents
---------------

.. py:data:: INVALID_PANDAS_COLUMN_NAME_CHARACTERS
   :value: '[^A-Za-z0-9_]'


.. py:function:: clean_column_name(column)


.. py:function:: avro_to_pandas_dtype(avro_type)


.. py:function:: _get_spark_incompatible_columns(df)


.. py:function:: get_non_nullable_type(types)


.. py:class:: StreamingHandler

   Bases: :py:obj:`str`

   str(object='') -> str
   str(bytes_or_buffer[, encoding[, errors]]) -> str

   Create a new string object from the given object. If encoding or
   errors is specified, then the object must expose a data buffer
   that will be decoded using the given encoding and error handler.
   Otherwise, returns the result of object.__str__() (if defined)
   or repr(object).
   encoding defaults to sys.getdefaultencoding().
   errors defaults to 'strict'.


   .. py:method:: process_streaming_data(value, context, section_key, data_type, is_transient)
      :classmethod:


.. py:function:: get_object_from_context(client, context, variable_name, return_type)


.. py:function:: load_as_pandas_from_avro_fd(fd)


.. py:function:: load_as_pandas_from_avro_files(files, download_method, max_workers = 10)


.. py:function:: validate_workflow_node_inputs(nodes_info, agent_workflow_node_id, keyword_arguments, sample_user_inputs, filtered_workflow_vars)


.. py:function:: run(nodes, primary_start_node, graph_info, sample_user_inputs = None, agent_workflow_node_id = None, workflow_vars = {}, topological_dfs_stack = [])


.. py:function:: evaluate_edge_condition(source, target, details, workflow_vars)


.. py:function:: execute_python_source(python_expression, variables)


.. py:function:: process_node_response(node_response)


.. py:class:: StreamType

   Bases: :py:obj:`enum.Enum`

   Generic enumeration.

   Derive from this class to define new enumerations.


   .. py:attribute:: MESSAGE
      :value: 'message'


   .. py:attribute:: SECTION_OUTPUT
      :value: 'section_output'


   .. py:attribute:: SEGMENT
      :value: 'segment'

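
The module-level pattern and helper documented above are used to make arbitrary
column names safe for pandas. A minimal sketch, assuming a simple
``re.sub``-style replacement; the replacement character and any extra
normalization performed by ``clean_column_name`` (e.g. handling of leading
digits) are assumptions, not documented behaviour:

.. code-block:: python

   import re

   from abacusai.api_client_utils import (
       INVALID_PANDAS_COLUMN_NAME_CHARACTERS,
       clean_column_name,
   )

   raw_name = "Total Sales ($)"

   # Apply the documented pattern directly: anything outside [A-Za-z0-9_]
   # is invalid. Replacing with '_' is an illustrative choice only.
   manual = re.sub(INVALID_PANDAS_COLUMN_NAME_CHARACTERS, '_', raw_name)

   # The helper applies the library's own normalization rules, which may
   # differ in detail from the one-liner above.
   cleaned = clean_column_name(raw_name)
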
.. py:class:: DocstoreUtils

   Utility class for loading docstore data.
   Needs to be updated if docstore formats change.


   .. py:attribute:: DOC_ID
      :value: 'doc_id'


   .. py:attribute:: PREDICTION_PREFIX
      :value: 'prediction'


   .. py:attribute:: FIRST_PAGE
      :value: 'first_page'


   .. py:attribute:: LAST_PAGE
      :value: 'last_page'


   .. py:attribute:: PAGE_TEXT
      :value: 'page_text'


   .. py:attribute:: PAGES
      :value: 'pages'


   .. py:attribute:: CONTENT
      :value: 'content'


   .. py:attribute:: TOKENS
      :value: 'tokens'


   .. py:attribute:: PAGES_ZIP_METADATA
      :value: 'pages_zip_metadata'


   .. py:attribute:: PAGE_DATA
      :value: 'page_data'


   .. py:attribute:: HEIGHT
      :value: 'height'


   .. py:attribute:: WIDTH
      :value: 'width'


   .. py:attribute:: METADATA
      :value: 'metadata'


   .. py:attribute:: PAGE
      :value: 'page'


   .. py:attribute:: BLOCK
      :value: 'block'


   .. py:attribute:: LINE
      :value: 'line'


   .. py:attribute:: EXTRACTED_TEXT
      :value: 'extracted_text'


   .. py:attribute:: EMBEDDED_TEXT
      :value: 'embedded_text'


   .. py:attribute:: PAGE_MARKDOWN
      :value: 'page_markdown'


   .. py:attribute:: PAGE_LLM_OCR
      :value: 'page_llm_ocr'


   .. py:attribute:: PAGE_TABLE_TEXT
      :value: 'page_table_text'


   .. py:attribute:: MARKDOWN_FEATURES
      :value: 'markdown_features'


   .. py:attribute:: MULTI_MODE_OCR_TEXT
      :value: 'multi_mode_ocr_text'


   .. py:attribute:: DOCUMENT_PROCESSING_CONFIG
      :value: 'document_processing_config'


   .. py:attribute:: DOCUMENT_PROCESSING_VERSION
      :value: 'document_processing_version'


   .. py:method:: get_archive_id(doc_id)
      :staticmethod:


   .. py:method:: get_page_id(doc_id, page)
      :staticmethod:


   .. py:method:: get_content_hash(doc_id)
      :staticmethod:


   .. py:method:: get_pandas_pages_df(df, feature_group_version, doc_id_column, document_column, get_docstore_resource_bytes, get_document_processing_result_infos, max_workers = 10)
      :classmethod:


   .. py:method:: get_pandas_documents_df(df, feature_group_version, doc_id_column, document_column, get_docstore_resource_bytes, get_document_processing_result_infos, max_workers = 10)
      :classmethod:


.. py:function:: try_abacus_internal_copy(src_suffix, dst_local, raise_exception=True)

   Returns True if the file was copied, False otherwise.
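
The string constants on ``DocstoreUtils`` name the keys and columns used for
docstore data, so downstream code can reference them instead of hard-coded
strings. A minimal sketch; the sample frame below and the assumption that a
pages DataFrame carries ``DOC_ID``, ``PAGE`` and ``PAGE_TEXT`` columns are
illustrative, not a documented schema (real frames come from
``get_pandas_pages_df`` / ``get_pandas_documents_df``):

.. code-block:: python

   import pandas as pd

   from abacusai.api_client_utils import DocstoreUtils

   # Illustrative stand-in for a pages DataFrame; the real one may carry
   # additional columns such as tokens or page metadata.
   pages_df = pd.DataFrame([
       {DocstoreUtils.DOC_ID: 'doc-1', DocstoreUtils.PAGE: 0, DocstoreUtils.PAGE_TEXT: 'First page text'},
       {DocstoreUtils.DOC_ID: 'doc-1', DocstoreUtils.PAGE: 1, DocstoreUtils.PAGE_TEXT: 'Second page text'},
   ])

   # Reassemble per-document text by referencing the documented constants
   # rather than raw string literals.
   full_text = (
       pages_df.sort_values(DocstoreUtils.PAGE)
       .groupby(DocstoreUtils.DOC_ID)[DocstoreUtils.PAGE_TEXT]
       .apply('\n'.join)
   )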