abacusai.api_client_utils
=========================

.. py:module:: abacusai.api_client_utils


Attributes
----------

.. autoapisummary::

   abacusai.api_client_utils.INVALID_PANDAS_COLUMN_NAME_CHARACTERS


Classes
-------

.. autoapisummary::

   abacusai.api_client_utils.StreamingHandler
   abacusai.api_client_utils.StreamType
   abacusai.api_client_utils.DocstoreUtils


Functions
---------

.. autoapisummary::

   abacusai.api_client_utils.clean_column_name
   abacusai.api_client_utils.avro_to_pandas_dtype
   abacusai.api_client_utils._get_spark_incompatible_columns
   abacusai.api_client_utils.get_non_nullable_type
   abacusai.api_client_utils.get_object_from_context
   abacusai.api_client_utils.load_as_pandas_from_avro_fd
   abacusai.api_client_utils.load_as_pandas_from_avro_files
   abacusai.api_client_utils.validate_workflow_node_inputs
   abacusai.api_client_utils.run
   abacusai.api_client_utils.evaluate_edge_condition
   abacusai.api_client_utils.execute_python_source
   abacusai.api_client_utils.process_node_response
   abacusai.api_client_utils.try_abacus_internal_copy


Module Contents
---------------

.. py:data:: INVALID_PANDAS_COLUMN_NAME_CHARACTERS
   :value: '[^A-Za-z0-9_]'


.. py:function:: clean_column_name(column)


.. py:function:: avro_to_pandas_dtype(avro_type)


.. py:function:: _get_spark_incompatible_columns(df)


.. py:function:: get_non_nullable_type(types)


.. py:class:: StreamingHandler

   Bases: :py:obj:`str`

   str(object='') -> str
   str(bytes_or_buffer[, encoding[, errors]]) -> str

   Create a new string object from the given object. If encoding or
   errors is specified, then the object must expose a data buffer
   that will be decoded using the given encoding and error handler.
   Otherwise, returns the result of object.__str__() (if defined)
   or repr(object).
   encoding defaults to sys.getdefaultencoding().
   errors defaults to 'strict'.


   .. py:method:: process_streaming_data(value, context, section_key, data_type, is_transient)
      :classmethod:


.. py:function:: get_object_from_context(client, context, variable_name, return_type)


.. py:function:: load_as_pandas_from_avro_fd(fd)


.. py:function:: load_as_pandas_from_avro_files(files, download_method, max_workers = 10)


.. py:function:: validate_workflow_node_inputs(nodes_info, agent_workflow_node_id, keyword_arguments, sample_user_inputs, filtered_workflow_vars)


.. py:function:: run(nodes, primary_start_node, graph_info, sample_user_inputs = None, agent_workflow_node_id = None, workflow_vars = {}, topological_dfs_stack = [])


.. py:function:: evaluate_edge_condition(source, target, details, workflow_vars)


.. py:function:: execute_python_source(python_expression, variables)


.. py:function:: process_node_response(node_response)


.. py:class:: StreamType

   Bases: :py:obj:`enum.Enum`

   Generic enumeration.

   Derive from this class to define new enumerations.


   .. py:attribute:: MESSAGE
      :value: 'message'


   .. py:attribute:: SECTION_OUTPUT
      :value: 'section_output'


   .. py:attribute:: SEGMENT
      :value: 'segment'

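
The module-level pattern and helper documented above are used to make arbitrary
column names safe for pandas. A minimal sketch, assuming a simple
``re.sub``-style replacement; the replacement character and any extra
normalization performed by ``clean_column_name`` (e.g. handling of leading
digits) are assumptions, not documented behaviour:

.. code-block:: python

   import re

   from abacusai.api_client_utils import (
       INVALID_PANDAS_COLUMN_NAME_CHARACTERS,
       clean_column_name,
   )

   raw_name = "Total Sales ($)"

   # Apply the documented pattern directly: anything outside [A-Za-z0-9_]
   # is invalid. Replacing with '_' is an illustrative choice only.
   manual = re.sub(INVALID_PANDAS_COLUMN_NAME_CHARACTERS, '_', raw_name)

   # The helper applies the library's own normalization rules, which may
   # differ in detail from the one-liner above.
   cleaned = clean_column_name(raw_name)
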
.. py:class:: DocstoreUtils

   Utility class for loading docstore data.
   Needs to be updated if docstore formats change.


   .. py:attribute:: DOC_ID
      :value: 'doc_id'


   .. py:attribute:: PREDICTION_PREFIX
      :value: 'prediction'


   .. py:attribute:: FIRST_PAGE
      :value: 'first_page'


   .. py:attribute:: LAST_PAGE
      :value: 'last_page'


   .. py:attribute:: PAGE_TEXT
      :value: 'page_text'


   .. py:attribute:: PAGES
      :value: 'pages'


   .. py:attribute:: CONTENT
      :value: 'content'


   .. py:attribute:: TOKENS
      :value: 'tokens'


   .. py:attribute:: PAGES_ZIP_METADATA
      :value: 'pages_zip_metadata'


   .. py:attribute:: PAGE_DATA
      :value: 'page_data'


   .. py:attribute:: HEIGHT
      :value: 'height'


   .. py:attribute:: WIDTH
      :value: 'width'


   .. py:attribute:: METADATA
      :value: 'metadata'


   .. py:attribute:: PAGE
      :value: 'page'


   .. py:attribute:: BLOCK
      :value: 'block'


   .. py:attribute:: LINE
      :value: 'line'


   .. py:attribute:: EXTRACTED_TEXT
      :value: 'extracted_text'


   .. py:attribute:: EMBEDDED_TEXT
      :value: 'embedded_text'


   .. py:attribute:: PAGE_MARKDOWN
      :value: 'page_markdown'


   .. py:attribute:: PAGE_LLM_OCR
      :value: 'page_llm_ocr'


   .. py:attribute:: PAGE_TABLE_TEXT
      :value: 'page_table_text'


   .. py:attribute:: MARKDOWN_FEATURES
      :value: 'markdown_features'


   .. py:attribute:: MULTI_MODE_OCR_TEXT
      :value: 'multi_mode_ocr_text'


   .. py:attribute:: DOCUMENT_PROCESSING_CONFIG
      :value: 'document_processing_config'


   .. py:attribute:: DOCUMENT_PROCESSING_VERSION
      :value: 'document_processing_version'


   .. py:method:: get_archive_id(doc_id)
      :staticmethod:


   .. py:method:: get_page_id(doc_id, page)
      :staticmethod:


   .. py:method:: get_content_hash(doc_id)
      :staticmethod:


   .. py:method:: get_pandas_pages_df(df, feature_group_version, doc_id_column, document_column, get_docstore_resource_bytes, get_document_processing_result_infos, max_workers = 10)
      :classmethod:


   .. py:method:: get_pandas_documents_df(df, feature_group_version, doc_id_column, document_column, get_docstore_resource_bytes, get_document_processing_result_infos, max_workers = 10)
      :classmethod:


.. py:function:: try_abacus_internal_copy(src_suffix, dst_local, raise_exception=True)

   Returns True if the file was copied, False otherwise.
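
The string constants on ``DocstoreUtils`` name the keys and columns used for
docstore data, so downstream code can reference them instead of hard-coded
strings. A minimal sketch; the sample frame below and the assumption that a
pages DataFrame carries ``DOC_ID``, ``PAGE`` and ``PAGE_TEXT`` columns are
illustrative, not a documented schema (real frames come from
``get_pandas_pages_df`` / ``get_pandas_documents_df``):

.. code-block:: python

   import pandas as pd

   from abacusai.api_client_utils import DocstoreUtils

   # Illustrative stand-in for a pages DataFrame; the real one may carry
   # additional columns such as tokens or page metadata.
   pages_df = pd.DataFrame([
       {DocstoreUtils.DOC_ID: 'doc-1', DocstoreUtils.PAGE: 0, DocstoreUtils.PAGE_TEXT: 'First page text'},
       {DocstoreUtils.DOC_ID: 'doc-1', DocstoreUtils.PAGE: 1, DocstoreUtils.PAGE_TEXT: 'Second page text'},
   ])

   # Reassemble per-document text by referencing the documented constants
   # rather than raw string literals.
   full_text = (
       pages_df.sort_values(DocstoreUtils.PAGE)
       .groupby(DocstoreUtils.DOC_ID)[DocstoreUtils.PAGE_TEXT]
       .apply('\n'.join)
   )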