class abacusai.api_class.feature_group.SamplingConfig

Bases: abacusai.api_class.abstract.ApiClass

sampling_method: abacusai.api_class.enums.SamplingMethodType = None
classmethod _get_builder()
class abacusai.api_class.feature_group.NSamplingConfig

Bases: SamplingConfig

  • sample_count (int) – The number of rows to include in the sample

  • key_columns (List[str]) – The feature(s) to use as the key(s) when sampling

sample_count: int
key_columns: List[str] = []
class abacusai.api_class.feature_group.PercentSamplingConfig

Bases: SamplingConfig

  • sample_percent (float) – The percentage of the rows to sample

  • key_columns (List[str]) – The feature(s) to use as the key(s) when sampling

sample_percent: float
key_columns: List[str] = []
class abacusai.api_class.feature_group._SamplingConfigFactory

Bases: abacusai.api_class.abstract._ApiClassFactory

config_class_key = 'sampling_method'
class abacusai.api_class.feature_group.MergeConfig

Bases: abacusai.api_class.abstract.ApiClass

merge_mode: abacusai.api_class.enums.MergeMode = None
classmethod _get_builder()
class abacusai.api_class.feature_group.LastNMergeConfig

Bases: MergeConfig

  • num_versions (int) – The number of versions to merge. num_versions == 0 means merge all versions.

  • include_version_timestamp_column (bool) – If set, include a column with the creation timestamp of the source feature group versions.

num_versions: int
include_version_timestamp_column: bool = None
class abacusai.api_class.feature_group.TimeWindowMergeConfig

Bases: MergeConfig

  • feature_name (str) – Time based column to index on

  • time_window_size_ms (int) – Range of merged rows will be [MAX_TIME - time_window_size_ms, MAX_TIME]

  • include_version_timestamp_column (bool) – If set, include a column with the creation timestamp of the source feature group versions.

feature_name: str
time_window_size_ms: int
include_version_timestamp_column: bool = None
class abacusai.api_class.feature_group._MergeConfigFactory

Bases: abacusai.api_class.abstract._ApiClassFactory

config_class_key = 'merge_mode'
class abacusai.api_class.feature_group.OperatorConfig

Bases: abacusai.api_class.abstract.ApiClass

operator_type: abacusai.api_class.enums.OperatorType = None
classmethod _get_builder()
class abacusai.api_class.feature_group.UnpivotConfig

Bases: OperatorConfig

  • columns (List[str]) – Which columns to unpivot.

  • index_column (str) – Name of new column containing the unpivoted column names as its values

  • value_column (str) – Name of new column containing the row values that were unpivoted.

  • exclude (bool) – If True, the unpivoted columns are all the columns EXCEPT the ones in the columns argument. Default is False.

columns: List[str] = None
index_column: str = None
value_column: str = None
exclude: bool = None
class abacusai.api_class.feature_group.MarkdownConfig

Bases: OperatorConfig

  • input_column (str) – Name of input column to transform.

  • output_column (str) – Name of output column to store transformed data.

  • input_column_type (MarkdownOperatorInputType) – Type of input column to transform.

input_column: str = None
output_column: str = None
input_column_type: abacusai.api_class.enums.MarkdownOperatorInputType = None
class abacusai.api_class.feature_group.CrawlerTransformConfig

Bases: OperatorConfig

  • input_column (str) – Name of input column to transform.

  • output_column (str) – Name of output column to store transformed data.

  • depth_column (str) – Name of the column that specifies the crawl depth. Increasing depth explores more links, capturing more content.

  • disable_host_restriction (bool) – If True, will not restrict crawling to the same host.

  • honour_website_rules (bool) – If True, will respect robots.txt rules.

  • user_agent (str) – If provided, will use this user agent instead of randomly selecting one.

input_column: str = None
output_column: str = None
depth_column: str = None
input_column_type: str = None
crawl_depth: int = None
disable_host_restriction: bool = None
honour_website_rules: bool = None
user_agent: str = None
class abacusai.api_class.feature_group.ExtractDocumentDataConfig

Bases: OperatorConfig

  • doc_id_column (str) – Name of input document ID column.

  • document_column (str) – Name of the input document column which contains the page information. This column will be transformed to include the document processing config in the output feature group.

  • document_processing_config (DocumentProcessingConfig) – Document processing configuration.

doc_id_column: str = None
document_column: str = None
document_processing_config: abacusai.api_class.dataset.DocumentProcessingConfig = None
class abacusai.api_class.feature_group.DataGenerationConfig

Bases: OperatorConfig

  • prompt_col (str) – Name of the input prompt column.

  • completion_col (str) – Name of the output completion column.

  • description_col (str) – Name of the description column.

  • id_col (str) – Name of the identifier column.

  • generation_instructions (str) – Instructions for the data generation model.

  • temperature (float) – Sampling temperature for the model.

  • fewshot_examples (int) – Number of few-shot examples used to prompt the model.

  • concurrency (int) – Number of concurrent processes.

  • examples_per_target (int) – Number of examples per target.

  • subset_size (Optional[int]) – Size of the subset to use for generation.

  • verify_response (bool) – Whether to verify the response.

  • token_budget (int) – Token budget for generation.

  • oversample (bool) – Whether to oversample the data.

  • documentation_char_limit (int) – Character limit for documentation.

  • frequency_penalty (float) – Penalty for frequency of token appearance.

  • model (str) – Model to use for data generation.

  • seed (Optional[int]) – Seed for random number generation.

prompt_col: str = None
completion_col: str = None
description_col: str = None
id_col: str = None
generation_instructions: str = None
temperature: float = None
fewshot_examples: int = None
concurrency: int = None
examples_per_target: int = None
subset_size: int = None
verify_response: bool = None
token_budget: int = None
oversample: bool = None
documentation_char_limit: int = None
frequency_penalty: float = None
model: str = None
seed: int = None
class abacusai.api_class.feature_group.UnionTransformConfig

Bases: OperatorConfig

  • feature_group_ids (List[str]) – List of feature group IDs to union with the source feature group.

  • drop_non_intersecting_columns (bool) – If true, will drop columns that are not present in all feature groups. If false, will fill missing columns with nulls.

feature_group_ids: List[str] = None
drop_non_intersecting_columns: bool = False
class abacusai.api_class.feature_group._OperatorConfigFactory

Bases: abacusai.api_class.abstract._ApiClassFactory

config_class_key = 'operator_type'