{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "technicalSpecifications": {
    "architecture": {
      "type": "Command-line executable",
      "language": "Modern compiled language (high performance)",
      "design": "Multi-threaded streaming architecture",
      "deployment": "Single executable with embedded dependencies",
      "processModel": "Single process with worker threads",
      "memoryModel": "Streaming with bounded memory usage",
      "ioModel": "Asynchronous I/O for network and disk operations"
    },
    "performance": {
      "parallelism": {
        "type": "Data-level parallelism",
        "mechanism": "Automatic data partitioning across threads",
        "configuration": "--parallel parameter",
        "defaultThreads": "Auto-detected based on CPU cores",
        "maxThreads": "Configurable (typically 2-32 threads)",
        "scalability": "Near-linear scaling with CPU cores"
      },
      "throughput": {
        "peak": "112M cells/second (measured)",
        "typical": "10-50M cells/second (production workloads)",
        "factorsAffecting": [
          "Network bandwidth",
          "Source database performance",
          "Target storage I/O",
          "Data complexity and length",
          "Parallelism level",
          "CPU frequency and architecture"
        ]
      },
      "memoryUsage": {
        "model": "Streaming with bounded buffers",
        "typical": "< 500MB for most workloads",
        "maximum": "Configurable buffer sizes",
        "independent": "Memory usage independent of dataset size",
        "behavior": "Constant memory regardless of table size"
      },
      "latency": {
        "startupTime": "< 1 second",
        "firstDataOut": "< 5 seconds (typical)",
        "streamingDelay": "Minimal (real-time streaming)"
      }
    },
    "dataProcessing": {
      "readPattern": "Streaming cursor from database",
      "bufferingStrategy": "Ring buffers between stages",
      "compressionSupport": {
        "parquet": ["SNAPPY", "GZIP", "LZO", "BROTLI", "ZSTD"],
        "csv": "No",
        "json": "No"
      },
      "encodingSupport": {
        "csv": ["UTF-8", "UTF-16", "ASCII", "ISO-8859-1"],
        "json": ["UTF-8"],
        "parquet": "UTF-8 (built-in)"
      },
      "dataTypes": {
        "supported": [
          "Integer (all sizes)",
          "Float/Double",
          "Decimal/Numeric",
          "String/Varchar",
          "Date",
          "Timestamp",
          "Boolean",
          "Binary/BLOB",
          "JSON (native or text)",
          "XML (as text)",
          "UUID/GUID",
          "Vector (as text)"
        ],
        "specialHandling": {
          "nulls": "Native NULL handling in all formats",
          "largeObjects": "Streaming support for BLOBs/CLOBs",
          "unicode": "Full Unicode support"
        }
      },
      "schemaHandling": {
        "inference": "Automatic schema inference from source",
        "preservation": "Schema metadata preserved in Parquet",
        "conversion": "Automatic type mapping between databases and formats"
      }
    },
    "databaseConnectivity": {
      "drivers": {
        "embedded": [
          "PostgreSQL (Npgsql)",
          "MySQL/MariaDB (native driver)",
          "Oracle (ODP.NET)",
          "SQL Server (native driver)",
          "ClickHouse (native HTTP driver)",
          "SAP HANA (native driver)",
          "Teradata (native driver)",
          "Netezza (native driver)",
          "DuckDB (embedded engine)"
        ],
        "external": [
          "ODBC (Windows/Linux)",
          "OLE DB (Windows only)"
        ]
      },
      "connectionPooling": "Per-thread connections for parallelism",
      "reconnection": "Automatic retry with exponential backoff",
      "authentication": {
        "methods": [
          "Username/password",
          "Windows Authentication (SQL Server)",
          "Kerberos (Oracle, PostgreSQL, SQL Server)",
          "IAM (AWS RDS)",
          "Azure AD (Azure SQL)"
        ]
      },
      "ssl": "TLS/SSL support for encrypted connections",
      "timeout": "Configurable connection and query timeouts"
    },
    "cloudStorageIntegration": {
      "aws": {
        "services": ["S3", "S3-compatible"],
        "authentication": ["Access Key/Secret", "Profile", "IAM Role", "STS"],
        "features": ["Multi-part upload", "Server-side encryption"],
        "regions": "All AWS regions"
      },
      "azure": {
        "services": ["Blob Storage", "Data Lake Storage Gen2"],
        "authentication": ["Account Key", "SAS Token", "Azure AD"],
        "features": ["Block blob upload", "Hierarchical namespace (ADLS)"],
        "regions": "All Azure regions"
      },
      "gcp": {
        "services": ["Cloud Storage"],
        "authentication": ["Service Account", "API Key"],
        "features": ["Resumable upload", "Storage classes"],
        "regions": "All GCP regions"
      },
      "microsoft": {
        "services": ["OneLake (Microsoft Fabric)"],
        "authentication": ["Azure AD", "Workspace identity"],
        "features": ["Fabric integration"]
      },
      "uploadStrategy": {
        "method": "Streaming multi-part upload",
        "chunkSize": "Configurable (default 5MB-100MB)",
        "parallelUploads": "Concurrent chunk uploads",
        "retry": "Feature is coming soon"
      }
    },
    "logging": {
      "sinks": [
        {
          "type": "Console",
          "format": "Plain text or JSON",
          "levels": ["DEBUG", "INFO", "WARNING", "ERROR"],
          "buffering": "Unbuffered (real-time)"
        },
        {
          "type": "JSON File",
          "format": "JSON Lines (newline-delimited JSON)",
          "levels": ["DEBUG", "INFO", "WARNING", "ERROR"],
          "features": ["Log rotation", "Variable-based file naming"],
          "example": "fastbcp_{date}_{time}.log"
        },
        {
          "type": "PostgreSQL Database",
          "schema": "Configurable table schema",
          "levels": ["All levels"],
          "features": ["Async writes", "Batch inserts", "Error handling"]
        },
        {
          "type": "SQL Server Database",
          "schema": "Configurable table schema",
          "levels": ["All levels"],
          "features": ["Async writes", "Batch inserts", "Error handling"]
        }
      ],
      "metadata": {
        "included": [
          "Timestamp (UTC)",
          "Log level",
          "Message",
          "Source (module/component)",
          "Thread ID",
          "Session ID",
          "Command parameters (sanitized)",
          "Performance metrics (rows/sec, bytes/sec)"
        ],
        "excluded": ["Passwords (automatically obfuscated)"]
      }
    },
    "security": {
      "executableSigning": {
        "method": "Code signing certificate",
        "authority": "CA-certified",
        "verification": "Windows SmartScreen / Linux signature verification",
        "benefit": "Guaranteed malware-free, verified publisher"
      },
      "credentialProtection": {
        "logObfuscation": "Automatic password masking in all log outputs",
        "memoryHandling": "Secure string handling in memory",
        "transmission": "Encrypted connections (TLS/SSL)"
      },
      "sqlInjectionPrevention": {
        "mechanism": "Built-in SQL parser",
        "validation": "Parameter validation and sanitization",
        "safeMode": "Parameterized queries where applicable"
      },
      "networkSecurity": {
        "tlsSupport": "TLS 1.2, TLS 1.3",
        "certificateValidation": "Configurable (strict or permissive)",
        "proxySupport": "HTTP/HTTPS proxy support"
      }
    },
    "errorHandling": {
      "strategy": "Fail-fast with detailed error messages",
      "retries": {
        "database": "Automatic retry for logging errors",
        "cloud": "Exponential backoff for cloud upload failures",
        "maxRetries": "Configurable (default 3)"
      },
      "transactionality": {
        "model": "Best-effort (streaming architecture)",
        "notes": "No transactional rollback (output files may be partially written on failure)"
      },
      "errorReporting": {
        "exitCodes": "Standard POSIX exit codes",
        "logging": "Detailed error messages in logs",
        "stackTraces": "Available in verbose mode"
      }
    },
    "operationalRequirements": {
      "cpu": {
        "minimum": "2 cores",
        "recommended": "8+ cores for optimal parallelism",
        "architecture": "x86-64 (AMD64/Intel 64-bit) or ARM64 (Linux only)"
      },
      "memory": {
        "minimum": "512 MB RAM",
        "recommended": "2 GB+ RAM",
        "notes": "Memory-independent of dataset size and output format"
      },
      "disk": {
        "installation": "< 200 MB for executable",
        "temporary": "Minimal (streaming architecture)",
        "output": "Depends on data volume exported locally"
      },
      "network": {
        "bandwidth": "1 Gbps+ recommended for cloud exports",
        "latency": "Low latency to source database and target storage",
        "firewalls": "Outbound access to database and cloud storage"
      },
      "operatingSystems": {
        "windows": {
          "versions": ["Windows Server 2012 R2+", "Windows 10+", "Windows 11"],
          "architectures": ["x64"]
        },
        "linux": {
          "distributions": ["Ubuntu 18.04+", "CentOS 8+", "RHEL 8+", "Debian 9+"],
          "architectures": ["x64"]
        }
      }
    },
    "cli": {
      "syntax": "FastBCP [options]",
      "keyParameters": [
        {
          "parameter": "--connectiontype",
          "shortForm": "-C",
          "description": "Database connection type/driver",
          "values": ["mssql", "pgsql", "pgcopy", "mysql", "oraodp", "clickhouse", "hana", "teradata", "nzsql", "odbc", "oledb"],
          "example": "--connectiontype mssql"
        },
        {
          "parameter": "--server",
          "shortForm": "-S",
          "description": "Database server address or hostname",
          "example": "--server localhost" 
        },
        {
          "parameter": "--database",
          "shortForm": "-I",
          "description": "Database name",
          "example": "--database SalesDB"
        },
        {
          "parameter": "--connectionstring",
          "shortForm": "-G",
          "description": "Full database connection string (overrides other connection parameters)",
          "example": "--connectionstring \"Server=localhost;Database=mydb;User=admin;Password=***\""
        },
        {
          "parameter": "--user",
          "shortForm": "-U",
          "description": "Database username",
          "example": "--user FastUser"
        },
        {
          "parameter": "--password",
          "shortForm": "-X",
          "description": "Database password (automatically obfuscated in logs)",
          "example": "--password SecurePass123"
        },
        {
          "parameter": "--trusted",
          "shortForm": "-A",
          "description": "Use Windows/Kerberos authentication (switch parameter)"
        },
        {
          "parameter": "--query",
          "shortForm": "-q",
          "description": "SQL query to execute for data extraction",
          "example": "--query \"SELECT * FROM customers WHERE country='USA'\""
        },
        {
          "parameter": "--fileinput",
          "shortForm": "-F",
          "description": "Path to SQL file containing query (do not end query with semicolon)",
          "example": "--fileinput /queries/extract.sql"
        },
        {
          "parameter": "--sourceschema",
          "shortForm": "-s",
          "description": "Source schema name (required for parallel methods)",
          "example": "--sourceschema dbo"
        },
        {
          "parameter": "--sourcetable",
          "shortForm": "-T",
          "description": "Source table name (required for parallel methods)",
          "example": "--sourcetable Orders"
        },
        {
          "parameter": "--directory",
          "shortForm": "-D",
          "description": "Output directory path (local or cloud URI: s3://, abs://, abfss://, gs://, onelake://)",
          "examples": [
            "--directory C:\\exports",
            "--directory s3://my-bucket/data/",
            "--directory abs://account.blob.core.windows.net/container/",
            "--directory abfss://account.dfs.core.windows.net/container/",
            "--directory gs://my-bucket/exports/"
          ]
        },
        {
          "parameter": "--fileoutput",
          "shortForm": "-o",
          "description": "Output filename with extension (extension determines format: .csv, .parquet, .json, .xlsx, .bson)",
          "examples": [
            "--fileoutput orders.parquet",
            "--fileoutput data.csv",
            "--fileoutput export.json"
          ]
        },
        {
          "parameter": "--parallelmethod",
          "shortForm": "-m",
          "description": "Parallel processing method",
          "values": ["None", "Random", "DataDriven", "RangeId", "Ntile", "Ctid", "Physloc", "Rowid", "Timepartition"],
          "notes": "Ctid=PostgreSQL only, Physloc=SQL Server only, Rowid=Oracle only, Timepartition=Time-based partitioning",
          "example": "--parallelmethod Ntile"
        },
        {
          "parameter": "--paralleldegree",
          "shortForm": "-p",
          "description": "Number of parallel threads (positive=exact, 0=auto, negative=cores/abs(value))",
          "default": "-2",
          "examples": [
            "--paralleldegree 8 (use 8 threads)",
            "--paralleldegree 0 (auto-detect)",
            "--paralleldegree -2 (use half of CPU cores)"
          ]
        },
        {
          "parameter": "--distributekeycolumn",
          "shortForm": "-c",
          "description": "Column or expression for data distribution (required for Random, DataDriven, RangeId, Ntile, Timepartition methods)",
          "examples": [
            "--distributekeycolumn order_id",
            "--distributekeycolumn \"YEAR(order_date)\"",
            "--distributekeycolumn \"(o_orderdate,year,month)\""
          ]
        },
        {
          "parameter": "--datadrivenquery",
          "description": "Query returning values for DataDriven method distribution",
          "example": "--datadrivenquery \"SELECT DISTINCT region FROM regions\""
        },
        {
          "parameter": "--merge",
          "shortForm": "-M",
          "description": "Merge parallel output files (true/false, default=true for local)",
          "values": ["true", "false"],
          "notes": "Automatic for local files, not available for cloud destinations"
        },
        {
          "parameter": "--delimiter",
          "shortForm": "-d",
          "description": "CSV field delimiter character",
          "default": ",",
          "examples": [",", "|", ";", "\\t"]
        },
        {
          "parameter": "--quotes (true/false)",
          "shortForm": "-t (true/false)",
          "description": "Enclose CSV string fields in double quotes (switch)",
          "default": "false"
        },
        {
          "parameter": "--decimalseparator",
          "description": "Decimal separator for CSV numeric values",
          "examples": [".", ","],
          "default": "."
        },
        {
          "parameter": "--dateformat",
          "description": "Date/time format string for CSV",
          "example": "--dateformat \"yyyy-MM-dd HH:mm:ss\"",
          "default": "yyyy-MM-dd"
        },
        {
          "parameter": "--encoding",
          "description": "Character encoding for CSV/JSON",
          "values": ["UTF-8", "UTF-16", "ASCII", "ISO-8859-1"],
          "default": "UTF-8"
        },
        {
          "parameter": "--parquetcompression",
          "description": "Parquet compression codec",
          "values": ["Zstd", "Snappy", "Gzip", "Lzo", "Lz4", "None"],
          "default": "Zstd",
          "example": "--parquetcompression Snappy"
        },
        {
          "parameter": "--cloudprofile",
          "description": "Cloud credentials profile name",
          "example": "--cloudprofile production"
        },
        {
          "parameter": "--license",
          "description": "License file path, URL, or inline content",
          "examples": [
            "--license C:\\licenses\\FastBCP.lic",
            "--license https://licenses.corp.lan/fastbcp/license.lic",
            "--license $LICENSE_CONTENT"
          ],
          "default": "Search FastBCP.lic in executable directory if not provided"
        },
        {
          "parameter": "--runid",
          "description": "identifier for this export operation (for logging/tracking)",
          "example": "--runid export-2024-01-09-001"
        },
        {
          "parameter": "--loglevel",
          "description": "Logging verbosity level",
          "values": ["Information", "Debug"],
          "default": "Information"
        },
        {
          "parameter": "--settingsfile",
          "description": "JSON settings file path (stores all parameters)",
          "example": "--settingsfile /config/export-config.json",
          "default": "Search for FastBCP_settings.json in executable directory if not provided. If nothing provide, fallback to console only"
        },
        {
          "parameter": "--nobanner",
          "description": "Disable FastBCP banner display (switch, useful for scripting)"
        },
        {
          "parameter": "--applicationintent",
          "description": "SQL Server application intent",
          "values": ["ReadOnly", "ReadWrite"],
          "default": "ReadOnly",
          "notes": "SQL Server and OLEDB only"
        }
      ],
      "outputFormat": "Structured JSON or plain text progress",
      "exitCodes": {
        "0": "Success",
        "1": "Bad Command Line Arguments",
        "2": "Error",
        "99": "No License Found",
        "100":"License Invalid/Expired"
      }
    },
    "integration": {
      "orchestrationTools": [
        "Apache Airflow",
        "Prefect",
        "Azure Data Factory",
        "AWS Step Functions",
        "Databricks Workflows",
        "PowerShell scripts",
        "Bash scripts",
        "Python subprocess",
        "Cron/Task Scheduler"
      ],
      "cicd": [
        "Jenkins",
        "GitHub Actions",
        "GitLab CI",
        "Azure DevOps",
        "CircleCI"
      ],
      "containerization": {
        "docker": "Compatible (Linux-based images)",
        "kubernetes": "Deployable as Jobs or CronJobs"
      }
    }
  },
  "metadata": {
    "lastUpdated": "2026-02-19",
    "purpose": "Technical specifications for AI agents and developers",
    "schemaVersion": "1.0.0"
  }
}
