o
    5cG                     @  sv  d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	m
Z d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZmZm Z m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 erd dl8Z8d dl9m:Z: eG dd deZ;G dd de)Z<G dd de<e1Z=dS )    )annotations)TYPE_CHECKINGN)
get_option)libmissing)NDArrayBacked)DtypeScalarnpttype_t)pa_version_under1p01)function)ExtensionDtypeStorageExtensionDtyperegister_extension_dtype)is_array_likeis_bool_dtypeis_dtype_equalis_integer_dtypeis_object_dtypeis_string_dtypepandas_dtype)ops)masked_reductions)ExtensionArrayFloatingArrayIntegerArray)FloatingDtype)IntegerDtype)PandasArray)extract_array)check_array_indexer)isna)Seriesc                   @  s`   e Zd ZdZdZedddZdZddd
dZedddZ	e
dd ZdddZdddZdS )StringDtypea8  
    Extension dtype for string data.

    .. versionadded:: 1.0.0

    .. warning::

       StringDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    storage : {"python", "pyarrow"}, optional
        If not given, the value of ``pd.options.mode.string_storage``.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.StringDtype()
    string[python]

    >>> pd.StringDtype(storage="pyarrow")
    string[pyarrow]
    stringreturnlibmissing.NATypec                 C  s   t jS N)
libmissingNAself r-   P/var/www/html/gps/gps/lib/python3.10/site-packages/pandas/core/arrays/string_.pyna_value_   s   zStringDtype.na_valuestorageNNonec                 C  sF   |d u rt d}|dvrtd| d|dkrtrtd|| _d S )Nzmode.string_storage>   pythonpyarrowz+Storage must be 'python' or 'pyarrow'. Got z	 instead.r4   z:pyarrow>=1.0.0 is required for PyArrow backed StringArray.)r   
ValueErrorr   ImportErrorr1   )r,   r1   r-   r-   r.   __init__e   s   

zStringDtype.__init__	type[str]c                 C  s   t S r(   )strr+   r-   r-   r.   typer   s   zStringDtype.typec                 C  sf   t |tstdt| |dkr|  S |dkr| ddS |dkr'| ddS td| j d	| d
)a|  
        Construct a StringDtype from a string.

        Parameters
        ----------
        string : str
            The type of the name. The storage type will be taking from `string`.
            Valid options and their storage types are

            ========================== ==============================================
            string                     result storage
            ========================== ==============================================
            ``'string'``               pd.options.mode.string_storage, default python
            ``'string[python]'``       python
            ``'string[pyarrow]'``      pyarrow
            ========================== ==============================================

        Returns
        -------
        StringDtype

        Raise
        -----
        TypeError
            If the string is not a valid option.
        z.'construct_from_string' expects a string, got r%   zstring[python]r3   r0   zstring[pyarrow]r4   zCannot construct a 'z' from '')
isinstancer9   	TypeErrorr:   __name__)clsr%   r-   r-   r.   construct_from_stringv   s   


z!StringDtype.construct_from_stringtype_t[BaseStringArray]c                 C  s   ddl m} | jdkrtS |S )zq
        Return the array type associated with this dtype.

        Returns
        -------
        type
        r   ArrowStringArrayr3   )pandas.core.arrays.string_arrowrC   r1   StringArray)r,   rC   r-   r-   r.   construct_array_type   s   

z StringDtype.construct_array_typearray$pyarrow.Array | pyarrow.ChunkedArrayBaseStringArrayc                 C  s   | j dkrddlm} ||S ddl}t||jr|g}n|j}g }|D ]}tt	
|}|| q$|r;t|S tt	j
g ddS )zH
        Construct StringArray from pyarrow Array/ChunkedArray.
        r4   r   rB   Nobjectdtype)r1   rD   rC   r4   r<   ArraychunksrE   _from_sequencenprG   append_concat_same_type)r,   rG   rC   r4   rN   resultsarrstr_arrr-   r-   r.   __from_arrow__   s   

zStringDtype.__from_arrow__)r&   r'   r(   r&   r2   )r&   r8   )r&   rA   )rG   rH   r&   rI   )r>   
__module____qualname____doc__namepropertyr/   	_metadatar7   r:   classmethodr@   rF   rV   r-   r-   r-   r.   r$   :   s     

+r$   c                   @  s   e Zd ZdZdS )rI   z8
    Mixin class for StringArray, ArrowStringArray.
    N)r>   rX   rY   rZ   r-   r-   r-   r.   rI      s    rI   c                      s  e Zd ZdZdZd>d? fddZdd	 Zed
ddd@ddZed
ddd@ddZ	edAddZ
dBddZdd Z fddZdCddZdDdE fd!d"Zdd#d$dFd*d+ZdGdHd-d.ZdGdHd/d0ZdDdId3d4Zd>dJd7d8Zd9d: ZeZejZ	dKdLd<d=Z  ZS )MrE   a_  
    Extension array for string data.

    .. versionadded:: 1.0.0

    .. warning::

       StringArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : array-like
        The array of data.

        .. warning::

           Currently, this expects an object-dtype ndarray
           where the elements are Python strings
           or nan-likes (``None``, ``np.nan``, ``NA``).
           This may change without warning in the future. Use
           :meth:`pandas.array` with ``dtype="string"`` for a stable way of
           creating a `StringArray` from any sequence.

        .. versionchanged:: 1.5.0

           StringArray now accepts array-likes containing
           nan-likes(``None``, ``np.nan``) for the ``values`` parameter
           in addition to strings and :attr:`pandas.NA`

    copy : bool, default False
        Whether to copy the array of data.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    array
        The recommended function for creating a StringArray.
    Series.str
        The string methods are available on Series backed by
        a StringArray.

    Notes
    -----
    StringArray returns a BooleanArray for comparison methods.

    Examples
    --------
    >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
    <StringArray>
    ['This is', 'some text', <NA>, 'data.']
    Length: 4, dtype: string

    Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
    will convert the values to strings.

    >>> pd.array(['1', 1], dtype="object")
    <PandasArray>
    ['1', 1]
    Length: 2, dtype: object
    >>> pd.array(['1', 1], dtype="string")
    <StringArray>
    ['1', '1']
    Length: 2, dtype: string

    However, instantiating StringArrays directly with non-strings will raise an error.

    For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:

    >>> pd.array(["a", None, "c"], dtype="string") == "a"
    <BooleanArray>
    [True, <NA>, False]
    Length: 3, dtype: boolean
    	extensionFr&   r2   c                   sH   t |}t j||d t|t| s|   t| | jtdd d S )Ncopyr3   r0   )	r    superr7   r<   r:   	_validater   _ndarrayr$   )r,   valuesra   	__class__r-   r.   r7   1  s
   zStringArray.__init__c                 C  st   t | jrtj| jddstd| jjdkr!td| jj d| jjdkr2t| jd d	S t| j d	S )
z*Validate that we only store NA or strings.Tskipnaz7StringArray requires a sequence of strings or pandas.NArJ   z>StringArray requires a sequence of strings or pandas.NA. Got 'z' dtype instead.   KN)	lenrd   r   is_string_arrayr5   rL   ndimconvert_nans_to_NAravelr+   r-   r-   r.   rc   9  s   zStringArray._validateNrL   ra   rL   Dtype | Nonec                C  s   |rt |tr|dkst|}t |tr|jdksJ ddlm} t ||r:|j}|j}t	j
||dd}tj||< n	t	j
|tj|d}| | }t||tdd |S )	Nr%   r3   r   )BaseMaskedArrayF)ra   convert_na_value)r/   ra   r0   )r<   r9   r   r$   r1   pandas.core.arrays.maskedrs   _mask_datar   ensure_string_arrayr)   r*   __new__r   r7   )r?   scalarsrL   ra   rs   	na_valuesresultnew_string_arrayr-   r-   r.   rO   I  s   

zStringArray._from_sequencec                C  s   | j |||dS )Nrq   )rO   )r?   stringsrL   ra   r-   r-   r.   _from_sequence_of_stringsc  s   z%StringArray._from_sequence_of_stringsc                 C  s.   t j|td}tj|d d < | |j|ddS )NrK   Fr`   )rP   emptyrJ   r)   r*   astype)r?   shaperL   re   r-   r-   r.   _emptyi  s   zStringArray._emptyc                 C  s>   ddl }|du r| }| j }d||  < |j||ddS )z6
        Convert myself into a pyarrow Array.
        r   NT)r:   from_pandas)r4   r%   rd   ra   r"   rG   )r,   r:   pare   r-   r-   r.   __arrow_array__o  s   
zStringArray.__arrow_array__c                 C  s"   | j  }|  }d ||< |d fS r(   )rd   ra   r"   )r,   rT   maskr-   r-   r.   _values_for_factorize|  s   
z!StringArray._values_for_factorizec                   s   t |dd}t|t| r|j}t| |}t|}t|}|r'|s'td|r?t|r1t	j
}n/t|ts>td| dn!t|sJtj|td}t|rYtj|ddsYtdt	j
|t|< t || d S )	NT)extract_numpyz)setting an array element with a sequence.zCannot set non-string value 'z' into a StringArray.rK   rh   zMust provide strings.)r    r<   r:   rd   r!   r   	is_scalarr5   r"   r)   r*   r9   r   rP   asarrayrJ   rl   rm   rb   __setitem__)r,   keyvalue
scalar_keyscalar_valuerf   r-   r.   r     s,   




zStringArray.__setitem__r   npt.NDArray[np.bool_]c                 C  s   t | || d S r(   )r   _putmask)r,   r   r   r-   r-   r.   r     s   zStringArray._putmaskTra   boolc                   s  t |}t|| jr|r|  S | S t|tr1| j }|  }d||< ||j	}t
||ddS t|trO|  }|  }d||< ||j	}t||ddS t|tr\t j||dS t|tjr|| j }|  }d||< ||}tj||< |S t ||S )Nr   Fr`   0)r   r   rL   ra   r<   r   rd   r"   r   numpy_dtyper   r   r   r   rb   rP   
issubdtypefloatingnan)r,   rL   ra   rT   r   re   rf   r-   r.   r     s6   






zStringArray.astyper   ri   axisr[   r9   ri   r   
int | Nonec                K  s*   |dv rt | |||dS td| d)N)minmaxr   zCannot perform reduction 'z' with string dtype)getattrr=   )r,   r[   ri   r   kwargsr-   r-   r.   _reduce  s   zStringArray._reducer	   c                 K  0   t d| tj|  |  |d}| ||S Nr-   )re   r   ri   )nvvalidate_minr   r   to_numpyr"   _wrap_reduction_resultr,   r   ri   r   r|   r-   r-   r.   r     
   zStringArray.minc                 K  r   r   )r   validate_maxr   r   r   r"   r   r   r-   r-   r.   r     r   zStringArray.maxdropnar#   c                 C  s4   ddl m} || j|dd}|j| j|_|S )Nr   )value_counts)r   Int64)pandasr   rd   r   indexrL   )r,   r   r   r|   r-   r-   r.   r     s   zStringArray.value_countsdeepintc                 C  s    | j j}|r|t| j  S |S r(   )rd   nbytesr   memory_usage_of_objects)r,   r   r|   r-   r-   r.   memory_usage  s   zStringArray.memory_usagec                 C  s   ddl m} t|tr|j}t| t|B }| }t|s=t|t| kr4t	dt|  dt| t
|}|| }|jtjv r^t
j| jdd}tj||< || j| |||< t|S t
jt| jdd}|| j| |||< |||S )Nr   BooleanArrayz"Lengths of operands do not match: z != rJ   rK   r   )pandas.arraysr   r<   rE   rd   r"   r   r   rl   r5   rP   r   r>   r   ARITHMETIC_BINOPS
empty_liker)   r*   zeros)r,   otheropr   r   validr|   r-   r-   r.   _cmp_method  s(   




zStringArray._cmp_methodconvertc              	   C  s   ddl m} |d u rtdd}|d u r| jj}t| }t| }t|s(t	|rWt|r/t
}n|}t|}	|	r9d}tj|||dd|t|d}
|	sRd|d d < ||
|S t|rpt|sptj|||dd|d	}
t|
S t|||dS )
Nr   r   r3   r0      uint8F)r   r/   rL   )r   r/   )r   r   r$   rL   r/   r"   rP   r   r   r   r   r   map_infer_maskviewr   r   rE   )r,   fr/   rL   r   r   r   rT   constructorna_value_is_nar|   r-   r-   r.   _str_map  s>   


zStringArray._str_map)FrW   )rL   rr   )r&   rE   r(   )r   r   r&   r2   )T)ra   r   )r[   r9   ri   r   r   r   )NT)ri   r   r&   r	   )r   r   r&   r#   )r   r   r&   r   )NNT)rL   rr   r   r   )r>   rX   rY   rZ   _typr7   rc   r^   rO   r   r   r   r   r   r   r   r   r   r   r   r   r   _arith_methodr)   r*   _str_na_valuer   __classcell__r-   r-   rf   r.   rE      s8    S

!rE   )>
__future__r   typingr   numpyrP   pandas._configr   pandas._libsr   r   r)   pandas._libs.arraysr   pandas._typingr   r	   r
   r   pandas.compatr   pandas.compat.numpyr   r   pandas.core.dtypes.baser   r   r   pandas.core.dtypes.commonr   r   r   r   r   r   r   pandas.corer   pandas.core.array_algosr   pandas.core.arraysr   r   r   pandas.core.arrays.floatingr   pandas.core.arrays.integerr   pandas.core.arrays.numpy_r   pandas.core.constructionr    pandas.core.indexersr!   pandas.core.missingr"   r4   r   r#   r$   rI   rE   r-   r-   r-   r.   <module>   s8    $
 