o
    5c%>                     @  sD  d dl mZ d dlmZ d dlZd dlmZ d dlZd dl	m
Z
mZ d dlmZmZmZmZ d dlmZmZmZmZ d dlmZmZmZmZmZmZmZ d d	lm Z  d d
l!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- esd dl.Z/d dl0m1Z2 d dl3m4Z4 ee5ej6f Z7dddZ8G dd de"e*e-Z9dS )    )annotations)CallableN)Union)libmissing)DtypeNpDtypeScalarnpt)pa_version_under1p01pa_version_under2p0pa_version_under3p0pa_version_under4p0)is_bool_dtypeis_dtype_equalis_integer_dtypeis_object_dtype	is_scalaris_string_dtypepandas_dtype)isna)ArrowExtensionArray)BooleanDtype)
Int64Dtype)NumericDtype)BaseStringArrayStringDtype)ObjectStringArrayMixin)fallback_performancewarningreturnNonec                  C  s   t rd} t| d S )NzBpyarrow>=1.0.0 is required for PyArrow backed ArrowExtensionArray.)r   ImportError)msg r#   U/var/www/html/gps/gps/lib/python3.10/site-packages/pandas/core/arrays/string_arrow.py_chk_pyarrow_available8   s   r%   c                      s  e Zd ZU dZded< da fddZedbdcddZe		dbdcddZe	ddddZ
dedfddZdd	ejfdgddZdh fddZdd  Zdi fd"d#Zdjdk fd%d&ZejZ	$dldmd(d)Zd$d*ejd$fdn fd,d-Zdedo fd0d1Zdedo fd2d3Z	4	$	*	$dpdq fd;d<Z	drds fd?d@Z	drdt fdAdBZdCdD ZdEdF Z dGdH Z!dIdJ Z"dKdL Z#dMdN Z$ fdOdPZ%dQdR Z&dSdT Z' fdUdVZ(dWdX Z)dYdZ Z*de fd[d\	Z+de fd]d^	Z,de fd_d`	Z-  Z.S )uArrowStringArraya  
    Extension array for string data in a ``pyarrow.ChunkedArray``.

    .. versionadded:: 1.2.0

    .. warning::

       ArrowStringArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : pyarrow.Array or pyarrow.ChunkedArray
        The array of data.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    array
        The recommended function for creating a ArrowStringArray.
    Series.str
        The string methods are available on Series backed by
        a ArrowStringArray.

    Notes
    -----
    ArrowStringArray returns a BooleanArray for comparison methods.

    Examples
    --------
    >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string[pyarrow]")
    <ArrowStringArray>
    ['This is', 'some text', <NA>, 'data.']
    Length: 4, dtype: string
    r   _dtyper   r    c                   s4   t  | tdd| _tj| jjst	dd S )Npyarrow)storagezBArrowStringArray requires a PyArrow (chunked) array of string type)
super__init__r   r'   patypes	is_string_datatype
ValueError)selfvalues	__class__r#   r$   r+   s   s   zArrowStringArray.__init__NFdtypeDtype | Nonecopyboolc                 C  s   ddl m} t  |r$t|tr|dks$t|}t|tr"|jdks$J t||rC|j}|j	}t
j||dd}| tj||t dS t
j||d}| tj|t d	d
S )Nr   )BaseMaskedArraystringr(   F)r8   convert_na_value)maskr0   r8   T)r0   from_pandas)pandas.core.arrays.maskedr:   r%   
isinstancestrr   r   r)   _maskr/   r   ensure_string_arrayr,   arrayr;   )clsscalarsr6   r8   r:   	na_valuesresultr#   r#   r$   _from_sequence|   s   
zArrowStringArray._from_sequencec                 C  s   | j |||dS )Nr6   r8   )rJ   )rF   stringsr6   r8   r#   r#   r$   _from_sequence_of_strings   s   z*ArrowStringArray._from_sequence_of_stringsc                 C  s   | j S )z3
        An instance of 'string[pyarrow]'.
        )r'   r2   r#   r#   r$   r6      s   zArrowStringArray.dtypeNpDtype | None
np.ndarrayc                 C  s   | j |dS )z?Correctly construct numpy arrays when passed to `np.asarray()`.r6   )to_numpy)r2   r6   r#   r#   r$   	__array__   s   zArrowStringArray.__array__npt.DTypeLike | Nonec                 C  sX   t j| j|d}| jjdkr*|tju r"|rt |t jr|S | jj	}| 
 }|||< |S )z-
        Convert to a NumPy ndarray.
        rQ   r   )nprE   r/   
null_countr   
no_default
issubdtypefloatingr'   na_valuer   )r2   r6   r8   rZ   rI   r=   r#   r#   r$   rR      s   
zArrowStringArray.to_numpylocintc                   s*   t |ts|tjurtdt ||S )NScalar must be NA or str)rA   rB   
libmissingNA	TypeErrorr*   insert)r2   r[   itemr4   r#   r$   ra      s   zArrowStringArray.insertc                 C  sr   t |rt|rd}|S t|tstd|S tj|tdd}d|t|< |D ]}|du s6t|ts6tdq'|S )z-Maybe convert value to be pyarrow compatible.Nr]   TrK   )r   r   rA   rB   r1   rU   rE   object)r2   valuevr#   r#   r$   _maybe_convert_setitem_value   s   	
z-ArrowStringArray._maybe_convert_setitem_valuenpt.NDArray[np.bool_]c                   s   t rtdd t |S dd dd |D D }t|s&tjt| tdS i }tr.d|d< t	j
| jfd	t|i|}tj|tjdS )
N2versionc                 S  s*   g | ]}|j t t fv r| qS r#   )r0   r,   r;   nullas_py).0	pa_scalarr#   r#   r$   
<listcomp>   s    z)ArrowStringArray.isin.<locals>.<listcomp>c                 S  s   g | ]	}t j|d dqS )T)r?   )r,   scalar)rm   rd   r#   r#   r$   ro      s    rQ   T	skip_null	value_set)r   r   r*   isinlenrU   zerosr9   r   pcis_inr/   r,   rE   bool_)r2   r3   rr   kwargsrI   r4   r#   r$   rs      s   
zArrowStringArray.isinTc                   s\   t |}t|| jr|r|  S | S t|tr&| jt	|j
}||S t j||dS )Nr>   )r   r   r6   r8   rA   r   r/   castr,   from_numpy_dtypenumpy_dtype__from_arrow__r*   astype)r2   r6   r8   datar4   r#   r$   r~      s   

zArrowStringArray.astypeconvertc              	   C  s  ddl m}m} |d u r| j}|d u r| jj}t| }t| }t|s(t	|rWt|r/|}	n|}	t|}
|
r9d}t
j|||dd|t|d}|
sRd|d d < |	||S t|r}t|s}t
j|||dd|d}tj||t dd	}t| |S t
|||dS )
Nr   )BooleanArrayIntegerArray   uint8F)r   rZ   r6   )r   rZ   T)r=   r0   r?   )pandas.arraysr   r   r6   rZ   r   rU   asarrayr   r   r   map_infer_maskviewr   r   r,   rE   r;   r0   )r2   frZ   r6   r   r   r   r=   arrconstructorna_value_is_narI   r#   r#   r$   _str_map   s@   

zArrowStringArray._str_mapr   regexc                   s   |rt   t |||||S |r.ts|du r&t dd t |||||S t| j|}n|r8t| j|}ntt| j|	 }t
 |}t|sVt||t|< |S )NF4ri   )r   r*   _str_containsr   rv   match_substring_regexr/   match_substring
utf8_upperupperr   r}   r   r9   )r2   patcaseflagsnar   rI   r4   r#   r$   r   8  s   
zArrowStringArray._str_containsr   rB   c                   s:   t rtdd t ||S dt| }| j||ddS )Nr   ri   ^Tr   r   )r   r   r*   _str_startswithreescaper   r2   r   r   r4   r#   r$   r   M  
   
z ArrowStringArray._str_startswithc                   s:   t rtdd t ||S t|d }| j||ddS )Nr   ri   $Tr   )r   r   r*   _str_endswithr   r   r   r   r4   r#   r$   r   U  r   zArrowStringArray._str_endswithstr | re.Patternreplstr | Callablenr   r   c           	        sn   t st|tjst|s|r|r tdd t ||||||S |r%tj	ntj
}|| j|||d}t| |S )Nr   ri   )patternreplacementmax_replacements)r   rA   r   Patterncallabler   r*   _str_replacerv   replace_substring_regexreplace_substringr/   r0   )	r2   r   r   r   r   r   r   funcrI   r4   r#   r$   r   ]  s   


zArrowStringArray._str_replacer   Scalar | Nonec                   sF   t rtdd t ||||S |dsd| }| j||||ddS )Nr   ri   r   T)r   )r   r   r*   
_str_match
startswithr   r2   r   r   r   r   r4   r#   r$   r   t  s   

zArrowStringArray._str_matchc                   sL   t rtdd t ||||S |dr|dr|d }| ||||S )Nr   ri   r   z//$)r   r   r*   _str_fullmatchendswithr   r   r4   r#   r$   r     s   
zArrowStringArray._str_fullmatchc                 C     t | j}t |S N)rv   utf8_is_alnumr/   r   r}   r2   rI   r#   r#   r$   _str_isalnum     zArrowStringArray._str_isalnumc                 C  r   r   )rv   utf8_is_alphar/   r   r}   r   r#   r#   r$   _str_isalpha  r   zArrowStringArray._str_isalphac                 C  r   r   )rv   utf8_is_decimalr/   r   r}   r   r#   r#   r$   _str_isdecimal  r   zArrowStringArray._str_isdecimalc                 C  r   r   )rv   utf8_is_digitr/   r   r}   r   r#   r#   r$   _str_isdigit  r   zArrowStringArray._str_isdigitc                 C  r   r   )rv   utf8_is_lowerr/   r   r}   r   r#   r#   r$   _str_islower  r   zArrowStringArray._str_islowerc                 C  r   r   )rv   utf8_is_numericr/   r   r}   r   r#   r#   r$   _str_isnumeric  r   zArrowStringArray._str_isnumericc                   0   t rtdd t  S t| j}t |S )Nrh   ri   )	r   r   r*   _str_isspacerv   utf8_is_spacer/   r   r}   r   r4   r#   r$   r     
   

zArrowStringArray._str_isspacec                 C  r   r   )rv   utf8_is_titler/   r   r}   r   r#   r#   r$   _str_istitle  r   zArrowStringArray._str_istitlec                 C  r   r   )rv   utf8_is_upperr/   r   r}   r   r#   r#   r$   _str_isupper  r   zArrowStringArray._str_isupperc                   r   )Nr   ri   )	r   r   r*   _str_lenrv   utf8_lengthr/   r   r}   r   r4   r#   r$   r     r   zArrowStringArray._str_lenc                 C     t | t| jS r   )r0   rv   
utf8_lowerr/   rN   r#   r#   r$   
_str_lower     zArrowStringArray._str_lowerc                 C  r   r   )r0   rv   r   r/   rN   r#   r#   r$   
_str_upper  r   zArrowStringArray._str_upperc                   L   t rtdd t |S |d u rt| j}ntj| j|d}t| |S Nr   ri   )
characters)	r   r   r*   
_str_striprv   utf8_trim_whitespacer/   	utf8_trimr0   r2   to_striprI   r4   r#   r$   r        
zArrowStringArray._str_stripc                   r   r   )	r   r   r*   _str_lstriprv   utf8_ltrim_whitespacer/   
utf8_ltrimr0   r   r4   r#   r$   r     r   zArrowStringArray._str_lstripc                   r   r   )	r   r   r*   _str_rstriprv   utf8_rtrim_whitespacer/   
utf8_rtrimr0   r   r4   r#   r$   r     r   zArrowStringArray._str_rstripr   r    )NF)r6   r7   r8   r9   )r   r   r   )r6   rO   r   rP   )r6   rT   r8   r9   r   rP   )r[   r\   r   r&   )r   rg   )T)r8   r9   )NNT)r6   r7   r   r9   )r   r9   )r   rB   )r   Tr   T)r   r   r   r   r   r\   r   r9   r   r\   r   r9   )Tr   N)r   rB   r   r9   r   r\   r   r   )r   r9   r   r\   r   r   )/__name__
__module____qualname____doc____annotations__r+   classmethodrJ   rM   propertyr6   rS   r   rW   rR   ra   rf   rs   r~   r^   r_   _str_na_valuer   rU   nanr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r#   r#   r4   r$   r&   C   sd   
 -	<r&   r   ):
__future__r   collections.abcr   r   typingr   numpyrU   pandas._libsr   r   r^   pandas._typingr   r   r	   r
   pandas.compatr   r   r   r   pandas.core.dtypes.commonr   r   r   r   r   r   r   pandas.core.dtypes.missingr   pandas.core.arrays.arrowr   pandas.core.arrays.booleanr   pandas.core.arrays.integerr   pandas.core.arrays.numericr   pandas.core.arrays.string_r   r    pandas.core.strings.object_arrayr   r(   r,   pyarrow.computecomputerv   %pandas.core.arrays.arrow._arrow_utilsr   rB   NATypeArrowStringScalarOrNATr%   r&   r#   r#   r#   r$   <module>   s.    $	
