o
    5³c(5  ã                   @  s<  d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZmZmZmZ ejejejej ej!ej"ej"dœZ#ejej$dfej"ej%e	fejej&dfejej&dfej ej&dfej'ej%dfej!ej(d fiZ)ej&dej$dej%diZ*G dd„ deƒZ+dS )é    )Úannotations)ÚAnyN)Úinfer_dtype)ÚiNaT)Úcache_readonly)Úis_categorical_dtypeÚis_string_dtype)ÚPandasBuffer)ÚColumnÚColumnBuffersÚColumnNullTypeÚ	DtypeKind)ÚArrowCTypesÚ
EndiannessÚNoBufferPresentÚdtype_to_arrow_c_fmt)ÚiÚuÚfÚbÚUÚMÚméÿÿÿÿzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                   @  s¾   e Zd ZdZd/d0d	d
„Zd1dd„Zed1dd„ƒZed2dd„ƒZ	d2dd„Z
edd„ ƒZedd„ ƒZed1dd„ƒZed3dd„ƒZd1dd„Zd4d5d#d$„Zd6d&d'„Zd7d)d*„Zd7d+d,„Zd7d-d.„Zd S )8ÚPandasColumnaö  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    TÚcolumnú	pd.SeriesÚ
allow_copyÚboolÚreturnÚNonec                 C  s0   t |tjƒstdt|ƒ› dƒ‚|| _|| _dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zColumns of type ú not handled yetN)Ú
isinstanceÚpdÚSeriesÚNotImplementedErrorÚtypeÚ_colÚ_allow_copy)Úselfr   r   © r*   úT/var/www/html/gps/gps/lib/python3.10/site-packages/pandas/core/interchange/column.pyÚ__init__H   s   
zPandasColumn.__init__Úintc                 C  s   | j jS )z2
        Size of the column, in elements.
        )r'   Úsize©r)   r*   r*   r+   r.   T   s   zPandasColumn.sizec                 C  ó   dS )z7
        Offset of first element. Always zero.
        r   r*   r/   r*   r*   r+   ÚoffsetZ   s   zPandasColumn.offsetútuple[DtypeKind, int, str, str]c                 C  sz   | j j}t|ƒr| j jj}|  |j¡\}}}}tj||tj	fS t
|ƒr8t| j ƒdkr4tjdt|ƒtj	fS tdƒ‚|  |¡S )NÚstringé   z.Non-string object dtypes are not supported yet)r'   Údtyper   ÚvaluesÚcodesÚ_dtype_from_pandasdtyper   ÚCATEGORICALr   ÚNATIVEr   r   ÚSTRINGr   r%   )r)   r5   r7   Ú_ÚbitwidthÚc_arrow_dtype_f_strr*   r*   r+   r5   b   s.   

ûüü
zPandasColumn.dtypec                 C  s>   t  |jd¡}|du rtd|› dƒ‚||jd t|ƒ|jfS )z/
        See `self.dtype` for details.
        Nú
Data type z& not supported by interchange protocolr4   )Ú	_NP_KINDSÚgetÚkindÚ
ValueErrorÚitemsizer   Ú	byteorder)r)   r5   rB   r*   r*   r+   r8   €   s   z$PandasColumn._dtype_from_pandasdtypec                 C  s:   | j d tjkstdƒ‚| jjjdtt 	| jjj
¡ƒdœS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)Ú
is_orderedÚis_dictionaryÚ
categories)r5   r   r9   Ú	TypeErrorr'   ÚcatÚorderedr   r#   r$   rH   r/   r*   r*   r+   Údescribe_categorical   s   ÿýz!PandasColumn.describe_categoricalc                 C  s@   | j d }zt| \}}W ||fS  ty   td|› dƒ‚w )Nr   r?   z not yet supported)r5   Ú_NULL_DESCRIPTIONÚKeyErrorr%   )r)   rB   ÚnullÚvaluer*   r*   r+   Údescribe_null¬   s   
ýÿzPandasColumn.describe_nullc                 C  s   | j  ¡  ¡  ¡ S )zB
        Number of null elements. Should always be known.
        )r'   ÚisnaÚsumÚitemr/   r*   r*   r+   Ú
null_count¶   s   zPandasColumn.null_countúdict[str, pd.Index]c                 C  s   d| j jiS )z8
        Store specific metadata of the column.
        zpandas.index)r'   Úindexr/   r*   r*   r+   Úmetadata½   s   zPandasColumn.metadatac                 C  r0   )zE
        Return the number of chunks the column consists of.
        é   r*   r/   r*   r*   r+   Ú
num_chunksÄ   s   zPandasColumn.num_chunksNÚn_chunksú
int | Nonec                 c  sv    |r6|dkr6t | jƒ}|| }|| dkr|d7 }td|| |ƒD ]}t| jj||| … | jƒV  q"dS | V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rY   r   N)Úlenr'   Úranger   Úilocr(   )r)   r[   r.   ÚstepÚstartr*   r*   r+   Ú
get_chunksÊ   s   €
ÿÿ
zPandasColumn.get_chunksr   c                 C  s\   |   ¡ dddœ}z|  ¡ |d< W n	 ty   Y nw z	|  ¡ |d< W |S  ty-   Y |S w )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)ÚdataÚvalidityÚoffsetsrd   re   )Ú_get_data_bufferÚ_get_validity_bufferr   Ú_get_offsets_buffer)r)   Úbuffersr*   r*   r+   Úget_buffersÛ   s    ýÿýýzPandasColumn.get_buffersútuple[PandasBuffer, Any]c                 C  s  | j d tjtjtjtjtjfv r"t| j 	¡ | j
d}| j }||fS | j d tjkr@| jjj}t|| j
d}|  |j ¡}||fS | j d tjkry| j 	¡ }tƒ }|D ]}t|tƒrb| |jdd¡ qRttj|ddƒ}tjdtjtjf}||fS td| jj › d	ƒ‚)
zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r   )r   úutf-8©ÚencodingÚuint8)r5   r4   r?   r!   )r5   r   ÚINTÚUINTÚFLOATÚBOOLÚDATETIMEr	   r'   Úto_numpyr(   r9   r6   Ú_codesr8   r;   Ú	bytearrayr"   ÚstrÚextendÚencodeÚnpÚ
frombufferr   r   r:   r%   )r)   Úbufferr5   r7   Úbufr   Úobjr*   r*   r+   rf      s<   ûä
è

€ü	þzPandasColumn._get_data_bufferc                 C  sÀ   | j \}}| jd tjkrI| j ¡ }|dk}| }tjt|ƒftj	d}t
|ƒD ]\}}t|tƒr3|n|||< q(t|ƒ}tjdtjtjf}	||	fS zt| d }
W t|
ƒ‚ ty_   tdƒ‚w )zÒ
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   ©Úshaper5   r4   z! so does not have a separate maskzSee self.describe_null)rQ   r5   r   r;   r'   ru   r{   Úzerosr]   Úbool8Ú	enumerater"   rx   r	   rs   r   r   r:   Ú_NO_VALIDITY_BUFFERrN   r%   r   )r)   rO   Úinvalidr~   ÚvalidÚmaskr   r   r}   r5   Úmsgr*   r*   r+   rg   -  s"   

üþz!PandasColumn._get_validity_bufferc           	      C  s¢   | j d tjkrM| j ¡ }d}tjt|ƒd ftjd}t	|ƒD ]\}}t
|tƒr5|jdd}|t|ƒ7 }|||d < q t|ƒ}tjdtjtjf}||fS tdƒ‚)a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rY   r€   rl   rm   é@   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r5   r   r;   r'   ru   r{   r‚   r]   Úint64r„   r"   rx   rz   r	   rp   r   ÚINT64r   r:   r   )	r)   r6   Úptrre   r   Úvr   r}   r5   r*   r*   r+   rh   S  s&   

üûÿz PandasColumn._get_offsets_buffer)T)r   r   r   r   r   r    )r   r-   )r   r2   )r   rV   )N)r[   r\   )r   r   )r   rk   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r,   r.   Úpropertyr1   r   r5   r8   rL   rQ   rU   rX   rZ   rb   rj   rf   rg   rh   r*   r*   r*   r+   r   <   s.    



	


%
-&r   ),Ú
__future__r   Útypingr   Únumpyr{   Úpandas._libs.libr   Úpandas._libs.tslibsr   Úpandas.util._decoratorsr   Úpandasr#   Úpandas.api.typesr   r   Úpandas.core.interchange.bufferr	   Ú*pandas.core.interchange.dataframe_protocolr
   r   r   r   Úpandas.core.interchange.utilsr   r   r   r   rp   rq   rr   rs   r;   rt   r@   ÚUSE_NANÚUSE_SENTINELÚNON_NULLABLEr9   ÚUSE_BYTEMASKrM   r…   r   r*   r*   r*   r+   Ú<module>   s@    ùöý