o
    5c(5                     @  s<  d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZmZmZmZ ejejejej ej!ej"ej"dZ#ejej$dfej"ej%e	fejej&dfejej&dfej ej&dfej'ej%dfej!ej(d fiZ)ej&dej$dej%diZ*G dd deZ+dS )    )annotations)AnyN)infer_dtype)iNaT)cache_readonly)is_categorical_dtypeis_string_dtype)PandasBuffer)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
EndiannessNoBufferPresentdtype_to_arrow_c_fmt)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                   @  s   e Zd ZdZd/d0d	d
Zd1ddZed1ddZed2ddZ	d2ddZ
edd Zedd Zed1ddZed3ddZd1ddZd4d5d#d$Zd6d&d'Zd7d)d*Zd7d+d,Zd7d-d.Zd S )8PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tcolumn	pd.Series
allow_copyboolreturnNonec                 C  s0   t |tjstdt| d|| _|| _dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zColumns of type  not handled yetN)
isinstancepdSeriesNotImplementedErrortype_col_allow_copy)selfr   r    r*   T/var/www/html/gps/gps/lib/python3.10/site-packages/pandas/core/interchange/column.py__init__H   s   
zPandasColumn.__init__intc                 C  s   | j jS )z2
        Size of the column, in elements.
        )r'   sizer)   r*   r*   r+   r.   T   s   zPandasColumn.sizec                 C     dS )z7
        Offset of first element. Always zero.
        r   r*   r/   r*   r*   r+   offsetZ   s   zPandasColumn.offsettuple[DtypeKind, int, str, str]c                 C  sz   | j j}t|r| j jj}| |j\}}}}tj||tj	fS t
|r8t| j dkr4tjdt|tj	fS td| |S )Nstring   z.Non-string object dtypes are not supported yet)r'   dtyper   valuescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r%   )r)   r5   r7   _bitwidthc_arrow_dtype_f_strr*   r*   r+   r5   b   s.   


zPandasColumn.dtypec                 C  s>   t |jd}|du rtd| d||jd t||jfS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolr4   )	_NP_KINDSgetkind
ValueErroritemsizer   	byteorder)r)   r5   rB   r*   r*   r+   r8      s   z$PandasColumn._dtype_from_pandasdtypec                 C  s:   | j d tjkstd| jjjdtt	| jjj
dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)
is_orderedis_dictionary
categories)r5   r   r9   	TypeErrorr'   catorderedr   r#   r$   rH   r/   r*   r*   r+   describe_categorical   s   z!PandasColumn.describe_categoricalc                 C  s@   | j d }zt| \}}W ||fS  ty   td| dw )Nr   r?   z not yet supported)r5   _NULL_DESCRIPTIONKeyErrorr%   )r)   rB   nullvaluer*   r*   r+   describe_null   s   
zPandasColumn.describe_nullc                 C  s   | j    S )zB
        Number of null elements. Should always be known.
        )r'   isnasumitemr/   r*   r*   r+   
null_count   s   zPandasColumn.null_countdict[str, pd.Index]c                 C  s   d| j jiS )z8
        Store specific metadata of the column.
        zpandas.index)r'   indexr/   r*   r*   r+   metadata   s   zPandasColumn.metadatac                 C  r0   )zE
        Return the number of chunks the column consists of.
           r*   r/   r*   r*   r+   
num_chunks   s   zPandasColumn.num_chunksNn_chunks
int | Nonec                 c  sv    |r6|dkr6t | j}|| }|| dkr|d7 }td|| |D ]}t| jj|||  | jV  q"dS | V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rY   r   N)lenr'   ranger   ilocr(   )r)   r[   r.   stepstartr*   r*   r+   
get_chunks   s   

zPandasColumn.get_chunksr   c                 C  s\   |   ddd}z|  |d< W n	 ty   Y nw z	|  |d< W |S  ty-   Y |S w )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsrd   re   )_get_data_buffer_get_validity_bufferr   _get_offsets_buffer)r)   buffersr*   r*   r+   get_buffers   s    zPandasColumn.get_bufferstuple[PandasBuffer, Any]c                 C  s  | j d tjtjtjtjtjfv r"t| j	 | j
d}| j }||fS | j d tjkr@| jjj}t|| j
d}| |j }||fS | j d tjkry| j	 }t }|D ]}t|trb||jdd qRttj|dd}tjdtjtjf}||fS td| jj  d	)
zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r   )r   utf-8encodinguint8)r5   r4   r?   r!   )r5   r   INTUINTFLOATBOOLDATETIMEr	   r'   to_numpyr(   r9   r6   _codesr8   r;   	bytearrayr"   strextendencodenp
frombufferr   r   r:   r%   )r)   bufferr5   r7   bufr   objr*   r*   r+   rf      s<   


	zPandasColumn._get_data_bufferc                 C  s   | j \}}| jd tjkrI| j }|dk}| }tjt|ftj	d}t
|D ]\}}t|tr3|n|||< q(t|}tjdtjtjf}	||	fS zt| d }
W t|
 ty_   tdw )z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   shaper5   r4   z! so does not have a separate maskzSee self.describe_null)rQ   r5   r   r;   r'   ru   r{   zerosr]   bool8	enumerater"   rx   r	   rs   r   r   r:   _NO_VALIDITY_BUFFERrN   r%   r   )r)   rO   invalidr~   validmaskr   r   r}   r5   msgr*   r*   r+   rg   -  s"   

z!PandasColumn._get_validity_bufferc           	      C  s   | j d tjkrM| j }d}tjt|d ftjd}t	|D ]\}}t
|tr5|jdd}|t|7 }|||d < q t|}tjdtjtjf}||fS td)a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rY   r   rl   rm   @   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r5   r   r;   r'   ru   r{   r   r]   int64r   r"   rx   rz   r	   rp   r   INT64r   r:   r   )	r)   r6   ptrre   r   vr   r}   r5   r*   r*   r+   rh   S  s&   

z PandasColumn._get_offsets_buffer)T)r   r   r   r   r   r    )r   r-   )r   r2   )r   rV   )N)r[   r\   )r   r   )r   rk   )__name__
__module____qualname____doc__r,   r.   propertyr1   r   r5   r8   rL   rQ   rU   rX   rZ   rb   rj   rf   rg   rh   r*   r*   r*   r+   r   <   s.    



	


%
-&r   ),
__future__r   typingr   numpyr{   pandas._libs.libr   pandas._libs.tslibsr   pandas.util._decoratorsr   pandasr#   pandas.api.typesr   r   pandas.core.interchange.bufferr	   *pandas.core.interchange.dataframe_protocolr
   r   r   r   pandas.core.interchange.utilsr   r   r   r   rp   rq   rr   rs   r;   rt   r@   USE_NANUSE_SENTINELNON_NULLABLEr9   USE_BYTEMASKrM   r   r   r*   r*   r*   r+   <module>   s@    